diff --git a/cmake/Env.cmake b/cmake/Env.cmake index 955a391469..469bf6631c 100644 --- a/cmake/Env.cmake +++ b/cmake/Env.cmake @@ -90,7 +90,6 @@ if(OB_BUILD_CLOSE_MODULES) # oralce ob_define(OB_BUILD_ORACLE_PARSER ON) ob_define(OB_BUILD_ORACLE_PL ON) - ob_define(OB_BUILD_ORACLE_XML ON) # dblink ob_define(OB_BUILD_DBLINK ON) # 仲裁功能 @@ -136,10 +135,6 @@ if(OB_BUILD_ORACLE_PL) add_definitions(-DOB_BUILD_ORACLE_PL) endif() -if(OB_BUILD_ORACLE_XML) - add_definitions(-DOB_BUILD_ORACLE_XML) -endif() - if(OB_BUILD_ARBITRATION) add_definitions(-DOB_BUILD_ARBITRATION) endif() diff --git a/deps/oblib/src/CMakeLists.txt b/deps/oblib/src/CMakeLists.txt index 5f923ac613..a21b892c0d 100644 --- a/deps/oblib/src/CMakeLists.txt +++ b/deps/oblib/src/CMakeLists.txt @@ -78,14 +78,6 @@ if(OB_BUILD_TDE_SECURITY) ) endif() -if(OB_BUILD_ORACLE_XML) - target_include_directories( - oblib_base_base_base INTERFACE - ${CMAKE_SOURCE_DIR}/close_modules/xml - ${CMAKE_SOURCE_DIR}/close_modules/xml/deps/oblib/src/ - ) -endif() - if(OB_BUILD_AUDIT_SECURITY) target_include_directories( oblib_base_base_base INTERFACE diff --git a/deps/oblib/src/common/object/ob_obj_compare.cpp b/deps/oblib/src/common/object/ob_obj_compare.cpp index f997f86cdf..a3937327cb 100644 --- a/deps/oblib/src/common/object/ob_obj_compare.cpp +++ b/deps/oblib/src/common/object/ob_obj_compare.cpp @@ -1340,8 +1340,10 @@ int ObObjCmpFuncs::cmp_func(const ObObj &obj1, \ K(obj1.get_collation_type()), K(obj2.get_collation_type()), \ K(obj1), K(obj2)); \ } else { \ - ObJsonBin j_bin1(data_str1.ptr(), data_str1.length()); \ - ObJsonBin j_bin2(data_str2.ptr(), data_str2.length()); \ + ObJsonBinCtx ctx1; \ + ObJsonBinCtx ctx2; \ + ObJsonBin j_bin1(data_str1.ptr(), data_str1.length(), &ctx1); \ + ObJsonBin j_bin2(data_str2.ptr(), data_str2.length(), &ctx2); \ ObIJsonBase *j_base1 = &j_bin1; \ ObIJsonBase *j_base2 = &j_bin2; \ if (OB_FAIL(j_bin1.reset_iter())) { \ @@ -1384,8 +1386,10 @@ int ObObjCmpFuncs::cmp_func(const ObObj &obj1, \ 
K(obj1), K(obj2)); \ ret = CR_OB_ERROR; \ } else { \ - ObJsonBin j_bin1(data_str1.ptr(), data_str1.length()); \ - ObJsonBin j_bin2(data_str2.ptr(), data_str2.length()); \ + ObJsonBinCtx ctx1; \ + ObJsonBinCtx ctx2; \ + ObJsonBin j_bin1(data_str1.ptr(), data_str1.length(), &ctx1); \ + ObJsonBin j_bin2(data_str2.ptr(), data_str2.length(), &ctx2); \ ObIJsonBase *j_base1 = &j_bin1; \ ObIJsonBase *j_base2 = &j_bin2; \ if (OB_FAIL(j_bin1.reset_iter())) { \ diff --git a/deps/oblib/src/common/object/ob_obj_funcs.h b/deps/oblib/src/common/object/ob_obj_funcs.h index 0ec746126b..ea0ef5e6d7 100644 --- a/deps/oblib/src/common/object/ob_obj_funcs.h +++ b/deps/oblib/src/common/object/ob_obj_funcs.h @@ -1729,7 +1729,8 @@ DEF_GEO_FUNCS(ObGeometryType, string, ObString); } else if (OB_FAIL(lob.get_inrow_data(j_bin_str))) { \ COMMON_LOG(WARN, "fail to get inrow data", K(ret), K(lob)); \ } else { \ - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); \ + ObJsonBinCtx ctx; \ + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &ctx); \ ObIJsonBase *j_base = &j_bin; \ if (j_bin_str.length() == 0 || param.is_null()) { \ res = hash; \ diff --git a/deps/oblib/src/common/object/ob_object.cpp b/deps/oblib/src/common/object/ob_object.cpp index 4a15e4c0f8..02c22079c2 100644 --- a/deps/oblib/src/common/object/ob_object.cpp +++ b/deps/oblib/src/common/object/ob_object.cpp @@ -122,6 +122,13 @@ bool ObLobCommon::operator!=(const ObLobCommon &other) const return !(operator==(other)); } +int64_t ObLobDataOutRowCtx::get_real_chunk_size() const +{ + // ObLobDataOutRowCtx::chunk_size_ only have 8 bit, range is 0~255 + // and chunk size can not be zero, so if chunk size is 256KB, ObLobDataOutRowCtx::chunk_size_ is zero + return chunk_size_ == 0 ? 
OB_MAX_LOB_CHUNK_SIZE : chunk_size_ * OUTROW_LOB_CHUNK_SIZE_UNIT; +} + void ObLobData::reset() { id_.reset(); @@ -809,6 +816,35 @@ int ObLobLocatorV2::get_real_locator_len(int64_t &real_len) const return ret; } +int ObLobLocatorV2::get_chunk_size(int64_t &chunk_size) const +{ + int ret = OB_SUCCESS; + ObLobCommon *disk_loc = nullptr; + if (! has_lob_header_ || size_ == 0 || OB_ISNULL(ptr_)) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "no locator or is null", K(ret), K(has_lob_header_), K(size_), KP(ptr_)); + } else if (! is_persist_lob()) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "non-persist_lob should not call this function", K(ret), KPC(this)); + } else if (is_inrow()) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "inrow-persist_lob should not call this function", K(ret), KPC(this)); + } else if (OB_FAIL(get_disk_locator(disk_loc))) { + COMMON_LOG(WARN, "get disk locator fail", K(ret), KPC(this)); + } else if(((uintptr_t)disk_loc - (uintptr_t)ptr_) < DISK_LOB_OUTROW_FULL_SIZE) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "size overflow", K(ret), KPC(this), KP(disk_loc), KP(ptr_), "left_size", ((uintptr_t)disk_loc - (uintptr_t)ptr_)); + } else if (! 
disk_loc ->is_init_) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "disk lob not init", K(ret), KPC(this), KPC(disk_loc)); + } else { + const ObLobData *lob_data = reinterpret_cast(disk_loc->buffer_); + const ObLobDataOutRowCtx *ctx = reinterpret_cast(lob_data->buffer_); + chunk_size = ctx->get_real_chunk_size(); + } + return ret; +} + // Notice: this payload is payload with disk locator if it exist int ObLobLocatorV2::set_payload_data(const ObString& payload) { diff --git a/deps/oblib/src/common/object/ob_object.h b/deps/oblib/src/common/object/ob_object.h index ef230552ab..bf5f2991ce 100644 --- a/deps/oblib/src/common/object/ob_object.h +++ b/deps/oblib/src/common/object/ob_object.h @@ -527,6 +527,7 @@ struct ObLobId struct ObLobDataOutRowCtx { + static const int64_t OUTROW_LOB_CHUNK_SIZE_UNIT = 1024; // 1KB enum OpType { SQL = 0, // all sql op @@ -539,10 +540,10 @@ struct ObLobDataOutRowCtx }; ObLobDataOutRowCtx() : is_full_(0), op_(0), offset_(0), check_sum_(0), seq_no_st_(0), seq_no_cnt_(0), - del_seq_no_cnt_(0), modified_len_(0), first_meta_offset_(0) + del_seq_no_cnt_(0), modified_len_(0), first_meta_offset_(0), chunk_size_(0) {} TO_STRING_KV(K_(is_full), K_(op), K_(offset), K_(check_sum), K_(seq_no_st), K_(seq_no_cnt), - K_(del_seq_no_cnt), K_(modified_len), K_(first_meta_offset)); + K_(del_seq_no_cnt), K_(modified_len), K_(first_meta_offset), K_(chunk_size)); uint64_t is_full_ : 1; uint64_t op_ : 8; uint64_t offset_ : 55; @@ -553,6 +554,9 @@ struct ObLobDataOutRowCtx uint64_t modified_len_; uint32_t first_meta_offset_ : 24; uint32_t chunk_size_ : 8; // unit is kb + + bool is_diff() const { return OpType::DIFF == op_; } + int64_t get_real_chunk_size() const; }; struct ObLobData @@ -925,6 +929,7 @@ public: static const uint32_t MEM_LOB_EXTERN_RETRYINFO_LEN = sizeof(ObMemLobRetryInfo); static const uint16_t MEM_LOB_EXTERN_SIZE_LEN = sizeof(uint16_t); static const uint32_t MEM_LOB_ADDR_LEN = 0; // reserved for temp lob address + static const int64_t 
DISK_LOB_OUTROW_FULL_SIZE = sizeof(ObLobCommon) + sizeof(ObLobData) + sizeof(ObLobDataOutRowCtx) + sizeof(uint64_t); ObLobLocatorV2() : ptr_(NULL), size_(0), has_lob_header_(true) {} ObLobLocatorV2(char *loc_ptr, uint32_t loc_size, uint32_t has_lob_header = true) : @@ -1051,6 +1056,7 @@ public: int get_location_info(ObMemLobLocationInfo *&location_info) const; int get_retry_info(ObMemLobRetryInfo *&retry_info) const; int get_real_locator_len(int64_t &real_len) const; + int get_chunk_size(int64_t &chunk_size) const; bool is_empty_lob() const; bool is_inrow() const; diff --git a/deps/oblib/src/lib/CMakeLists.txt b/deps/oblib/src/lib/CMakeLists.txt index 3c71488de5..b08baa5ef5 100644 --- a/deps/oblib/src/lib/CMakeLists.txt +++ b/deps/oblib/src/lib/CMakeLists.txt @@ -146,6 +146,7 @@ ob_set_subtarget(oblib_lib common_mixed hash/ob_hashutils.cpp hash/xxhash.c hash_func/murmur_hash.cpp + lob/ob_lob_base.cpp json/ob_json.cpp json/ob_json_print_utils.cpp json/ob_yson.cpp @@ -154,6 +155,8 @@ ob_set_subtarget(oblib_lib common_mixed json_type/ob_json_bin.cpp json_type/ob_json_base.cpp json_type/ob_json_parse.cpp + json_type/ob_json_schema.cpp + json_type/ob_json_diff.cpp lds/ob_lds_define.cpp net/ob_addr.cpp net/ob_net_util.cpp @@ -204,6 +207,19 @@ ob_set_subtarget(oblib_lib common_mixed wide_integer/ob_wide_integer_cmp_funcs.cpp wide_integer/ob_wide_integer_str_funcs.cpp udt/ob_udt_type.cpp + xml/ob_mul_mode_reader.cpp + xml/ob_xml.cpp + xml/ob_xml_parser.cpp + xml/ob_libxml2_sax_handler.cpp + xml/ob_tree_base.cpp + xml/ob_xml_tree.cpp + xml/ob_xml_util.cpp + xml/ob_xpath.cpp + xml/ob_multi_mode_bin.cpp + xml/ob_xml_bin.cpp + xml/ob_multi_mode_interface.cpp + xml/ob_path_parser.cpp + xml/ob_binary_aggregate.cpp locale/ob_locale_type.cc locale/ob_locale.cpp ) @@ -358,17 +374,16 @@ add_library(malloc_hook STATIC alloc/malloc_hook.h) target_link_libraries(malloc_hook oblib_base) -if(OB_BUILD_ORACLE_XML) +if(OB_BUILD_CLOSE_MODULES) + target_link_libraries(oblib_lib PUBLIC + 
${DEP_3RD_DIR}/usr/local/lib/libxslt.a + ${DEP_3RD_DIR}/usr/local/lib/libexslt.a + ${ob_close_deps_static_name} + ) +endif() + target_link_libraries(oblib_lib PUBLIC ob_malloc compress restore ${DEP_DIR}/lib/libxml2.a ${DEP_DIR}/lib/liblzma.a - ${DEP_3RD_DIR}/usr/local/lib/libxslt.a - ${DEP_3RD_DIR}/usr/local/lib/libexslt.a - ${ob_close_deps_static_name} ) -else() -target_link_libraries(oblib_lib - PUBLIC ob_malloc -) -endif() diff --git a/deps/oblib/src/lib/json_type/ob_json_base.cpp b/deps/oblib/src/lib/json_type/ob_json_base.cpp index c2b254004b..3c30f6d77e 100644 --- a/deps/oblib/src/lib/json_type/ob_json_base.cpp +++ b/deps/oblib/src/lib/json_type/ob_json_base.cpp @@ -127,17 +127,61 @@ bool ObIJsonBase::is_json_date(ObJsonNodeType json_type) const return ret_bool; } -// only use in seek, this can not from stack memory -int ObIJsonBase::add_if_missing(ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const +// apend node to unique vector judge duplicate +int ObJsonSortedResult::insert_unique(ObIJsonBase* node) { INIT_SUCC(ret); - ObJsonBaseCmp cmp; ObJsonBaseUnique unique; - ObJsonBaseSortedVector::iterator pos = dup.end(); + ObJsonBaseSortedVector::iterator pos = sort_vector_.end(); + if (size_ == 0) { // only one result should not compare + json_point_ = node; + } else if (size_ == 1 && sort_vector_.size() == 0) { + // if have two result, should append json_point_ first, then append new node. 
+ if (OB_ISNULL(json_point_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get first node", K(ret)); + } else if (sort_vector_.remain() < 2 && OB_FAIL(sort_vector_.reserve(OB_PATH_RESULT_EXPAND_SIZE))) { + LOG_WARN("fail to expand vactor", K(ret)); + } else if (OB_FAIL(sort_vector_.insert_unique(json_point_, pos, cmp, unique))) { + LOG_WARN("fail to push_back value into duplicate", K(ret), K(sort_vector_.size())); + } else if (OB_FAIL(sort_vector_.insert_unique(node, pos, cmp, unique))) { + LOG_WARN("fail to push_back value into duplicate", K(ret), K(sort_vector_.size())); + } + } else if (sort_vector_.remain() == 0 && OB_FAIL(sort_vector_.reserve(OB_PATH_RESULT_EXPAND_SIZE))) { + LOG_WARN("fail to expand vactor", K(ret)); + } else if (OB_FAIL(sort_vector_.insert_unique(node, pos, cmp, unique))) { + LOG_WARN("fail to push_back value into result", K(ret), K(sort_vector_.size())); + } + if (OB_SUCC(ret)) { + size_ ++; + } + return ret; +} - if ((OB_SUCC(dup.insert_unique(this, pos, cmp, unique)))) { - if (OB_FAIL(res.push_back(this))) { +// only use in seek, use stack memory should deep copy. 
+int ObIJsonBase::add_if_missing(ObJsonSortedResult &dup, ObJsonSeekResult &res, ObIAllocator* allocator) const +{ + INIT_SUCC(ret); + ObIJsonBase* cur_json = const_cast(this); + ObJsonBin* json_bin = NULL; + + // Reduce array allocation size : 2 + // binary need clone new node + if (is_bin()) { + if (res.size() == 0) { + json_bin = static_cast(res.res_point_); + } + if (OB_FAIL((static_cast(cur_json))->clone_new_node(json_bin, allocator))) { + LOG_WARN("failed to create json binary", K(ret)); + } else if (OB_ISNULL(cur_json = json_bin)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get json binary value", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_SUCC(dup.insert_unique(cur_json))) { + if (OB_FAIL(res.push_node(cur_json))) { LOG_WARN("fail to push_back value into result", K(ret), K(res.size())); } } else if (ret == OB_CONFLICT_VALUE) { @@ -150,10 +194,11 @@ int ObIJsonBase::add_if_missing(ObJsonBaseSortedVector &dup, ObJsonBaseVector &r int ObIJsonBase::find_array_range(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var) const + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); + ObJsonBin st_json(allocator_); // use stack variable instead of deep copy SMART_VAR (ObArrayRange, range) { if (OB_FAIL(path_node->get_array_range(element_count(), range))) { LOG_WARN("fail to get array range", K(ret), K(element_count())); @@ -161,7 +206,7 @@ int ObIJsonBase::find_array_range(ObIAllocator* allocator, ObSeekParentInfo &par bool is_done = false; ObIJsonBase *jb_ptr = NULL; for (uint32_t i = range.array_begin_; OB_SUCC(ret) && i < range.array_end_ && !is_done; ++i) { - jb_ptr = NULL; // reset jb_ptr to NULL + jb_ptr = &st_json; // reset jb_ptr to 
stack var ret = get_array_element(i, jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; @@ -182,17 +227,18 @@ int ObIJsonBase::find_array_range(ObIAllocator* allocator, ObSeekParentInfo &par int ObIJsonBase::find_array_cell(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var) const + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); - + ObJsonBin st_json(allocator_); // use stack variable instead of deep copy + ObIJsonBase *jb_ptr = NULL; SMART_VAR (ObJsonArrayIndex, idx) { if (OB_FAIL(path_node->get_first_array_index(element_count(), idx))) { LOG_WARN("failed to get array index.", K(ret), K(element_count())); } else { if (idx.is_within_bounds()) { - ObIJsonBase *jb_ptr = NULL; + jb_ptr = &st_json; // reset jb_ptr to stack var ret = get_array_element(idx.get_array_index(), jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; @@ -210,13 +256,13 @@ int ObIJsonBase::find_array_cell(ObIAllocator* allocator, ObSeekParentInfo &pare } int ObIJsonBase::seek(const ObJsonPath &path, uint32_t node_cnt, bool is_auto_wrap, - bool only_need_one, ObJsonBaseVector &res, PassingMap* sql_var) const + bool only_need_one, ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); ObSeekParentInfo parent_info; JsonPathIterator cur_node = path.begin(); JsonPathIterator last_node = path.begin() + node_cnt; - ObJsonBaseSortedVector dup; + ObJsonSortedResult dup; if (OB_ISNULL(allocator_)) { // check allocator ret = OB_ERR_NULL_VALUE; @@ -243,7 +289,7 @@ int ObIJsonBase::seek(const ObJsonPath &path, uint32_t node_cnt, bool is_auto_wr int ObIJsonBase::seek(ObIAllocator* allocator, const ObJsonPath &path, uint32_t node_cnt, bool is_auto_wrap,bool 
only_need_one, - bool is_lax, ObJsonBaseVector &res, PassingMap* sql_var) const + bool is_lax, ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); // 对于$后的path节点而言,其parent_info.parent_path为begin() @@ -256,7 +302,7 @@ int ObIJsonBase::seek(ObIAllocator* allocator, const ObJsonPath &path, parent_info.path_size_ = node_cnt; JsonPathIterator cur_node = path.begin(); JsonPathIterator last_node = path.begin() + node_cnt; - ObJsonBaseSortedVector dup; + ObJsonSortedResult dup; if (OB_FAIL(find_child(allocator, parent_info, cur_node, last_node, is_auto_wrap, @@ -274,13 +320,15 @@ int ObIJsonBase::seek(ObIAllocator* allocator, const ObJsonPath &path, int ObIJsonBase::find_member(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var) const + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); ObIJsonBase *jb_ptr = NULL; + ObJsonBin st_json(allocator_); // use stack variable instead of deep copy if (json_type() == ObJsonNodeType::J_OBJECT) { + jb_ptr = &st_json; ObString key_name(path_node->get_object().len_, path_node->get_object().object_name_); ret = get_object_value(key_name, jb_ptr); if (OB_SUCC(ret)) { @@ -300,7 +348,7 @@ int ObIJsonBase::find_member(ObIAllocator* allocator, ObSeekParentInfo &parent_i } else if (is_lax && is_auto_wrap && json_type() == ObJsonNodeType::J_ARRAY) { bool is_done = false; for (uint32_t i = 0; OB_SUCC(ret) && i < element_count() && !is_done; ++i) { - jb_ptr = NULL; // reset jb_ptr to NULL + jb_ptr = &st_json; // reset jb_ptr to stack var ret = get_array_element(i, jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; @@ -321,20 +369,21 @@ int ObIJsonBase::find_member(ObIAllocator* allocator, ObSeekParentInfo 
&parent_i int ObIJsonBase::find_member_wildcard(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var) const + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); ObIJsonBase *jb_ptr = NULL; bool is_done = false; + ObJsonBin st_json(allocator_); // use stack variable instead of deep copy if (json_type() == ObJsonNodeType::J_OBJECT) { uint64_t count = element_count(); is_done = false; if (is_lax && !is_auto_wrap) is_auto_wrap = true; for (uint64_t i = 0; i < count && OB_SUCC(ret) && !is_done; ++i) { - jb_ptr = NULL; // reset jb_ptr to NULL + jb_ptr = &st_json; ret = get_object_value(i, jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; @@ -351,7 +400,7 @@ int ObIJsonBase::find_member_wildcard(ObIAllocator* allocator, ObSeekParentInfo } else if (is_lax && is_auto_wrap && json_type() == ObJsonNodeType::J_ARRAY) { is_done = false; for (uint32_t i = 0; OB_SUCC(ret) && i < element_count() && !is_done; ++i) { - jb_ptr = NULL; // reset jb_ptr to NULL + jb_ptr = &st_json; ret = get_array_element(i, jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; @@ -372,12 +421,12 @@ int ObIJsonBase::find_member_wildcard(ObIAllocator* allocator, ObSeekParentInfo int ObIJsonBase::find_ellipsis(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var) const + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); - bool is_done = false; - + bool is_done = false; + 
ObJsonBin st_json(allocator_); // use stack variable instead of deep copy if (OB_FAIL(find_child(allocator, parent_info, cur_node + 1, last_node, is_auto_wrap, only_need_one, is_lax, dup, res, sql_var))) { LOG_WARN("fail to seek recursively", K(ret), K(is_auto_wrap), K(only_need_one)); @@ -385,7 +434,7 @@ int ObIJsonBase::find_ellipsis(ObIAllocator* allocator, ObSeekParentInfo &parent uint64_t size = element_count(); ObIJsonBase *jb_ptr = NULL; for (uint32_t i = 0; i < size && !is_done && OB_SUCC(ret); ++i) { - jb_ptr = NULL; // reset jb_ptr to NULL + jb_ptr = &st_json; // reset jb_ptr to stack var ret = get_array_element(i, jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; @@ -400,9 +449,9 @@ int ObIJsonBase::find_ellipsis(ObIAllocator* allocator, ObSeekParentInfo &parent } } else if (json_type() == ObJsonNodeType::J_OBJECT) { uint64_t count = element_count(); - ObIJsonBase *jb_ptr = NULL; + ObIJsonBase *jb_ptr = NULL; // set jb_ptr to stack var for (uint32_t i = 0; i < count && !is_done && OB_SUCC(ret); ++i) { - jb_ptr = NULL; // reset jb_ptr to NULL + jb_ptr = &st_json;; // reset jb_ptr to NULL ret = get_object_value(i, jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; @@ -422,15 +471,16 @@ int ObIJsonBase::find_ellipsis(ObIAllocator* allocator, ObSeekParentInfo &parent int ObIJsonBase::find_array_wildcard(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var) const + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); ObIJsonBase *jb_ptr = NULL; bool is_done = false; + ObJsonBin st_json(allocator_); // use stack variable instead of deep copy for (uint32_t i = 0; OB_SUCC(ret) && i < element_count() && !is_done; ++i) { - jb_ptr = NULL; // 
reset jb_ptr to NULL + jb_ptr = &st_json; // reset jb_ptr to stack var ret = get_array_element(i, jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; @@ -450,11 +500,12 @@ int ObIJsonBase::find_array_wildcard(ObIAllocator* allocator, ObSeekParentInfo & int ObIJsonBase::find_multi_array_ranges(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var) const + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); uint64_t array_size = path_node->get_multi_array_size(); + ObJsonBin st_json(allocator_); // use stack variable instead of deep copy for (uint64_t array_idx = 0; array_idx < array_size && OB_SUCC(ret); ++array_idx) { SMART_VAR (ObArrayRange, range) { @@ -463,28 +514,21 @@ int ObIJsonBase::find_multi_array_ranges(ObIAllocator* allocator, ObSeekParentIn } else { bool is_done = false; ObIJsonBase *jb_ptr = NULL; - ObJsonBaseVector hit; - ObJsonBaseSortedVector tmp_dup; + ObJsonSortedResult tmp_dup; for (uint32_t i = range.array_begin_; OB_SUCC(ret) && i < range.array_end_ && !is_done; ++i) { - jb_ptr = NULL; // reset jb_ptr to NULL + jb_ptr = &st_json; // reset jb_ptr to stack var ret = get_array_element(i, jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; LOG_WARN("fail to get array child dom", K(ret), K(i)); } else if (OB_FAIL(jb_ptr->find_child(allocator, parent_info, cur_node + 1, last_node, is_auto_wrap, only_need_one, - is_lax, tmp_dup, hit, sql_var))) { + is_lax, tmp_dup, res, sql_var))) { LOG_WARN("fail to seek recursively", K(ret), K(i)); } else { is_done = is_seek_done(res, only_need_one); } } // end of search for each cell in arrar_range - - if (OB_SUCC(ret) && hit.size() > 0 ) { - for (int hit_idx = 0; hit_idx < hit.size(); 
++hit_idx) { - res.push_back(hit[hit_idx]); - } - } // add the result into res } } // end of each range } @@ -494,7 +538,7 @@ int ObIJsonBase::find_multi_array_ranges(ObIAllocator* allocator, ObSeekParentIn int ObIJsonBase::find_basic_child(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, bool is_auto_wrap, bool only_need_one, bool is_lax, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res, + ObJsonSortedResult &dup, ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); @@ -552,7 +596,7 @@ int ObIJsonBase::find_basic_child(ObIAllocator* allocator, ObSeekParentInfo &par int ObIJsonBase::find_array_child(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, bool is_auto_wrap, bool only_need_one, bool is_lax, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res, + ObJsonSortedResult &dup, ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); @@ -583,7 +627,7 @@ int ObIJsonBase::find_array_child(ObIAllocator* allocator, ObSeekParentInfo &par if (!is_lax) { ret = OB_ERR_UNEXPECTED; LOG_WARN("should be oracle mode!", K(ret), K(only_need_one)); - }else if (cur_json_type == ObJsonNodeType::J_ARRAY) { + } else if (cur_json_type == ObJsonNodeType::J_ARRAY) { if (OB_FAIL(find_multi_array_ranges(allocator, parent_info, cur_node, last_node, path_node, is_auto_wrap, only_need_one, is_lax, dup, res, sql_var))) { LOG_WARN("fail in find array range.", K(ret)); @@ -646,7 +690,7 @@ int ObIJsonBase::find_array_child(ObIAllocator* allocator, ObSeekParentInfo &par int ObIJsonBase::find_abs_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, 
ObJsonSeekResult &res) const { INIT_SUCC(ret); switch (json_type()) { @@ -742,7 +786,7 @@ int ObIJsonBase::find_abs_method(ObIAllocator* allocator, ObSeekParentInfo &pare int ObIJsonBase::find_ceiling_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); switch (json_type()) { @@ -814,7 +858,7 @@ int ObIJsonBase::find_ceiling_method(ObIAllocator* allocator, ObSeekParentInfo & int ObIJsonBase::find_floor_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); switch (json_type()) { @@ -888,14 +932,15 @@ int ObIJsonBase::find_floor_method(ObIAllocator* allocator, ObSeekParentInfo &pa int ObIJsonBase::find_numeric_item_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); ObIJsonBase *jb_ptr = NULL; + ObJsonBin st_json(allocator_); // use stack variable instead of deep copy if (json_type() == ObJsonNodeType::J_ARRAY && is_auto_wrap) { bool is_done = false; for (uint32_t i = 0; OB_SUCC(ret) && i < element_count() && !is_done; ++i) { - jb_ptr = NULL; // reset jb_ptr to NULL + jb_ptr = &st_json; // reset jb_ptr to stack var ret = 
get_array_element(i, jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; @@ -960,7 +1005,7 @@ int ObIJsonBase::find_numeric_item_method(ObIAllocator* allocator, ObSeekParentI int ObIJsonBase::find_type_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); char* ans_char = nullptr; @@ -1050,7 +1095,7 @@ int ObIJsonBase::find_type_method(ObIAllocator* allocator, ObSeekParentInfo &par int ObIJsonBase::find_length_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); ObIJsonBase *jb_ptr = NULL; @@ -1106,7 +1151,7 @@ int ObIJsonBase::find_length_method(ObIAllocator* allocator, ObSeekParentInfo &p int ObIJsonBase::find_size_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); ObIJsonBase *jb_ptr = NULL; @@ -1139,7 +1184,7 @@ int ObIJsonBase::find_size_method(ObIAllocator* allocator, ObSeekParentInfo &par int ObIJsonBase::find_boolean_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - 
bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); ObIJsonBase* jb_ptr = nullptr; @@ -1284,7 +1329,7 @@ bool ObIJsonBase::check_legal_ora_date(const ObString date) const int ObIJsonBase::find_date_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); ObIJsonBase* jb_ptr = nullptr; @@ -1373,7 +1418,7 @@ int ObIJsonBase::find_date_method(ObIAllocator* allocator, ObSeekParentInfo &par int ObIJsonBase::find_timestamp_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); ObIJsonBase *jb_ptr = NULL; @@ -1470,7 +1515,7 @@ int ObIJsonBase::find_timestamp_method(ObIAllocator* allocator, ObSeekParentInfo int ObIJsonBase::find_double_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); ObIJsonBase *jb_ptr = NULL; @@ -1553,7 +1598,7 @@ int ObIJsonBase::find_double_method(ObIAllocator* allocator, ObSeekParentInfo &p int ObIJsonBase::find_number_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const 
JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); @@ -1620,7 +1665,7 @@ int ObIJsonBase::find_number_method(ObIAllocator* allocator, ObSeekParentInfo &p int ObIJsonBase::find_string_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); ObIJsonBase *jb_ptr = NULL; @@ -1700,7 +1745,7 @@ int ObIJsonBase::find_string_method(ObIAllocator* allocator, ObSeekParentInfo &p int ObIJsonBase::find_trans_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); ObIJsonBase *jb_ptr = NULL; @@ -1808,7 +1853,7 @@ int ObIJsonBase::find_trans_method(ObIAllocator* allocator, ObSeekParentInfo &pa int ObIJsonBase::find_func_child(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, bool is_auto_wrap, bool only_need_one, bool is_lax, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const + ObJsonSortedResult &dup, ObJsonSeekResult &res) const { INIT_SUCC(ret); SMART_VAR (ObJsonPathFuncNode*, path_node) { @@ -2140,9 +2185,13 @@ bool ObIJsonBase::is_real_json_null(const ObIJsonBase* ptr) const { bool ret_bool = false; if (ptr->json_type() == ObJsonNodeType::J_NULL) { - ObIJsonBase* tmp = 
const_cast(ptr); - ObJsonNull* tmp_null = static_cast(tmp); - ret_bool = !(tmp_null->is_not_null()); + if (ptr->is_bin()) { + ret_bool = true; + } else { + ObIJsonBase* tmp = const_cast(ptr); + ObJsonNull* tmp_null = static_cast(tmp); + ret_bool = !(tmp_null->is_not_null()); + } } return ret_bool; } @@ -2214,13 +2263,14 @@ int ObIJsonBase::trans_json_node(ObIAllocator* allocator, ObIJsonBase* &scalar, // for compare ——> (subpath, scalar/sql_var) // 左边调用compare,左边遇到数组自动解包 // 只要有一个结果为true则返回true,找不到或结果为false均返回false -int ObIJsonBase::cmp_to_right_recursively(ObIAllocator* allocator, const ObJsonBaseVector& hit, +int ObIJsonBase::cmp_to_right_recursively(ObIAllocator* allocator, ObJsonSeekResult& hit, const ObJsonPathNodeType node_type, ObIJsonBase* right_arg, bool& filter_result) const { INIT_SUCC(ret); bool cmp_result = false; bool null_flag = false; + ObJsonBin st_json(allocator_); // use stack variable instead of deep copy if (OB_ISNULL(right_arg)) { ret = OB_ERR_NULL_VALUE; LOG_WARN("compare value is null.", K(ret)); @@ -2238,7 +2288,7 @@ int ObIJsonBase::cmp_to_right_recursively(ObIAllocator* allocator, const ObJsonB uint64_t size = hit[i]->element_count(); ObIJsonBase *jb_ptr = NULL; for (uint32_t array_i = 0; array_i < size && !cmp_result && OB_SUCC(ret); ++array_i) { - jb_ptr = NULL; // reset jb_ptr to NULL + jb_ptr = &st_json; // reset jb_ptr to stack var ret = hit[i]->get_array_element(array_i, jb_ptr); int cmp_res = -3; // 类型相同可以直接用compare函数比较 @@ -2302,13 +2352,14 @@ int ObIJsonBase::cmp_to_right_recursively(ObIAllocator* allocator, const ObJsonB // for compare ——> ( scalar/sql_var, subpath) // 只要有一个结果为true则返回true,找不到或结果为false均返回false -int ObIJsonBase::cmp_to_left_recursively(ObIAllocator* allocator, const ObJsonBaseVector& hit, +int ObIJsonBase::cmp_to_left_recursively(ObIAllocator* allocator, ObJsonSeekResult& hit, const ObJsonPathNodeType node_type, ObIJsonBase* left_arg, bool& filter_result) const { INIT_SUCC(ret); bool cmp_result = false; bool 
null_flag = false; + ObJsonBin st_json(allocator_); // use stack variable instead of deep copy if (OB_ISNULL(left_arg)) { ret = OB_ERR_NULL_VALUE; LOG_WARN("compare value is null.", K(ret)); @@ -2319,7 +2370,44 @@ int ObIJsonBase::cmp_to_left_recursively(ObIAllocator* allocator, const ObJsonBa } else if (hit[i]->json_type() == ObJsonNodeType::J_NULL && !is_real_json_null(hit[i])) { cmp_result = false; } else { - if (hit[i]->json_type() == ObJsonNodeType::J_OBJECT || hit[i]->json_type() == ObJsonNodeType::J_ARRAY) { + // error is ok + // if is array, compare with every node + // but only autowrap once + if (hit[i]->json_type() == ObJsonNodeType::J_ARRAY) { + uint64_t size = hit[i]->element_count(); + ObIJsonBase *jb_ptr = NULL; + for (uint32_t array_i = 0; array_i < size && !cmp_result && OB_SUCC(ret); ++array_i) { + jb_ptr = &st_json; // reset jb_ptr to stack var + ret = hit[i]->get_array_element(array_i, jb_ptr); + int cmp_res = -3; + // 类型相同可以直接用compare函数比较 + if(OB_FAIL(ret) || OB_ISNULL(jb_ptr)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("compare value is null.", K(ret)); + } else if (is_same_type(left_arg, jb_ptr)) { + if (OB_SUCC(left_arg->compare((*jb_ptr), cmp_res, true))) { + cmp_based_on_node_type(node_type, cmp_res, cmp_result); + } + } else { + // 不相同的类型,oracle会将string类型转换为对应类型再进行比较 + // 转换或比较失败也正常,并不报错 + // 例如: [*].a == 123 + // 里面可能有多个元素无法转换成数字或无法和数字比较甚至找不到.a + // 但只要有一个找到,且为123或"123"则为true + ObIJsonBase* left = left_arg; + ObIJsonBase* right = jb_ptr; + if (OB_FAIL(trans_json_node(allocator, left, right))) { + // fail is normal, it is not an error. 
+ ret = OB_SUCCESS; + cmp_result = false; + } else if (OB_SUCC(left->compare((*right), cmp_res, true))) { + cmp_based_on_node_type(node_type, cmp_res, cmp_result); + } else { + cmp_result = false; + } + } + } + } else if (hit[i]->json_type() == ObJsonNodeType::J_OBJECT) { cmp_result = false; } else { int cmp_res = -3; @@ -2621,7 +2709,7 @@ int ObIJsonBase::get_sign_result_left_subpath(ObIAllocator* allocator, ObSeekPar } else { ObPathComparison comp_content = path_node->node_content_.comp_; - SMART_VAR (ObJsonBaseVector, hit) { + SMART_VAR (ObJsonSeekResult, hit) { ObJsonPath* sub_path = comp_content.comp_left_.filter_path_; // get left arg @@ -2671,7 +2759,7 @@ int ObIJsonBase::get_sign_result_right_subpath(ObIAllocator* allocator, ObSeekPa } else { ObPathComparison comp_content = path_node->node_content_.comp_; - SMART_VAR (ObJsonBaseVector,hit) { + SMART_VAR (ObJsonSeekResult, hit) { ObJsonPath* sub_path = comp_content.comp_right_.filter_path_; // get right arg if (OB_FAIL(parent_info.parent_jb_->seek(allocator, (*sub_path), @@ -3037,12 +3125,14 @@ int ObIJsonBase::str_cmp_autowrap(ObIAllocator* allocator, const ObString& right { INIT_SUCC(ret); ObJsonNodeType j_type = json_type(); + ObJsonBin st_json(allocator_); // use stack variable instead of deep copy + ObIJsonBase* jb_ptr = NULL; if (j_type == ObJsonNodeType::J_NULL && is_real_json_null(this)) { filter_result = false; } else if (j_type == ObJsonNodeType::J_ARRAY) { if (autowrap) { for (uint32_t i = 0; OB_SUCC(ret) && i < element_count() && !filter_result; ++i) { - ObIJsonBase* jb_ptr = NULL; // reset jb_ptr to NULL + jb_ptr = &st_json; // reset jb_ptr to stack var ret = get_array_element(i, jb_ptr); if (OB_ISNULL(jb_ptr)) { ret = OB_ERR_NULL_VALUE; @@ -3124,7 +3214,7 @@ int ObIJsonBase::get_str_comp_result(ObIAllocator* allocator, ObSeekParentInfo & if (OB_SUCC(ret) && !end_comp && comp_content.left_type_ == ObJsonPathNodeType::JPN_SUB_PATH) { // compare recursively - SMART_VAR (ObJsonBaseVector, hit) { + 
SMART_VAR (ObJsonSeekResult, hit) { ObJsonPath* sub_path = comp_content.comp_left_.filter_path_; if (sub_path->path_not_str()) { // 如果最后一个节点的类型是item function,且返回值一定不为string,如number/abs/length...会报错 @@ -3193,7 +3283,7 @@ int ObIJsonBase::find_comp_result(ObIAllocator* allocator, ObSeekParentInfo &par } else { ObJsonPath* sub_path = path_node->node_content_.comp_.comp_right_.filter_path_; ObJsonPathNodeType last_node_type = sub_path->get_last_node_type(); - SMART_VAR (ObJsonBaseVector, hit) { + SMART_VAR (ObJsonSeekResult, hit) { if (OB_FAIL(parent_info.parent_jb_->seek(allocator, (*sub_path), sub_path->path_node_cnt(), true, true, true, hit, sql_var))) { // 查找失败则直接将结果视为false @@ -3320,7 +3410,7 @@ int ObIJsonBase::find_cond_result(ObIAllocator* allocator, ObSeekParentInfo &par int ObIJsonBase::find_filter_child(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, bool is_auto_wrap, bool only_need_one, bool is_lax, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res, + ObJsonSortedResult &dup, ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); @@ -3374,7 +3464,7 @@ int ObIJsonBase::find_filter_child(ObIAllocator* allocator, ObSeekParentInfo &pa int ObIJsonBase::find_child(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, bool is_auto_wrap, bool only_need_one, bool is_lax, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res, + ObJsonSortedResult &dup, ObJsonSeekResult &res, PassingMap* sql_var) const { INIT_SUCC(ret); @@ -3382,7 +3472,7 @@ int ObIJsonBase::find_child(ObIAllocator* allocator, ObSeekParentInfo &parent_in // If the path expression is already at the end, the current DOM is the res, // and it is added to the res if (cur_node == last_node) { - add_if_missing(dup, res); + ret = add_if_missing(dup, res, allocator); } else { ObJsonNodeType cur_json_type = json_type(); ObJsonPathNodeType 
cur_node_type = (*cur_node)->get_node_type(); @@ -3473,9 +3563,7 @@ int ObIJsonBase::print_array(ObJsonBuffer &j_buf, uint64_t depth, bool is_pretty } else { ObIJsonBase *jb_ptr = NULL; ObJsonBin j_bin(allocator_); - if (is_bin()) { - jb_ptr = &j_bin; - } + jb_ptr = &j_bin; if (OB_FAIL(get_array_element(i, jb_ptr))) { LOG_WARN("fail to get array element", K(ret), K(depth), K(i)); } else if (OB_FAIL(jb_ptr->print(j_buf, true, is_pretty, depth))) { @@ -3497,6 +3585,19 @@ int ObIJsonBase::print_array(ObJsonBuffer &j_buf, uint64_t depth, bool is_pretty return ret; } +int ObIJsonBase::pint_colon(ObJsonBuffer &j_buf, bool is_pretty) const +{ + INIT_SUCC(ret); + if (lib::is_oracle_mode() && is_pretty) { + if (OB_FAIL(j_buf.append(" : "))) { + LOG_WARN("fail to append \" : \"", K(ret)); + } + } else if (OB_FAIL(j_buf.append(":"))) { + LOG_WARN("fail to append \":\"", K(ret)); + } + return ret; +} + int ObIJsonBase::print_object(ObJsonBuffer &j_buf, uint64_t depth, bool is_pretty) const { INIT_SUCC(ret); @@ -3520,16 +3621,14 @@ int ObIJsonBase::print_object(ObJsonBuffer &j_buf, uint64_t depth, bool is_prett LOG_WARN("fail to newline and indent", K(ret), K(depth), K(i), K(key)); } else if (!key.empty() && OB_FAIL(ObJsonBaseUtil::append_string(j_buf, true, key.ptr(), key.length()))) { // key LOG_WARN("fail to print string", K(ret), K(depth), K(i), K(key)); - } else if (OB_FAIL(j_buf.append(":"))) { + } else if (OB_FAIL(pint_colon(j_buf, is_pretty))) { LOG_WARN("fail to append \":\"", K(ret), K(depth), K(i), K(key)); } else if (lib::is_mysql_mode() && OB_FAIL(j_buf.append(" "))) { LOG_WARN("fail to append \" \"", K(ret), K(depth), K(i), K(key)); } else { ObIJsonBase *jb_ptr = NULL; ObJsonBin j_bin(allocator_); - if (is_bin()) { - jb_ptr = &j_bin; - } + jb_ptr = &j_bin; if (OB_FAIL(get_object_value(i, jb_ptr))) { LOG_WARN("fail to get object value", K(ret), K(i), K(is_pretty), K(depth)); } else if (OB_FAIL(jb_ptr->print(j_buf, true, is_pretty, depth))) { // value @@ -3617,7 
+3716,7 @@ int ObIJsonBase::print_float(ObJsonBuffer &j_buf) const if (OB_FAIL(j_buf.reserve(FLOAT_TO_STRING_CONVERSION_BUFFER_SIZE + 1))) { LOG_WARN("fail to reserve memory for j_buf", K(ret)); } else { - double val = get_double(); + double val = get_float(); char *start = j_buf.ptr() + j_buf.length(); uint64_t len = ob_gcvt(val, ob_gcvt_arg_type::OB_GCVT_ARG_FLOAT, FLOAT_TO_STRING_CONVERSION_BUFFER_SIZE, start, NULL); @@ -3911,9 +4010,7 @@ int ObIJsonBase::calc_json_hash_value(uint64_t val, hash_algo hash_func, uint64_ uint64_t size = element_count(); ObIJsonBase *jb_ptr = NULL; ObJsonBin j_bin(allocator_); - if (is_bin()) { - jb_ptr = &j_bin; - } + jb_ptr = &j_bin; for (uint64_t i = 0; i < size && OB_SUCC(ret); i++) { if (OB_FAIL(get_array_element(i, jb_ptr))) { LOG_WARN("fail to get this json array element", K(ret), K(i), K(size)); @@ -3936,9 +4033,7 @@ int ObIJsonBase::calc_json_hash_value(uint64_t val, hash_algo hash_func, uint64_ ObString key; ObIJsonBase *jb_ptr = NULL; ObJsonBin j_bin(allocator_); - if (is_bin()) { - jb_ptr = &j_bin; - } + jb_ptr = &j_bin; for (uint64_t i = 0; OB_SUCC(ret) && i < count; i++) { if (OB_FAIL(get_key(i, key))) { LOG_WARN("failed to get key", K(ret), K(i)); @@ -4077,10 +4172,8 @@ int ObIJsonBase::compare_array(const ObIJsonBase &other, int &res) const ObIJsonBase *jb_b_ptr = NULL; ObJsonBin j_bin_a(allocator_); ObJsonBin j_bin_b(allocator_); - if (is_bin()) { - jb_a_ptr = &j_bin_a; - jb_b_ptr = &j_bin_b; - } + jb_a_ptr = &j_bin_a; + jb_b_ptr = &j_bin_b; if (OB_FAIL(get_array_element(i, jb_a_ptr))) { LOG_WARN("fail to get this json array element", K(ret), K(i), K(size_a)); } else if (OB_FAIL(other.get_array_element(i, jb_b_ptr))) { @@ -4129,10 +4222,8 @@ int ObIJsonBase::compare_object(const ObIJsonBase &other, int &res) const ObIJsonBase *jb_b_ptr = NULL; ObJsonBin j_bin_a(allocator_); ObJsonBin j_bin_b(allocator_); - if (is_bin()) { - jb_a_ptr = &j_bin_a; - jb_b_ptr = &j_bin_b; - } + jb_a_ptr = &j_bin_a; + jb_b_ptr = &j_bin_b; 
// Compare value. if (OB_FAIL(get_object_value(i, jb_a_ptr))) { LOG_WARN("fail to get this json obj element", K(ret), K(i), K(len_a)); @@ -4531,7 +4622,7 @@ static constexpr int type_comparison[JSON_TYPE_NUM][JSON_TYPE_NUM] = { /* 9 DATE */ {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, -1, -1, -1, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, /* 10 TIME */ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, -1, -1, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, /* 11 DATETIME */ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, - /* 12 TIMESTAMP */ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + /* 12 TIMESTAMP */ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2}, /* 13 OPAQUE */ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, /* 14 empty */ {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, /* ORACLE MODE */ @@ -4546,7 +4637,7 @@ static constexpr int type_comparison[JSON_TYPE_NUM][JSON_TYPE_NUM] = { /* 23 ORAWID */ {2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2}, /* 24 ORACLEDATE*/ {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2}, /* 25 ODATE */ {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2}, - /* 26 OTIMESTAMP */ {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2}, + /* 26 OTIMESTAMP */ {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2}, /* 27 TIMESTAMPTZ*/ {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2}, /* 28 ODAYSECOND */ {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2}, /* 29 OYEARMONTH */ {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2}, @@ -4801,14 +4892,14 @@ int ObIJsonBase::get_used_size(uint64_t &size) if (is_bin()) { const ObJsonBin *j_bin = static_cast(this); - size = j_bin->get_serialize_size(); + ret = j_bin->get_used_bytes(size); } else { // is tree ObArenaAllocator allocator; ObIJsonBase *j_bin = NULL; if (OB_FAIL(ObJsonBaseFactory::transform(&allocator, this, ObJsonInType::JSON_BIN, j_bin))) { LOG_WARN("fail to transform to tree", K(ret)); } else { - size = static_cast(j_bin)->get_serialize_size(); + ret = static_cast(j_bin)->get_used_bytes(size); } } @@ -4863,6 +4954,32 @@ int ObIJsonBase::get_raw_binary(common::ObString &out, ObIAllocator *allocator) return ret; } +int ObIJsonBase::get_raw_binary_v0(common::ObString &out, ObIAllocator *allocator) +{ + INIT_SUCC(ret); + + if (is_bin()) { + const ObJsonBin *j_bin = static_cast(this); + if (OB_FAIL(j_bin->get_raw_binary_v0(out, allocator))) { + LOG_WARN("fail to get raw binary", K(ret), K(*j_bin)); + } + } else { // is tree + if (OB_ISNULL(allocator)) { // check param + ret = OB_INVALID_ARGUMENT; + LOG_WARN("param allocator is null", K(ret)); + } else { + ObIJsonBase *j_bin = NULL; + if (OB_FAIL(ObJsonBaseFactory::transform(allocator, this, ObJsonInType::JSON_BIN, j_bin))) { + LOG_WARN("fail to transform to tree", K(ret)); + } else if (OB_FAIL(static_cast(j_bin)->get_raw_binary_v0(out, allocator))) { + LOG_WARN("fail to get raw binary after transforming", K(ret), K(*j_bin)); + } + } + } + + return ret; +} + JsonObjectIterator ObIJsonBase::object_iterator() const { return JsonObjectIterator(this); @@ -5661,14 +5778,6 @@ int ObIJsonBase::to_time(int64_t &value) const ret = OB_OPERATE_OVERFLOW; break; } - case ObJsonNodeType::J_INT: { - int64_t in_val = get_int(); - if (OB_FAIL(ObTimeConverter::int_to_time(in_val, time))) { - LOG_WARN("int_to_date failed", K(ret), K(in_val), K(time)); - } - break; - } - default: { ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to cast json type to time", K(ret), 
K(json_type())); @@ -5838,6 +5947,7 @@ int ObIJsonBase::to_bit(uint64_t &value) const return ret; } + int ObJsonBaseFactory::get_json_base(ObIAllocator *allocator, const ObString &buf, ObJsonInType in_type, ObJsonInType expect_type, ObIJsonBase *&out, uint32_t parse_flag) @@ -5851,8 +5961,20 @@ int ObJsonBaseFactory::get_json_base(ObIAllocator *allocator, const char *ptr, u { INIT_SUCC(ret); void *buf = NULL; + ObJsonBin *j_bin = NULL; + ObArenaAllocator tmp_allocator; + bool is_schema = HAS_FLAG(parse_flag, ObJsonParser::JSN_SCHEMA_FLAG); + ObIAllocator* t_allocator = allocator; - if (OB_ISNULL(allocator)) { // check allocator + if (OB_NOT_NULL(out) && out->is_bin()) { + buf = out; + j_bin = static_cast(out); + allocator = out->get_allocator(); + } + if (in_type != expect_type) { + t_allocator = &tmp_allocator; + } + if (OB_ISNULL(allocator) || OB_ISNULL(t_allocator)) { // check allocator ret = OB_ERR_NULL_VALUE; LOG_WARN("param allocator is NULL", K(ret), KP(allocator), KP(ptr)); } else if (OB_ISNULL(ptr) || length == 0) { @@ -5866,35 +5988,47 @@ int ObJsonBaseFactory::get_json_base(ObIAllocator *allocator, const char *ptr, u LOG_WARN("param expect_type is invalid", K(ret), K(expect_type)); } else if (in_type == ObJsonInType::JSON_TREE) { ObJsonNode *j_tree = NULL; - if (OB_FAIL(ObJsonParser::get_tree(allocator, ptr, length, j_tree, parse_flag))) { + if (OB_FAIL(ObJsonParser::get_tree(t_allocator, ptr, length, j_tree, parse_flag))) { LOG_WARN("fail to get json tree", K(ret), K(length), K(in_type), K(expect_type)); } else if (expect_type == ObJsonInType::JSON_TREE) { out = j_tree; } else { // expect json bin - if (OB_ISNULL(buf = allocator->alloc(sizeof(ObJsonBin)))) { + if (OB_NOT_NULL(j_bin)) { + } else if (OB_ISNULL(buf = allocator->alloc(sizeof(ObJsonBin)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory", K(ret), K(in_type), K(expect_type), K(sizeof(ObJsonBin))); } else { - ObJsonBin *j_bin = new (buf) ObJsonBin(allocator); - if 
(OB_FAIL(j_bin->parse_tree(j_tree))) { - LOG_WARN("fail to parse tree", K(ret), K(in_type), K(expect_type), K(*j_tree)); - } else { - out = j_bin; - } + j_bin = new (buf) ObJsonBin(allocator); + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(j_bin) || !j_bin->is_bin()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("j_bin can not be null", K(ret)); + } else if (OB_FAIL(j_bin->parse_tree(j_tree))) { + LOG_WARN("fail to parse tree", K(ret), K(in_type), K(expect_type), K(*j_tree)); + } else { + out = j_bin; } } } else if (in_type == ObJsonInType::JSON_BIN) { - if (OB_ISNULL(buf = allocator->alloc(sizeof(ObJsonBin)))) { + if (OB_NOT_NULL(buf)) { + } else if (OB_ISNULL(buf = allocator->alloc(sizeof(ObJsonBin)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory", K(ret), K(in_type), K(expect_type), K(sizeof(ObJsonBin))); - } else { - ObJsonBin *j_bin = new (buf) ObJsonBin(ptr, length, allocator); + } + if (OB_SUCC(ret)) { + j_bin = new (buf) ObJsonBin(ptr, length, allocator); if (OB_FAIL(j_bin->reset_iter())) { LOG_WARN("fail to reset iter", K(ret), K(in_type), K(expect_type)); } else if (expect_type == ObJsonInType::JSON_BIN) { - out = j_bin; + if (is_schema && OB_FAIL(ObJsonBaseUtil::check_json_schema_ref_def(*allocator, j_bin))) { + LOG_WARN("fail to check json sceham", K(ret), K(in_type), K(expect_type)); + } else { + out = j_bin; + } } else { // expect json tree ObJsonNode *j_tree = NULL; + j_bin->set_is_schema(is_schema); if (OB_FAIL(j_bin->to_tree(j_tree))) { LOG_WARN("fail to change bin to tree", K(ret), K(in_type), K(expect_type), K(*j_bin)); } else { @@ -6586,6 +6720,17 @@ int ObJsonBaseUtil::get_bit_len(const ObString &str, int32_t &bit_len) return ret; } +int32_t ObJsonBaseUtil::get_bit_len(uint64_t value) +{ + int32_t bit_len = 0; + if (0 == value) { + bit_len = 1; + } else { + bit_len = static_cast(sizeof(unsigned long long) * 8 - __builtin_clzll(value)); + } + return bit_len; +} + int ObJsonBaseUtil::get_bit_len(uint64_t value, int32_t &bit_len) 
{ int ret = OB_SUCCESS; @@ -6719,6 +6864,39 @@ bool ObJsonBaseUtil::binary_search(ObSortedVector &vec, ObIJsonBa return is_found; } +int ObJsonBaseUtil::check_json_schema_ref_def(ObIAllocator& allocator, ObIJsonBase* json_doc) +{ + INIT_SUCC(ret); + if (OB_ISNULL(json_doc)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("should not be null", K(ret)); + } else { + ObJsonSeekResult hit; + common::ObString path_ref; + path_ref = lib::is_mysql_mode() ? "$**.\"$ref\"" : "$..\"$ref\""; + ObJsonPath j_path(path_ref, &allocator); + if (OB_FAIL(j_path.parse_path())) { + LOG_WARN("fail to parse json path", K(ret)); + } else if (OB_FAIL(json_doc->seek(j_path, j_path.path_node_cnt(), false, false, hit))) { + LOG_WARN("fail to seek $ref definition", K(ret)); + } else { + for (int i = 0; OB_SUCC(ret) && i < hit.size(); ++i) { + if (OB_ISNULL(hit[i])) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("should not be null", K(ret)); + } else if (hit[i]->json_type() == ObJsonNodeType::J_STRING) { + ObString str(hit[i]->get_data_length(), hit[i]->get_data()); + if (str.length() > 0 && str[0] != '#') { + ret = OB_ERR_UNSUPPROTED_REF_IN_JSON_SCHEMA; + LOG_WARN("unsupported ref in json schema", K(ret)); + } + } + } // check value of "$ref", if is string must begin with "#" + } + } + return ret; +} + int ObJsonHashValue::calc_time(ObDTMode dt_mode, const ObIJsonBase *jb) { INIT_SUCC(ret); @@ -6791,5 +6969,53 @@ void JsonObjectIterator::next() curr_element_++; } +int ObJsonSeekResult::push_node(ObIJsonBase *node) +{ + INIT_SUCC(ret); + if (size_ == 0) { + res_point_ = node; + } else if (res_vector_.remain() == 0 && OB_FAIL(res_vector_.reserve(OB_PATH_RESULT_EXPAND_SIZE))) { + LOG_WARN("fail to expand vactor", K(ret)); + } else if (OB_FAIL(res_vector_.push_back(node))) { + LOG_WARN("fail to push_back value into result", K(ret), K(res_vector_.size())); + } + size_ ++; + return ret; +} + +ObIJsonBase* ObJsonSeekResult::get_node(const int idx) const +{ + ObIJsonBase* res = NULL; + if (idx >= size_ || idx < 0) 
{ + } else if (idx == 0) { + res = res_point_; + } else { + res = res_vector_[idx - 1]; + } + return res; +} + +ObIJsonBase* ObJsonSeekResult::last() +{ + ObIJsonBase* res = NULL; + if (size_ == 0) { + } else if (size_ == 1) { + res = res_point_; + } else { + res = *res_vector_.last(); + } + return res; +} + +void ObJsonSeekResult::set_node(int idx, ObIJsonBase* node) +{ + if (idx >= size_ || idx < 0) { + } else if (idx == 0) { + res_point_ = node; + } else { + res_vector_[idx - 1] = node; + } +} + } // namespace common } // namespace oceanbase diff --git a/deps/oblib/src/lib/json_type/ob_json_base.h b/deps/oblib/src/lib/json_type/ob_json_base.h index 2d7b193c78..4f82e1d63c 100644 --- a/deps/oblib/src/lib/json_type/ob_json_base.h +++ b/deps/oblib/src/lib/json_type/ob_json_base.h @@ -81,11 +81,60 @@ enum class JsonOpaqueType #define FROM_BLOB_FLAG 1 #define HAS_BLOB_FLAG(flags) ((flags) & 1) -typedef common::ObVector ObJsonBaseVector; -typedef common::ObSortedVector ObJsonBaseSortedVector; +const static int8_t OB_PATH_RESULT_EXPAND_SIZE = 2; + +typedef PageArena JsonBaseArena; +typedef common::ObVector ObJsonBaseVector; +typedef common::ObSortedVector ObJsonBaseSortedVector; typedef std::pair ObJsonObjPair; typedef common::hash::ObHashMap PassingMap; +struct ObJsonSeekResult { + explicit ObJsonSeekResult() : size_(0), res_point_(nullptr), res_vector_() {} + ~ObJsonSeekResult() {} + int push_node(ObIJsonBase *node); + ObIJsonBase* get_node(int idx) const; + ObIJsonBase* last(); + int size() { return size_; } + void reset() { + size_ = 0; + res_point_ = nullptr; + res_vector_.reset(); + } + void clear() { + size_ = 0; + res_point_ = nullptr; + res_vector_.clear(); + } + OB_INLINE ObIJsonBase *operator[](const int index) const { + return get_node(index); + } + + void set_node(int idx, ObIJsonBase* node); + + // node size + int size_; + // single res point + ObIJsonBase *res_point_; + // multi res point + ObJsonBaseVector res_vector_; + +}; + +struct ObJsonSortedResult 
{ + explicit ObJsonSortedResult() : size_(0), json_point_(nullptr), sort_vector_() {} + ~ObJsonSortedResult() {} + int insert_unique(ObIJsonBase *node); + + // node size + int size_; + // single res point not use compare vector + ObIJsonBase *json_point_; + // multi res point + ObJsonBaseSortedVector sort_vector_; + +}; + typedef struct ObSeekParentInfo { ObIJsonBase *parent_jb_; @@ -134,6 +183,7 @@ public: OB_INLINE bool is_bin() const { return get_internal_type() == ObJsonInType::JSON_BIN; } OB_INLINE ObIAllocator *get_allocator() { return allocator_; } OB_INLINE void set_allocator(ObIAllocator *allocator) { allocator_ = allocator; } + virtual int reset() { return OB_SUCCESS; } public: // Get internal json type(tree or binary). // @@ -182,6 +232,7 @@ public: // @param [out] value The result. // @return Returns OB_SUCCESS on success, error code otherwise. virtual int get_object_value(uint64_t index, ObIJsonBase *&value) const = 0; + virtual int get_object_value(uint64_t index, ObString &key, ObIJsonBase *&value) const = 0; // Gey object value by key. // @@ -246,9 +297,9 @@ public: // @return Returns OB_SUCCESS on success, error code otherwise. virtual bool is_real_json_null(const ObIJsonBase* ptr) const; virtual int seek(const ObJsonPath &path, uint32_t node_cnt, bool is_auto_wrap, - bool only_need_one, ObJsonBaseVector &res, PassingMap* sql_var = NULL) const; + bool only_need_one, ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; virtual int seek(ObIAllocator* allocator, const ObJsonPath &path, uint32_t node_cnt, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseVector &res, PassingMap* sql_var = NULL) const; + bool only_need_one, bool is_lax, ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; // Change json to string // // @param [in, out] j_buf The dest buf. @@ -302,6 +353,8 @@ public: // @param [out] out The string of json binary. // @return Returns OB_SUCCESS on success, error code otherwise. 
virtual int get_raw_binary(common::ObString &out, ObIAllocator *allocator = NULL); + // for old version that without json doc header + virtual int get_raw_binary_v0(common::ObString &out, ObIAllocator *allocator = NULL); // get object_iterator // @@ -374,6 +427,7 @@ private: // @param [in] is_pretty Whether is from json funcion JSON_PRETTY or not. // @return Returns OB_SUCCESS on success, error code otherwise. int print_object(ObJsonBuffer &j_buf, uint64_t depth, bool is_pretty) const; + int pint_colon(ObJsonBuffer &j_buf, bool is_pretty) const; // Change json decimal to string. // @@ -456,16 +510,18 @@ private: // @param [in] res The result. // @param [in] only_need_one Whether only need one result or not. // @return Returns true if finished, false otherwise. - OB_INLINE bool is_seek_done(ObJsonBaseVector &res, bool only_need_one) const + OB_INLINE bool is_seek_done(ObJsonSeekResult &res, bool only_need_one) const { return (only_need_one && res.size() > 0); } + // append node to sort vector, if size == 1, then ignore + int append_node_to_sort_vector(ObJsonSortedResult &dup, ObJsonSeekResult& res, const ObIJsonBase* node) const; // Store the seeking results of all path statements to hits. // // @param [in] dup The answer found in the current path expression, preventing repeated additions. // @param [in] res The result of seeking. // @return Returns OB_SUCCESS on success, error code otherwise. - int add_if_missing(ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + int add_if_missing(ObJsonSortedResult &dup, ObJsonSeekResult &res, ObIAllocator* allocator) const; // Find in ellipsis. // // @param [in] cur_node The current path node. @@ -478,7 +534,7 @@ private: // @return Returns OB_SUCCESS on success, error code otherwise. 
int find_ellipsis(const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const JsonPathIterator &next_node, bool is_auto_wrap, bool only_need_one, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // Find in array range. // @@ -492,8 +548,8 @@ private: int find_array_range(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var = NULL) const; + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; // Find in array cell. // // @param [in] cur_node The current path node. @@ -506,8 +562,8 @@ private: int find_array_cell(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var = NULL) const; + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; // Find in member wildcard. // @@ -520,8 +576,8 @@ private: // @return Returns OB_SUCCESS on success, error code otherwise. int find_member_wildcard(const JsonPathIterator &next_node, const JsonPathIterator &last_node, bool is_auto_wrap, - bool only_need_one, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res) const; + bool only_need_one, ObJsonSortedResult &dup, + ObJsonSeekResult &res) const; // Find in member. // @@ -534,7 +590,7 @@ private: // @return Returns OB_SUCCESS on success, error code otherwise. 
int find_member(const JsonPathIterator &next_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, bool only_need_one, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // find in oracle wildcard: .. (allocator can not be null) // @@ -548,8 +604,8 @@ private: int find_ellipsis(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var) const; + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var) const; // find in oracle array wildcard:[*] (allocator can not be null) // // @param [in] allocator The json allocator. @@ -562,8 +618,8 @@ private: int find_array_wildcard(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var = NULL) const; + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; // find in oracle array_range:[index, index1 to index2, last-index3,...] // // @param [in] allocator The json allocator. 
@@ -576,8 +632,8 @@ private: int find_multi_array_ranges(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var = NULL) const; + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; // Find in member wildcard: .* // // @param [in] allocator The json allocator. @@ -590,8 +646,8 @@ private: int find_member_wildcard(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var = NULL) const; + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; // Find in member: .keyname // // @param [in] allocator The json allocator. @@ -604,8 +660,8 @@ private: int find_member(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathBasicNode *path_node, bool is_auto_wrap, - bool only_need_one, bool is_lax, ObJsonBaseSortedVector &dup, - ObJsonBaseVector &res, PassingMap* sql_var = NULL) const; + bool only_need_one, bool is_lax, ObJsonSortedResult &dup, + ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; // Find in basic_node, including: .., .*, [*], .keyname, [index, index1 to index2, last-index3,...] // // @param [in] allocator The json allocator. 
@@ -618,12 +674,12 @@ private: int find_basic_child(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, bool is_auto_wrap, bool only_need_one, bool is_lax, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res, + ObJsonSortedResult &dup, ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; int find_array_child(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, bool is_auto_wrap, bool only_need_one, bool is_lax, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res, + ObJsonSortedResult &dup, ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; // item_function:ceiling(). (allocator can not be null) // legal for number @@ -638,7 +694,7 @@ private: int find_ceiling_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const;// According to the path node, recursively query results dow. + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const;// According to the path node, recursively query results dow. // item_function:floor(). (allocator can not be null) // legal for number // @param [in] allocator The json allocator. @@ -652,7 +708,7 @@ private: int find_floor_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // item_function:abs(). (allocator can not be null) // legal for number // @param [in] allocator The json allocator. 
@@ -666,7 +722,7 @@ private: int find_abs_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // numeric_item_method:abs(), floor(), ceiling(). (allocator can not be null) // legal for number // @param [in] allocator The json allocator. @@ -679,7 +735,7 @@ private: // @return Returns OB_SUCCESS on success, error code otherwise. int find_numeric_item_method (ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node,const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // item_function:type(). (allocator can not be null) /* returning J_STRING: "null" for a value of null. @@ -700,7 +756,7 @@ private: int find_type_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // item_function:size(). (allocator can not be null) // returning uint: the number of elements in an array, or 1 for a scalar or an object // @param [in] allocator The json allocator. 
@@ -714,7 +770,7 @@ private: int find_size_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // item_function:length(). (allocator can not be null) // returning uint: the number of characters in the targeted JSON string, interpreted as a SQL NUMBER. // returng J_NULL(is_null_ = false) for other json_type() @@ -729,7 +785,7 @@ private: int find_length_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // item_function:boolean()/booleanOnly(). 
(allocator can not be null) // for booleanOnly() :return boolean for J_BOOL, returng J_NULL(is_null_ = false) for other json_type() // for boolean() :return boolean for J_BOOL, @@ -746,7 +802,7 @@ private: int find_boolean_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; bool check_legal_ora_date(const ObString date) const; // item_function: date() // legal for J_String @@ -761,7 +817,7 @@ private: int find_date_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // item_function: timestamp() // legal for J_String // @param [in] allocator The json allocator. @@ -775,7 +831,7 @@ private: int find_timestamp_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // item_function: double() // legal for J_Number & J_String, but retun J_NULL when str_to_double failed. // @param [in] allocator The json allocator. 
@@ -789,7 +845,7 @@ private: int find_double_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // item_function: number() // legal for J_Number & J_String, but retun J_NULL when str_to_number failed. // @param [in] allocator The json allocator. @@ -803,7 +859,7 @@ private: int find_number_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // item_function: number() // legal for JSON Scalar. // @param [in] allocator The json allocator. @@ -817,7 +873,7 @@ private: int find_string_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // item_function: upper() & lower() // legal for JSON String. // @param [in] allocator The json allocator. 
@@ -831,7 +887,7 @@ private: int find_trans_method(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, const ObJsonPathFuncNode *path_node, bool is_auto_wrap, bool only_need_one, - bool is_lax, ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + bool is_lax, ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // Used to determine whether type conversion is required // is left and right the same josn type, divided into three categories: string, number and date // @param [in] allocator The json allocator. @@ -853,7 +909,7 @@ private: // @param [in] lax_mode Whether lax_mode or not. // @param [out] filter_result The result of compare. // @return Returns OB_SUCCESS on success, error code otherwise. - int cmp_to_right_recursively(ObIAllocator* allocator, const ObJsonBaseVector& hit, + int cmp_to_right_recursively(ObIAllocator* allocator, ObJsonSeekResult& hit, const ObJsonPathNodeType node_type, ObIJsonBase* right_arg, bool& filter_result) const; // Used to compare with multiple left_arg, when right_arg is the results found by the sub_path @@ -866,7 +922,7 @@ private: // @param [in] lax_mode Whether lax_mode or not. // @param [out] filter_result The result of compare. // @return Returns OB_SUCCESS on success, error code otherwise. 
- int cmp_to_left_recursively(ObIAllocator* allocator, const ObJsonBaseVector& hit, + int cmp_to_left_recursively(ObIAllocator* allocator, ObJsonSeekResult& hit, const ObJsonPathNodeType node_type, ObIJsonBase* right_arg, bool& filter_result) const; // Used to compare with the right_arg @@ -925,7 +981,7 @@ private: int find_func_child(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, bool is_auto_wrap, bool only_need_one, bool is_lax, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res) const; + ObJsonSortedResult &dup, ObJsonSeekResult &res) const; // According to the path node, recursively query results dow. // // @param [in] cur_node The current path node. @@ -965,7 +1021,7 @@ private: int find_filter_child(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, bool is_auto_wrap, bool only_need_one, bool is_lax, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res, + ObJsonSortedResult &dup, ObJsonSeekResult &res, PassingMap* sql_var) const; // According to the path node, recursively query results dow. // @@ -979,7 +1035,7 @@ private: int find_child(ObIAllocator* allocator, ObSeekParentInfo &parent_info, const JsonPathIterator &cur_node, const JsonPathIterator &last_node, bool is_auto_wrap, bool only_need_one, bool is_lax, - ObJsonBaseSortedVector &dup, ObJsonBaseVector &res, + ObJsonSortedResult &dup, ObJsonSeekResult &res, PassingMap* sql_var = NULL) const; private: ObIAllocator *allocator_; @@ -1187,8 +1243,8 @@ public: // // @param [in] int The int64_t. // @param [out] bit_len The bit length of int - // @return Returns OB_SUCCESS on success, error code otherwise. static int get_bit_len(uint64_t value, int32_t &bit_len); + static int32_t get_bit_len(uint64_t value); // ObString to uint64_t // @@ -1218,6 +1274,7 @@ public: // @param [in] value The element to be searched. 
// @return Return true if it exists, return false if it does not. static bool binary_search(ObSortedVector &vec, ObIJsonBase *value); + static int check_json_schema_ref_def(ObIAllocator& allocator, ObIJsonBase* json_doc); private: DISALLOW_COPY_AND_ASSIGN(ObJsonBaseUtil); }; diff --git a/deps/oblib/src/lib/json_type/ob_json_bin.cpp b/deps/oblib/src/lib/json_type/ob_json_bin.cpp index 0bcf1ed9f9..78160042d4 100644 --- a/deps/oblib/src/lib/json_type/ob_json_bin.cpp +++ b/deps/oblib/src/lib/json_type/ob_json_bin.cpp @@ -9,10 +9,11 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ -#define USING_LOG_PREFIX SQL +#define USING_LOG_PREFIX LIB #include "common/object/ob_obj_type.h" #include "ob_json_bin.h" #include "ob_json_tree.h" +#include "ob_json_diff.h" namespace oceanbase { namespace common { @@ -170,44 +171,35 @@ int ObJsonBin::array_append(ObIJsonBase *value) int ObJsonBin::replace(const ObIJsonBase *old_node, ObIJsonBase *new_node) { INIT_SUCC(ret); - + int parent_idx = -1; + int is_equal = -1; if (OB_ISNULL(old_node) || OB_ISNULL(new_node)) { // check param ret = OB_INVALID_ARGUMENT; LOG_WARN("null param", K(ret), KP(old_node), KP(new_node)); } else if (old_node->is_tree() || new_node->is_tree()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("input is tree, but need binary", K(ret), K(old_node), K(new_node)); - } else if (is_alloc_ == false) { - ret = OB_ERR_READ_ONLY; - LOG_WARN("json bin is read only.", K(ret), K(is_alloc_)); + } else if (old_node->json_type() == new_node->json_type() && OB_FAIL(old_node->compare(*new_node, is_equal))) { + LOG_WARN("compare fail", K(ret), KPC(old_node), KPC(new_node)); + } else if (0 == is_equal) { // if is equal, no need do update } else { + ObJBNodeMeta node_meta; ObJsonNodeType j_type = json_type(); ObJBVerType vertype = get_vertype(); + const ObJsonBin *old_bin = static_cast(old_node); if (j_type != ObJsonNodeType::J_ARRAY && j_type != ObJsonNodeType::J_OBJECT) { ret = 
OB_ERR_UNEXPECTED; LOG_WARN("error curr type. not support replace", K(ret), K(j_type)); - } else { - char *child_val_addr = NULL; - const ObJsonBin *old_bin = static_cast(old_node); - const char *expect_val_addr = old_bin->curr_.ptr() + old_bin->pos_; - for (int i = 0; OB_SUCC(ret) && i < element_count_; i++) { - if (j_type == ObJsonNodeType::J_ARRAY) { - if (OB_FAIL(get_element_in_array(i, &child_val_addr))) { - LOG_WARN("failed to get child in array.", K(ret), K(i)); - } - } else { // ObJsonNodeType::J_OBJECT - if (OB_FAIL(get_element_in_object(i, &child_val_addr))) { - LOG_WARN("failed to get child in object.", K(ret), K(i)); - } - } - if (OB_SUCC(ret) && child_val_addr == expect_val_addr) { - // found old child, do update - if (OB_FAIL(update(i, static_cast(new_node)))) { - LOG_WARN("replace with new value failed.", K(ret), K(i), K(old_node), K(new_node)); - } - break; - } - } + } else if (old_bin->node_stack_.size() <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("old child no parent", K(ret), K(meta_), KPC(old_bin)); + } else if (OB_FAIL(old_bin->node_stack_.back(node_meta))) { + LOG_WARN("old_bin get node meta fail", K(ret), KPC(old_bin)); + } else if (node_meta.offset_ != pos_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("old_bin parent not match", K(ret), K(node_meta), K(pos_),KPC(old_bin)); + } else if (OB_FAIL(update(node_meta.idx_, static_cast(new_node)))) { + LOG_WARN("replace with new value failed.", K(ret), K(node_meta), KPC(old_bin), KPC(new_node)); } } @@ -242,17 +234,6 @@ int ObJsonBin::array_remove(uint64_t index) return ret; } -int ObJsonBin::get_raw_binary(common::ObString &out, ObIAllocator *allocator) const -{ - INIT_SUCC(ret); - - if (OB_FAIL(raw_binary(out, allocator))) { - LOG_WARN("fail to get json raw binary", K(ret)); - } - - return ret; -} - int ObJsonBin::get_key(uint64_t index, common::ObString &key_out) const { INIT_SUCC(ret); @@ -268,11 +249,11 @@ int ObJsonBin::get_key(uint64_t index, common::ObString &key_out) const return ret; } -int 
ObJsonBin::create_new_binary(ObIJsonBase *&value, ObJsonBin *&new_bin) const +int ObJsonBin::create_new_binary(ObIJsonBase *value, ObJsonBin *&new_bin) const { INIT_SUCC(ret); ObString sub; - + bool is_seek_only = get_seek_flag(); common::ObIAllocator *allocator = NULL; void *buf = NULL; if (value != NULL) { // use stack memory @@ -290,17 +271,110 @@ int ObJsonBin::create_new_binary(ObIJsonBase *&value, ObJsonBin *&new_bin) const } } - if (OB_SUCC(ret)) { - if (OB_FAIL(raw_binary_at_iter(sub))) { - LOG_WARN("fail to get sub json binary.", K(ret)); + if (OB_FAIL(ret)) { + } else if (OB_FALSE_IT(new_bin = new (buf) ObJsonBin(allocator))) { + } else if (!(json_type() == ObJsonNodeType::J_ARRAY || json_type() == ObJsonNodeType::J_OBJECT || ObJsonVerType::is_opaque_or_string(json_type()))) { + if (OB_FAIL(reset_child(*new_bin, meta_.type_, pos_, meta_.entry_size_))) { + LOG_WARN("reset child value fail", K(ret), K(meta_)); } else { - new_bin = new (buf) ObJsonBin(sub.ptr(), sub.length(), allocator); - if (OB_FAIL(new_bin->reset_iter())) { - LOG_WARN("fail to reset iter for new json bin", K(ret)); - } + new_bin->set_seek_flag(is_seek_only); + } + } else if (OB_FAIL(reset_child(*new_bin, pos_))) { + LOG_WARN("reset_child fail", K(ret), K(meta_)); + } else { + new_bin->set_seek_flag(is_seek_only); + } + + return ret; +} + +int ObJsonBin::clone_new_node(ObJsonBin*& res, common::ObIAllocator *allocator) const +{ + INIT_SUCC(ret); + void *buf = NULL; + bool is_seek_only = get_seek_flag(); + if (res != NULL) { // use stack memory + buf = res; + allocator = res->get_allocator(); + } else if (OB_ISNULL(allocator)) { // check allocator_ + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json bin allocator is null", K(ret)); + } else { // use allocator_ + buf = allocator->alloc(sizeof(ObJsonBin)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc json bin fail", K(ret), K(sizeof(ObJsonBin))); } } + if (OB_FAIL(ret)) { + } else if (OB_FALSE_IT(res = new (buf) 
ObJsonBin(allocator))) { + } else if (!(json_type() == ObJsonNodeType::J_ARRAY || json_type() == ObJsonNodeType::J_OBJECT || ObJsonVerType::is_opaque_or_string(json_type()))) { + if (OB_FAIL(reset_child(*res, meta_.type_, pos_, meta_.entry_size_))) { + LOG_WARN("reset child value fail", K(ret), K(meta_)); + } + } else if (OB_FAIL(reset_child(*res, pos_))) { + LOG_WARN("reset_child fail", K(ret), K(meta_)); + } + if (OB_FAIL(ret)) { + } else if (! is_seek_only_ && OB_FAIL(res->node_stack_.copy(this->node_stack_))) { + LOG_WARN("copy node stack fail", K(ret)); + } else { + res->set_seek_flag(is_seek_only); + } + return ret; +} + +int ObJsonBin::serialize_number_to_json_decimal(number::ObNumber number, ObJsonBuffer &result) +{ + INIT_SUCC(ret); + ObPrecision prec = get_decimal_precision(); + ObScale scale = get_decimal_scale(); + int64_t ser_len = number.get_serialize_size() + serialization::encoded_length_i16(prec) + + serialization::encoded_length_i16(scale); + int64_t pos = result.length(); + if (OB_FAIL(result.reserve(ser_len))) { + LOG_WARN("failed to reserver serialize size for decimal json obj", K(ret), K(pos), K(ser_len)); + } else if (OB_FAIL(serialization::encode_i16(result.ptr(), result.capacity(), pos, prec))) { + LOG_WARN("failed to serialize for decimal precision", K(ret), K(pos), K(prec)); + } else if (OB_FAIL(result.set_length(pos))) { + LOG_WARN("failed to set length for decimal precision", K(ret), K(pos), K(prec)); + } else if (OB_FAIL(serialization::encode_i16(result.ptr(), result.capacity(), pos, scale))) { + LOG_WARN("failed to serialize for decimal precision", K(ret), K(pos), K(scale)); + } else if (OB_FAIL(result.set_length(pos))) { + LOG_WARN("failed to set length for decimal scale", K(ret), K(pos), K(scale)); + } else if (OB_FAIL(number.serialize(result.ptr(), result.capacity(), pos))) { + LOG_WARN("failed to serialize for decimal value", K(ret), K(pos)); + } else if (OB_FAIL(result.set_length(pos))){ + LOG_WARN("failed to update len for 
decimal json obj", K(ret), K(pos)); + } + return ret; +} + +int ObJsonBin::get_total_value(ObStringBuffer &result) const +{ + INIT_SUCC(ret); + ObJBVerType j_type = get_vertype(); + if (ObJsonVerType::is_scalar(j_type)) { + if (OB_FAIL(rebuild_json_value(result))) { + LOG_WARN("rebuild_json_value fail", K(ret), K(pos_), KPC(this)); + } + } else { + uint64_t area_size = 0; + uint64_t total_len = cursor_->get_length(); + ObString value; + if (OB_NOT_NULL(ctx_) && ctx_->extend_seg_offset_ != 0 && ctx_->extend_seg_offset_ != total_len) { + if (OB_FAIL(rebuild_json_value(result))) { + LOG_WARN("rebuild_json_value fail", K(ret), K(pos_), KPC(this)); + } + } else if (OB_FAIL(get_area_size(area_size))) { + LOG_WARN("get_area_size", K(ret), K(pos_), KPC(this)); + } else if (OB_FAIL(cursor_->get(pos_, area_size, value))) { + LOG_WARN("cursor get_data fail", K(ret), K(pos_), K(area_size), KPC(this)); + } else if (OB_FAIL(result.append(value.ptr(), value.length()))) { + LOG_WARN("failed to append null json obj", K(ret)); + } + } return ret; } @@ -313,6 +387,8 @@ int ObJsonBin::get_array_element(uint64_t index, ObIJsonBase *&value) const LOG_WARN("invalid json array operation", K(ret), K(index)); } else if (OB_FAIL(create_new_binary(value, new_bin))) { LOG_WARN("fail to create sub binary", K(ret), K(index)); + } else if (! is_seek_only_ && OB_FAIL(new_bin->node_stack_.copy(this->node_stack_))) { + LOG_WARN("copy node stack fail", K(ret)); } else if (OB_FAIL(new_bin->element(index))) { LOG_WARN("fail to access index node for new json bin.", K(ret), K(index)); } else { @@ -331,6 +407,8 @@ int ObJsonBin::get_object_value(uint64_t index, ObIJsonBase *&value) const LOG_WARN("invalid json object operation", K(ret), K(index)); } else if (OB_FAIL(create_new_binary(value, new_bin))) { LOG_WARN("fail to create sub binary", K(ret), K(index)); + } else if (! 
is_seek_only_ && OB_FAIL(new_bin->node_stack_.copy(this->node_stack_))) { + LOG_WARN("copy node stack fail", K(ret)); } else if (OB_FAIL(new_bin->element(index))) { LOG_WARN("fail to access index node for new json bin.", K(ret), K(index)); } else { @@ -340,6 +418,26 @@ int ObJsonBin::get_object_value(uint64_t index, ObIJsonBase *&value) const return ret; } +int ObJsonBin::get_object_value(uint64_t index, ObString &key, ObIJsonBase *&value) const +{ + INIT_SUCC(ret); + ObJsonBin *new_bin = NULL; + + if (OB_FAIL(check_valid_object_op(index))) { + LOG_WARN("invalid json object operation", K(ret), K(index)); + } else if (OB_FAIL(create_new_binary(value, new_bin))) { + LOG_WARN("fail to create sub binary", K(ret), K(index)); + } else if (OB_FAIL(new_bin->get_key_in_object(index, key))) { + LOG_WARN("fail to access index node for new json bin.", K(ret), K(index)); + } else if (OB_FAIL(new_bin->get_element_in_object(index))) { + LOG_WARN("fail to access index node for new json bin.", K(ret), K(index)); + } else { + value = new_bin; + } + + return ret; +} + int ObJsonBin::get_object_value(const ObString &key, ObIJsonBase *&value) const { INIT_SUCC(ret); @@ -350,6 +448,8 @@ int ObJsonBin::get_object_value(const ObString &key, ObIJsonBase *&value) const LOG_WARN("invalid json node type", K(ret)); } else if (OB_FAIL(create_new_binary(value, new_bin))) { LOG_WARN("fail to create sub binary", K(ret), K(key)); + } else if (! 
is_seek_only_ && OB_FAIL(new_bin->node_stack_.copy(this->node_stack_))) { + LOG_WARN("copy node stack fail", K(ret)); } else { ret = new_bin->lookup(key); if (OB_SUCC(ret)) { @@ -363,114 +463,71 @@ int ObJsonBin::get_object_value(const ObString &key, ObIJsonBase *&value) const return ret; } -int ObJsonBin::serialize_json_object(ObJsonObject *object, ObJsonBuffer &result, uint32_t depth) +int ObJsonBinSerializer::serialize_json_object(ObJsonObject *object, ObJsonBuffer &result, uint32_t depth) { - UNUSED(depth); INIT_SUCC(ret); - const int64_t st_pos = result.length(); - bool with_key_dict = false; // TODO get from common header - ObJsonNode *value = NULL; - uint64_t count = object->element_count(); - // object header [node_type:uint8_t][type:uint8_t][member_count:var][object_size_:var] - ObJsonBinObjHeader header; - header.is_continuous_ = 1; + uint64_t element_count = object->element_count(); + const int64_t start_pos = result.length(); + ObJsonBin obj_bin; + ObJsonBinMeta meta; uint64_t obj_size = object->get_serialize_size(); - header.entry_size_ = ObJsonVar::get_var_type(obj_size); - header.obj_size_size_ = header.entry_size_; - header.count_size_ = ObJsonVar::get_var_type(count); - header.type_ = static_cast(get_object_vertype()); + meta.set_type(ObJsonBin::get_object_vertype(), false); + meta.set_element_count(element_count); + meta.set_element_count_var_type(ObJsonVar::get_var_type(element_count)); + meta.set_obj_size(obj_size); + meta.set_obj_size_var_type(ObJsonVar::get_var_type(obj_size)); + meta.set_entry_var_type(meta.obj_size_var_type()); + meta.set_is_continuous(true); + meta.calc_entry_array(); - ret = result.append(reinterpret_cast(&header), OB_JSON_BIN_OBJ_HEADER_LEN); - if (OB_FAIL(ret)) { - LOG_WARN("failed to append array header node type", K(ret)); - } else if (OB_FAIL(ObJsonVar::append_var(count, header.count_size_, result))) { - LOG_WARN("failed to append array header member count", K(ret)); - } else if 
(OB_FAIL(ObJsonVar::append_var(obj_size, header.obj_size_size_, result))) { - LOG_WARN("failed to append array header array size", K(ret)); + if (OB_FAIL(meta.to_header(result))) { + LOG_WARN("to obj header fail", K(ret)); + } else if (OB_FAIL(obj_bin.reset(result.string(), start_pos, nullptr))) { + LOG_WARN("init bin with meta fail", K(ret), K(meta)); } - // [key_entry][val_entry][key][val] - // push key offset (check if with key dict) - uint64_t type_size = ObJsonVar::get_var_size(header.entry_size_); - uint64_t key_offset_size = type_size * 2 * count; - uint64_t value_offset_size = (type_size + sizeof(uint8_t)) * count; - if (OB_SUCC(ret)) { - if (with_key_dict) { - // todo fill key dict id - } else { - ObString key; - uint32_t key_offset = static_cast(result.length() - st_pos + key_offset_size + value_offset_size); - for (uint64_t i = 0; OB_SUCC(ret) && i < count; i++) { - if (OB_FAIL(object->get_key(i, key))) { - LOG_WARN("get key failed.", K(ret), K(i)); - } else if (OB_FAIL(ObJsonVar::append_var(key_offset, header.entry_size_, result))) { // push key offset to st_pos - LOG_WARN("append key failed.", K(ret), K(key_offset), K(i)); - } else { - uint32_t key_len = static_cast(key.length()); - if (OB_FAIL(ObJsonVar::append_var(key_len, header.entry_size_, result))) { //push key len - LOG_WARN("append key length failed.", K(ret), K(key_len), K(i)); - } else { - key_offset += key_len; // todo check overflow? 
- } - } - } + + ObString key; + for (int i = 0; OB_SUCC(ret) && i < element_count; i++) { + uint64_t key_offset = result.length() - start_pos; + uint64_t key_len = 0; + if (OB_FAIL(object->get_key(i, key))) { + LOG_WARN("get key failed.", K(ret), K(i)); + } else if (OB_FALSE_IT(key_len = key.length())) { + } else if (OB_FAIL(obj_bin.set_key_entry(i, key_offset, key_len, false))) { + LOG_WARN("set_key_entry fail", K(ret), K(key)); + } else if (OB_FAIL(result.append(key))) { + LOG_WARN("append key fail", K(ret), K(key)); + // result may realloc, so need ensure point same memory + } else if (OB_FALSE_IT(obj_bin.set_current(result.string(), start_pos))) { } } - // extend and fill value entry - int64_t value_entry_offset = result.length(); - if (OB_SUCC(ret)) { - ret = result.reserve(value_offset_size); - if (OB_FAIL(ret)) { - LOG_WARN("failed to extend result", K(ret)); - } else { - ret = result.set_length(result.length() + value_offset_size); - if (OB_FAIL(ret)) { - LOG_WARN("failed to set length result", K(ret)); - } - } - } - // keys (when without keydict and has_key) - if (!with_key_dict) { - ObString key; - for (uint64_t i = 0; OB_SUCC(ret) && i < count; i++) { - if (OB_FAIL(object->get_key(i, key))) { - LOG_WARN("get key failed.", K(ret), K(i)); - } else if (OB_FAIL(result.append(key.ptr(), key.length()))) { - LOG_WARN("failed to append key to result", K(ret), K(key)); - } - } - } - - // values - for (uint64_t i = 0; OB_SUCC(ret) && i < count; i++) { + for (int i = 0; OB_SUCC(ret) && i < element_count; i++) { + ObJsonNode *value = nullptr; + uint64_t value_offset = result.length() - start_pos; + uint8_t value_type = 0; + bool is_update_inline = false; if (OB_ISNULL(value = object->get_value(i))) { ret = OB_ERR_NULL_VALUE; LOG_WARN("value is null", K(ret), K(i)); - } else { - uint32_t value_offset = result.length() - st_pos; - // recursion(parse value entry into func, simple type can store on value entry) - if (!try_update_inline(value, header.entry_size_, 
&value_entry_offset, result)) { - ret = serialize_json_value(value, result); - if (OB_FAIL(ret)) { - LOG_WARN("failed to append key to result", K(ret)); - } else { - // fill value entry - if (OB_SUCC(ObJsonVar::set_var(value_offset, header.entry_size_, result.ptr() + value_entry_offset))) { - value_entry_offset += type_size; - // fill value type - uint8_t *value_type_ptr = reinterpret_cast(result.ptr() + value_entry_offset); - *value_type_ptr = static_cast(value->json_type()); - value_entry_offset += sizeof(uint8_t); - } - } - } + } else if (OB_FAIL(obj_bin.try_update_inline(i, value, is_update_inline))) { + LOG_WARN("try_update_inline fail", K(ret), K(i)); + } else if (is_update_inline) { + LOG_DEBUG("try_update_inline success", K(i)); + } else if (OB_FALSE_IT(value_type = ObJsonVerType::get_json_vertype(value->json_type()))) { + } else if (OB_FAIL(obj_bin.set_value_entry(i, value_offset, value_type, false))) { + LOG_WARN("set_value_entry fail", K(ret), K(value_offset), K(value_type)); + } else if (OB_FAIL(serialize_json_value(value, result))) { + LOG_WARN("serialize_json_value fail", K(ret)); + // result may realloc, so need ensure point same memory + } else if (OB_FALSE_IT(obj_bin.set_current(result.string(), start_pos))) { } } // fill header obj size if (OB_SUCC(ret)) { - uint64_t real_obj_size = static_cast(result.length() - st_pos); + uint64_t real_obj_size = static_cast(result.length() - start_pos); if (ObJsonVar::get_var_type(real_obj_size) > ObJsonVar::get_var_type(obj_size)) { if (depth >= OB_JSON_BIN_MAX_SERIALIZE_TIME) { ret = OB_ERR_UNEXPECTED; @@ -478,101 +535,82 @@ int ObJsonBin::serialize_json_object(ObJsonObject *object, ObJsonBuffer &result, } else { int64_t delta_size = real_obj_size - obj_size; object->set_serialize_delta_size(delta_size); - result.set_length(st_pos); + result.set_length(start_pos); ret = serialize_json_object(object, result, depth + 1); } - } else { - ObJsonBinObjHeader *header = reinterpret_cast(result.ptr() + st_pos); - 
real_obj_size = static_cast(result.length() - st_pos); - ObJsonVar::set_var(real_obj_size, header->obj_size_size_, header->used_size_ + ObJsonVar::get_var_size(header->count_size_)); + } else if (OB_FAIL(obj_bin.set_obj_size(real_obj_size))) { + LOG_WARN("set_obj_size fail", K(ret)); } } - return ret; } -int ObJsonBin::serialize_json_array(ObJsonArray *array, ObJsonBuffer &result, uint32_t depth) +int ObJsonBinSerializer::serialize_json_array(ObJsonArray *array, ObJsonBuffer &result, uint32_t depth) { - UNUSED(depth); INIT_SUCC(ret); - const int64_t st_pos = result.length(); - uint64_t count = array->element_count(); - // object header [node_type:uint8_t][type:uint8_t][member_count:var][object_size_:var] - ObJsonBinArrHeader header; - header.is_continuous_ = 1; + uint64_t element_count = array->element_count(); + const int64_t start_pos = result.length(); + ObJsonBin array_bin; + ObJsonBinMeta meta; uint64_t array_size = array->get_serialize_size(); - header.entry_size_ = ObJsonVar::get_var_type(array_size); - header.obj_size_size_ = header.entry_size_; - header.count_size_ = ObJsonVar::get_var_type(count); - header.type_ = static_cast(get_array_vertype()); + meta.set_type(ObJsonBin::get_array_vertype(), false); + meta.set_element_count(element_count); + meta.set_element_count_var_type(ObJsonVar::get_var_type(element_count)); + meta.set_obj_size(array_size); + meta.set_obj_size_var_type(ObJsonVar::get_var_type(array_size)); + meta.set_entry_var_type(meta.obj_size_var_type()); + meta.set_is_continuous(true); + meta.calc_entry_array(); - - if (OB_FAIL(result.append(reinterpret_cast(&header), OB_JSON_BIN_ARR_HEADER_LEN))) { - LOG_WARN("failed to append array header node type", K(ret)); - } else if (OB_FAIL(ObJsonVar::append_var(count, header.count_size_, result))) { - LOG_WARN("failed to append array header member count", K(ret)); - } else if (OB_FAIL(ObJsonVar::append_var(array_size, header.obj_size_size_, result))) { - LOG_WARN("failed to append array header array 
size", K(ret)); + if (OB_FAIL(meta.to_header(result))) { + LOG_WARN("to obj header fail", K(ret)); + } else if (OB_FAIL(array_bin.reset(result.string(), start_pos, nullptr))) { + LOG_WARN("init bin with meta fail", K(ret), K(meta)); } - uint64_t type_size = ObJsonVar::get_var_size(header.entry_size_); - int64_t value_offset_size = (type_size + sizeof(uint8_t)) * count; - int64_t value_entry_offset = result.length(); - if (OB_SUCC(ret)) { - if (OB_FAIL(result.reserve(value_offset_size))) { - LOG_WARN("failed to extend result", K(ret), K(value_offset_size)); - } else if (OB_FAIL(result.set_length(result.length() + value_offset_size))) { - LOG_WARN("failed to set length result", K(ret)); + for (int i = 0; OB_SUCC(ret) && i < element_count; i++) { + ObJsonNode *value = nullptr; + uint64_t value_offset = result.length() - start_pos; + uint8_t value_type = 0; + bool is_update_inline = false; + if (OB_ISNULL(value = (*array)[i])) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("value is null", K(ret), K(i)); + } else if (OB_FAIL(array_bin.try_update_inline(i, value, is_update_inline))) { + LOG_WARN("try_update_inline fail", K(ret), K(i)); + } else if (is_update_inline) { + LOG_DEBUG("try_update_inline success", K(i)); + } else if (OB_FALSE_IT(value_type = ObJsonVerType::get_json_vertype(value->json_type()))) { + } else if (OB_FAIL(array_bin.set_value_entry(i, value_offset, value_type, false))) { + LOG_WARN("set_value_entry fail", K(ret), K(value_offset), K(value_type)); + } else if (OB_FAIL(serialize_json_value(value, result))) { + LOG_WARN("serialize_json_value fail", K(ret)); + // result may realloc, so need ensure point same memory + } else if (OB_FALSE_IT(array_bin.set_current(result.string(), start_pos))) { } } - for (uint64_t i = 0; OB_SUCC(ret) && i < count; i++) { - ObJsonNode *value = (*array)[i]; - uint32_t value_offset = result.length() - st_pos; - // recursion(parse value entry into func, simple type can store on value entry) - if (!try_update_inline(value, 
header.entry_size_, &value_entry_offset, result)) { - ret = serialize_json_value(value, result); - if (OB_FAIL(ret)) { - LOG_WARN("failed to append key to result", K(ret)); - } else { - // fill value entry - if (OB_SUCC(ObJsonVar::set_var(value_offset, header.entry_size_, result.ptr() + value_entry_offset))) { - value_entry_offset += type_size; - // fill value type - uint8_t *value_type_ptr = reinterpret_cast(result.ptr() + value_entry_offset); - *value_type_ptr = static_cast(value->json_type()); - value_entry_offset += sizeof(uint8_t); - } - } - } - } - - // fill header obj size + // fill header array size if (OB_SUCC(ret)) { - ObJsonBinObjHeader *header = reinterpret_cast(result.ptr() + st_pos); - uint64_t real_array_size = static_cast(result.length() - st_pos); + uint64_t real_array_size = static_cast(result.length() - start_pos); if (ObJsonVar::get_var_type(real_array_size) > ObJsonVar::get_var_type(array_size)) { if (depth >= OB_JSON_BIN_MAX_SERIALIZE_TIME) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to calc array size", K(ret)); + LOG_WARN("failed to calc object size", K(ret), K(real_array_size), K(array_size)); } else { int64_t delta_size = real_array_size - array_size; array->set_serialize_delta_size(delta_size); - result.set_length(st_pos); + result.set_length(start_pos); ret = serialize_json_array(array, result, depth + 1); } - } else { - uint64_t count_var_size = ObJsonVar::get_var_size(header->count_size_); - if (OB_FAIL(ObJsonVar::set_var(real_array_size, header->obj_size_size_, header->used_size_ + count_var_size))) { - LOG_WARN("failed to set array size.", K(ret), K(array_size)); - } + } else if (OB_FAIL(array_bin.set_obj_size(real_array_size))) { + LOG_WARN("set_obj_size fail", K(ret)); } } - return ret; } -int ObJsonBin::serialize_json_integer(int64_t value, ObJsonBuffer &result) const +int ObJsonBinSerializer::serialize_json_integer(int64_t value, ObJsonBuffer &result) { INIT_SUCC(ret); int64_t ser_len = 
serialization::encoded_length_vi64(value); @@ -589,7 +627,7 @@ int ObJsonBin::serialize_json_integer(int64_t value, ObJsonBuffer &result) const // [precision] [scale] [value] // [int16_t] [int16_t] [val_len] -int ObJsonBin::serialize_json_decimal(ObJsonDecimal *json_dec, ObJsonBuffer &result) const +int ObJsonBinSerializer::serialize_json_decimal(ObJsonDecimal *json_dec, ObJsonBuffer &result) { INIT_SUCC(ret); ObPrecision prec = json_dec->get_precision(); @@ -614,7 +652,7 @@ int ObJsonBin::serialize_json_decimal(ObJsonDecimal *json_dec, ObJsonBuffer &res return ret; } -int ObJsonBin::serialize_json_value(ObJsonNode *json_tree, ObJsonBuffer &result) +int ObJsonBinSerializer::serialize_json_value(ObJsonNode *json_tree, ObJsonBuffer &result) { INIT_SUCC(ret); switch (json_tree->json_type()) { @@ -636,7 +674,7 @@ int ObJsonBin::serialize_json_value(ObJsonNode *json_tree, ObJsonBuffer &result) case ObJsonNodeType::J_OINT: { const ObJsonInt *i = static_cast(json_tree); int64_t value = i->value(); - if (OB_FAIL(serialize_json_integer(value, result))) { + if (OB_FAIL(ObJsonBinSerializer::serialize_json_integer(value, result))) { LOG_WARN("failed to serialize json int", K(ret), K(value)); } break; @@ -645,7 +683,7 @@ int ObJsonBin::serialize_json_value(ObJsonNode *json_tree, ObJsonBuffer &result) case ObJsonNodeType::J_OLONG: { const ObJsonUint *i = static_cast(json_tree); uint64_t value = i->value(); - if (OB_FAIL(serialize_json_integer(value, result))) { + if (OB_FAIL(ObJsonBinSerializer::serialize_json_integer(value, result))) { LOG_WARN("failed to serialize json uint", K(ret), K(value)); } break; @@ -684,7 +722,7 @@ int ObJsonBin::serialize_json_value(ObJsonNode *json_tree, ObJsonBuffer &result) int64_t ser_len = serialization::encoded_length_vi64(sub_obj->length()); int64_t pos = result.length() + sizeof(uint8_t); ObJBVerType vertype = ObJsonVerType::get_json_vertype(json_tree->json_type()); - if (OB_FAIL(result_.append(reinterpret_cast(&vertype), sizeof(uint8_t)))) { 
+ if (OB_FAIL(result.append(reinterpret_cast(&vertype), sizeof(uint8_t)))) { LOG_WARN("failed to serialize type for str json obj", K(ret), K(ser_len)); } else if (OB_FAIL(result.reserve(ser_len))) { LOG_WARN("failed to reserver serialize size for str json obj", K(ret), K(ser_len)); @@ -772,8 +810,8 @@ int ObJsonBin::serialize_json_value(ObJsonNode *json_tree, ObJsonBuffer &result) const ObJsonOpaque *sub_obj = static_cast(json_tree); uint64_t obj_size = sub_obj->size(); uint16_t field_type = static_cast(sub_obj->field_type()); - ObJBVerType vertype = get_opaque_vertype(); - if (OB_FAIL(result_.append(reinterpret_cast(&vertype), sizeof(uint8_t)))) { + ObJBVerType vertype = ObJsonBin::get_opaque_vertype(); + if (OB_FAIL(result.append(reinterpret_cast(&vertype), sizeof(uint8_t)))) { LOG_WARN("failed to serialize type for str json obj", K(ret), K(vertype)); } else if (OB_FAIL(result.append(reinterpret_cast(&field_type), sizeof(uint16_t)))) { LOG_WARN("failed to append opaque json obj type", K(ret)); @@ -794,14 +832,15 @@ int ObJsonBin::serialize_json_value(ObJsonNode *json_tree, ObJsonBuffer &result) return ret; } -bool ObJsonBin::try_update_inline(const ObJsonNode *value, - uint8_t var_type, - int64_t *value_entry_offset, - ObJsonBuffer &result) +int ObJsonBin::try_update_inline( + const int index, + const ObJsonNode *value, + bool &is_update_inline) { - bool is_update_inline = false; - uint64_t inlined_val; - uint8_t inlined_type = static_cast(value->json_type()); + INIT_SUCC(ret); + uint8_t var_type = entry_var_type(); + uint64_t inlined_val = 0; + uint8_t inlined_type = 0; switch (value->json_type()) { case ObJsonNodeType::J_NULL: { inlined_val = 0; @@ -842,34 +881,22 @@ bool ObJsonBin::try_update_inline(const ObJsonNode *value, } // set inline - if (is_update_inline) { - uint64_t type_size = ObJsonVar::get_var_size(var_type); - // if inlined set first high bit to 1 - inlined_type |= OB_JSON_TYPE_INLINE_MASK; - INIT_SUCC(ret); - if 
(OB_FAIL(ObJsonVar::set_var(inlined_val, var_type, result.ptr() + *value_entry_offset))) { - is_update_inline = false; - LOG_WARN("fail to set inlined val.", K(ret), K(inlined_val), K(var_type)); - } else { - *value_entry_offset += type_size; - // fill value type - uint8_t *value_type_ptr = reinterpret_cast(result.ptr() + *value_entry_offset); - *value_type_ptr = inlined_type; - *value_entry_offset += sizeof(uint8_t); - } + if (! is_update_inline) { + } else if (OB_FAIL(set_value_entry(index, inlined_val, inlined_type | OB_JSON_TYPE_INLINE_MASK))) { + LOG_WARN("set_value_entry for inline fail", K(ret), K(inlined_type), K(inlined_val), K(var_type)); } - return is_update_inline; + return ret; } - -bool ObJsonBin::try_update_inline(const ObJsonBin *value, - uint8_t var_type, - int64_t *value_entry_offset, - ObJsonBuffer &result) +int ObJsonBin::try_update_inline( + const int index, + const ObJsonBin *value, + bool &is_update_inline) { - bool is_update_inline = false; + INIT_SUCC(ret); + uint8_t var_type = entry_var_type(); ObJsonNodeType j_type = value->json_type(); uint64_t inlined_val; - uint8_t inlined_type = static_cast(j_type); + uint8_t inlined_type = 0; switch (j_type) { case ObJsonNodeType::J_NULL: { inlined_val = 0; @@ -902,59 +929,60 @@ bool ObJsonBin::try_update_inline(const ObJsonBin *value, break; } default: { - LOG_INFO("unsupport inline type.", K(j_type)); break; } } // set inline - if (is_update_inline) { - uint64_t type_size = ObJsonVar::get_var_size(var_type); - // if inlined set first high bit to 1 - inlined_type |= OB_JSON_TYPE_INLINE_MASK; - INIT_SUCC(ret); - if (OB_FAIL(ObJsonVar::set_var(inlined_val, var_type, result.ptr() + *value_entry_offset))) { - is_update_inline = false; - LOG_WARN("fail to set inlined val.", K(ret), K(inlined_val), K(var_type)); - } else { - *value_entry_offset += type_size; - // fill value type - uint8_t *value_type_ptr = reinterpret_cast(result.ptr() + *value_entry_offset); - *value_type_ptr = inlined_type; - 
*value_entry_offset += sizeof(uint8_t); + if (! is_update_inline) { + } else if (OB_FAIL(set_value_entry(index, inlined_val, inlined_type | OB_JSON_TYPE_INLINE_MASK))) { + LOG_WARN("set_value_entry for inline fail", K(ret), K(inlined_val), K(var_type)); + } + return ret; +} + +int ObJsonBinSerializer::serialize(ObJsonNode *json_tree, ObString &data) +{ + INIT_SUCC(ret); + ObJsonBuffer result(allocator_); + ObJsonNodeType root_type = json_tree->json_type(); + if (root_type == ObJsonNodeType::J_ARRAY || root_type == ObJsonNodeType::J_OBJECT) { + if (OB_FAIL(ObJsonBin::add_doc_header_v0(result))) { + LOG_WARN("add_doc_header_v0 fail", K(ret)); + } else if (OB_FAIL(serialize_json_value(json_tree, result))) { + LOG_WARN("serialize json tree fail", K(ret), K(root_type)); + } else if (OB_FAIL(ObJsonBin::set_doc_header_v0(result, result.length()))) { + LOG_WARN("set_doc_header_v0 fail", K(ret)); + } + } else { + ObJBVerType ver_type = ObJsonVerType::get_json_vertype(root_type); + if (!ObJsonVerType::is_opaque_or_string(ver_type) && + OB_FAIL(result.append(reinterpret_cast(&ver_type), sizeof(uint8_t)))) { + LOG_WARN("failed to serialize json tree at append used size", K(ret), K(result.length())); + } else if (OB_FAIL(serialize_json_value(json_tree, result))) { // do recursion + LOG_WARN("failed to serialize json tree at recursion", K(ret)); } } - return is_update_inline; + + if (OB_FAIL(ret)) { + } else { + result.get_result_string(data); + } + return ret; } int ObJsonBin::parse_tree(ObJsonNode *json_tree) { INIT_SUCC(ret); - result_.reuse(); - ObJsonNodeType root_type = json_tree->json_type(); - if (OB_FAIL(result_.reserve(json_tree->get_serialize_size()))) { - LOG_WARN("failed to reserve bin buffer", K(ret), K(json_tree->get_serialize_size())); - } else if (root_type == ObJsonNodeType::J_ARRAY || root_type == ObJsonNodeType::J_OBJECT) { - if (OB_FAIL(serialize_json_value(json_tree, result_))) { // do recursion - LOG_WARN("failed to serialize json tree at recursion", 
K(ret)); - result_.reset(); - } - } else { - ObJBVerType ver_type = ObJsonVerType::get_json_vertype(root_type); - if (!ObJsonVerType::is_opaque_or_string(ver_type) && - OB_FAIL(result_.append(reinterpret_cast(&ver_type), sizeof(uint8_t)))) { - LOG_WARN("failed to serialize json tree at append used size", K(ret), K(result_.length())); - result_.reset(); - } else if (OB_FAIL(serialize_json_value(json_tree, result_))) { // do recursion - LOG_WARN("failed to serialize json tree at recursion", K(ret)); - result_.reset(); - } - } - - if (OB_SUCC(ret)) { - curr_.assign_ptr(result_.ptr(), result_.length()); - is_alloc_ = true; - reset_iter(); + ObJsonBinSerializer serializer(allocator_); + ObString data; + if (nullptr != ctx_ && nullptr != ctx_->update_ctx_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx not null, should not parse from tree", K(ret), KPC(ctx_)); + } else if (OB_FAIL(serializer.serialize(json_tree, data))) { + LOG_WARN("serialize fail", K(ret)); + } else if (OB_FAIL(reset(data, 0, nullptr))) { + LOG_WARN("set_current fail", K(ret)); } return ret; } @@ -966,38 +994,17 @@ int ObJsonBin::to_tree(ObJsonNode *&json_tree) if (OB_ISNULL(allocator_)) { ret = OB_ERR_NULL_VALUE; LOG_WARN("fail to deserialize with NULL alloctor.", K(ret)); - } else { - char *ptr = curr_.ptr(); - uint64_t offset = pos_; - uint8_t type = type_; - ObJsonNodeType curr_type = json_type(); - if (curr_type == ObJsonNodeType::J_OBJECT || - curr_type == ObJsonNodeType::J_ARRAY || - ObJsonVerType::is_opaque_or_string(static_cast(type_))) { - type = static_cast(curr_type); - } else { - // inline value store all store in union - offset = OB_JSON_TYPE_IS_INLINE(type) ? 
uint_val_ : offset; - type = ObJsonVar::get_var_type(offset); - } - if (OB_FAIL(deserialize_json_value(ptr + offset, curr_.length() - offset, type_, offset, json_tree, type))) { - LOG_WARN("deserialize failed", K(ret), K(offset), K(type)); - } + } else if (OB_FAIL(deserialize_json_value(json_tree))) { + LOG_WARN("deserialize failed", K(ret), K(pos_), K(get_type())); } - return ret; } -int ObJsonBin:: deserialize_json_value(const char *data, - uint64_t length, - uint8_t type, - uint64_t value_offset, - ObJsonNode *&json_tree, - uint64_t type_size) +int ObJsonBin::deserialize_json_value(ObJsonNode *&json_tree) { INIT_SUCC(ret); - bool is_inlined = OB_JSON_TYPE_IS_INLINE(type); - ObJBVerType node_vertype = static_cast(OB_JSON_TYPE_GET_INLINE(type)); + bool is_inlined = OB_JSON_TYPE_IS_INLINE(get_type()); + ObJBVerType node_vertype = static_cast(OB_JSON_TYPE_GET_INLINE(get_type())); ObJsonNodeType node_type = ObJsonVerType::get_json_type(node_vertype); switch (node_type) { case ObJsonNodeType::J_NULL: { @@ -1016,13 +1023,13 @@ int ObJsonBin:: deserialize_json_value(const char *data, ObPrecision prec = -1; ObScale scale = -1; number::ObNumber num; - int64_t pos = 0; - if (OB_FAIL(serialization::decode_i16(data, length, pos, &prec))) { - LOG_WARN("fail to deserialize decimal precision.", K(ret), K(length)); - } else if (OB_FAIL(serialization::decode_i16(data, length, pos, &scale))) { - LOG_WARN("fail to deserialize decimal scale.", K(ret), K(length), K(prec)); - } else if (OB_FAIL(num.deserialize(data, length, pos))) { - LOG_WARN("fail to deserialize number.", K(ret), K(length)); + int64_t pos = pos_; + if (OB_FAIL(cursor_->decode_i16(pos, &prec))) { + LOG_WARN("fail to deserialize decimal precision.", K(ret), K(pos)); + } else if (OB_FAIL(cursor_->decode_i16(pos, &scale))) { + LOG_WARN("fail to deserialize decimal scale.", K(ret), K(pos), K(prec)); + } else if (OB_FAIL(cursor_->deserialize(pos, &num))) { + LOG_WARN("fail to deserialize number.", K(ret), K(pos)); } else 
{ void *buf = allocator_->alloc(sizeof(ObJsonDecimal)); if (buf == NULL) { @@ -1046,13 +1053,13 @@ int ObJsonBin:: deserialize_json_value(const char *data, LOG_WARN("fail to alloc memory for int json node", K(ret)); } else { if (is_inlined) { - ObJsonInt *node = new(buf)ObJsonInt(ObJsonVar::var_uint2int(value_offset, type_size)); + ObJsonInt *node = new(buf)ObJsonInt(ObJsonVar::var_uint2int(inline_value_, meta_.entry_size_)); json_tree = static_cast(node); } else { int64_t val = 0; - int64_t pos = 0; - if (OB_FAIL(serialization::decode_vi64(data, length, pos, &val))) { - LOG_WARN("fail to decode int val.", K(ret), K(length)); + int64_t pos = pos_; + if (OB_FAIL(cursor_->decode_vi64(pos, &val))) { + LOG_WARN("fail to decode int val.", K(ret), K(pos)); allocator_->free(buf); } else { if (node_type == ObJsonNodeType::J_INT) { @@ -1073,13 +1080,13 @@ int ObJsonBin:: deserialize_json_value(const char *data, LOG_WARN("fail to alloc memory for uint json node", K(ret)); } else { if (is_inlined) { - ObJsonUint *node = new(buf)ObJsonUint(value_offset); + ObJsonUint *node = new(buf)ObJsonUint(uint_val_); json_tree = static_cast(node); } else { int64_t val = 0; - int64_t pos = 0; - if (OB_FAIL(serialization::decode_vi64(data, length, pos, &val))) { - LOG_WARN("fail to decode uint val.", K(ret), K(length)); + int64_t pos = pos_; + if (OB_FAIL(cursor_->decode_vi64(pos, &val))) { + LOG_WARN("fail to decode uint val.", K(ret), K(pos)); allocator_->free(buf); } else { uint64_t uval = static_cast(val); @@ -1095,16 +1102,15 @@ int ObJsonBin:: deserialize_json_value(const char *data, } case ObJsonNodeType::J_DOUBLE: case ObJsonNodeType::J_ODOUBLE: { + double val = 0; void *buf = allocator_->alloc(sizeof(ObJsonDouble)); if (buf == NULL) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for double json node", K(ret)); - } else if (length < sizeof(double)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length), K(sizeof(double))); + } else 
if (OB_FAIL(cursor_->read_double(pos_, &val))) { + LOG_WARN("read_double fail", K(ret), K(pos_), K(sizeof(double))); allocator_->free(buf); } else { - double val = *reinterpret_cast(data); if (node_type == ObJsonNodeType::J_DOUBLE) { json_tree = static_cast(new(buf)ObJsonDouble(val)); } else { @@ -1115,16 +1121,15 @@ int ObJsonBin:: deserialize_json_value(const char *data, } case ObJsonNodeType::J_OFLOAT: { + float val = 0; void *buf = allocator_->alloc(sizeof(ObJsonOFloat)); if (buf == NULL) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for float json node", K(ret)); - } else if (length < sizeof(float)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length)); + } else if (OB_FAIL(cursor_->read_float(pos_, &val))) { + LOG_WARN("read_float fail", K(ret), K(pos_), K(sizeof(float))); allocator_->free(buf); } else { - float val = *reinterpret_cast(data); ObJsonOFloat *node = new(buf) ObJsonOFloat(val); json_tree = static_cast(node); } @@ -1137,29 +1142,27 @@ int ObJsonBin:: deserialize_json_value(const char *data, LOG_WARN("fail to alloc memory for str json node", K(ret)); } else { int64_t val = 0; - int64_t pos = 0; - - ObJBVerType vertype = *reinterpret_cast(data); - if (vertype == ObJBVerType::J_STRING_V0) { + int64_t pos = pos_; + ObString str_data; + if (node_vertype == ObJBVerType::J_STRING_V0) { pos += sizeof(uint8_t); - if (OB_FAIL(serialization::decode_vi64(data, length, pos, &val))) { - LOG_WARN("decode str length failed.", K(ret)); - } else if (length < pos + val) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length), K(pos), K(val)); + if (OB_FAIL(cursor_->decode_vi64(pos, &val))) { + LOG_WARN("decode str length failed.", K(ret), K(pos)); } else { uint64_t str_length = static_cast(val); if (str_length == 0) { - LOG_DEBUG("empty string in json binary", K(str_length), K(pos), K(length)); + LOG_DEBUG("empty string in json binary", K(str_length), K(pos)); ObJsonString 
*empty_str_node = new(buf)ObJsonString(NULL, 0); json_tree = static_cast(empty_str_node); + } else if (OB_FAIL(cursor_->get(pos, str_length, str_data))) { + LOG_WARN("get str data fail", K(ret), K(pos), K(str_length)); } else { void *str_buf = allocator_->alloc(str_length); if (str_buf == NULL) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for data buf", K(ret), K(str_length)); } else { - MEMCPY(str_buf, data + pos, str_length); + MEMCPY(str_buf, str_data.ptr(), str_data.length()); ObJsonString *node = new(buf)ObJsonString(reinterpret_cast(str_buf), str_length); json_tree = static_cast(node); } @@ -1168,7 +1171,7 @@ int ObJsonBin:: deserialize_json_value(const char *data, } else { // other version process ret = OB_NOT_SUPPORTED; - LOG_WARN("invalid ver type.", K(ret), K(vertype)); + LOG_WARN("invalid ver type.", K(ret), K(node_vertype)); } } break; @@ -1180,7 +1183,7 @@ int ObJsonBin:: deserialize_json_value(const char *data, LOG_WARN("fail to alloc memory for obj json node", K(ret)); } else { ObJsonObject *node = new(buf)ObJsonObject(allocator_); - ret = deserialize_json_object(data, length, node, node_vertype); + ret = deserialize_json_object(node); if (OB_SUCC(ret)) { json_tree = static_cast(node); } else { @@ -1197,7 +1200,7 @@ int ObJsonBin:: deserialize_json_value(const char *data, LOG_WARN("fail to alloc memory for array json node", K(ret)); } else { ObJsonArray *node = new(buf)ObJsonArray(allocator_); - ret = deserialize_json_array(data, length, node, node_vertype); + ret = deserialize_json_array(node); if (OB_SUCC(ret)) { json_tree = static_cast(node); } else { @@ -1208,28 +1211,34 @@ int ObJsonBin:: deserialize_json_value(const char *data, break; } case ObJsonNodeType::J_BOOLEAN: { + bool val = false; void *buf = allocator_->alloc(sizeof(ObJsonBoolean)); if (buf == NULL) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for boolean json node", K(ret)); } else { - bool val = is_inlined ? 
static_cast(value_offset) : static_cast(*data); - ObJsonBoolean *node = new(buf)ObJsonBoolean(val); - json_tree = static_cast(node); + if (is_inlined) { + val = static_cast(inline_value_); + } else if (OB_FAIL(cursor_->read_bool(pos_, &val))) { + LOG_WARN("read_bool fail", K(ret), K(pos_)); + } + if (OB_SUCC(ret)) { + ObJsonBoolean *node = new (buf) ObJsonBoolean(val); + json_tree = static_cast(node); + } } break; } case ObJsonNodeType::J_DATE: { + int32_t value = 0; void *buf = allocator_->alloc(sizeof(ObJsonDatetime)); if (buf == NULL) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for date json node", K(ret)); - } else if (length < sizeof(int32_t)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length), K(sizeof(int32_t))); + } else if (OB_FAIL(cursor_->read_i32(pos_, &value))) { + LOG_WARN("read_i32 fail", K(ret), K(pos_), K(sizeof(int32_t))); } else { ObTime ob_time; - int32_t value = *reinterpret_cast(data); if (OB_FAIL(ObTimeConverter::date_to_ob_time(value, ob_time))) { LOG_WARN("fail to convert date to ob time", K(ret)); } else { @@ -1240,16 +1249,15 @@ int ObJsonBin:: deserialize_json_value(const char *data, break; } case ObJsonNodeType::J_TIME: { + int64_t value = 0; void *buf = allocator_->alloc(sizeof(ObJsonDatetime)); if (buf == NULL) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for time json node", K(ret)); - } else if (length < sizeof(int64_t)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length), K(sizeof(int64_t))); + } else if (OB_FAIL(cursor_->read_i64(pos_, &value))) { + LOG_WARN("read_i64 fail", K(ret), K(pos_), K(sizeof(int64_t))); } else { ObTime ob_time; - int64_t value = *reinterpret_cast(data); ObTimeConverter::time_to_ob_time(value, ob_time); ObJsonDatetime *node = new(buf)ObJsonDatetime(node_type, ob_time); json_tree = static_cast(node); @@ -1258,16 +1266,15 @@ int ObJsonBin:: deserialize_json_value(const char *data, } case 
ObJsonNodeType::J_DATETIME: case ObJsonNodeType::J_ORACLEDATE: { + int64_t value = 0; void *buf = allocator_->alloc(sizeof(ObJsonDatetime)); if (buf == NULL) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for datetime json node", K(ret)); - } else if (length < sizeof(int64_t)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length), K(sizeof(int64_t))); + } else if (OB_FAIL(cursor_->read_i64(pos_, &value))) { + LOG_WARN("read_i64 fail", K(ret), K(pos_), K(sizeof(int64_t))); } else { ObTime ob_time; - int64_t value = *reinterpret_cast(data); if (OB_FAIL(ObTimeConverter::datetime_to_ob_time(value, NULL, ob_time))) { LOG_WARN("fail to convert datetime to ob time", K(ret)); } else { @@ -1281,16 +1288,15 @@ int ObJsonBin:: deserialize_json_value(const char *data, case ObJsonNodeType::J_ODATE: case ObJsonNodeType::J_OTIMESTAMP: case ObJsonNodeType::J_OTIMESTAMPTZ: { + int64_t value = 0; void *buf = allocator_->alloc(sizeof(ObJsonDatetime)); if (buf == NULL) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for timestamp json node", K(ret)); - } else if (length < sizeof(int64_t)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length), K(sizeof(int64_t))); + } else if (OB_FAIL(cursor_->read_i64(pos_, &value))) { + LOG_WARN("read_i64 fail", K(ret), K(pos_), K(sizeof(int64_t))); } else { ObTime ob_time; - int64_t value = *reinterpret_cast(data); if (OB_FAIL(ObTimeConverter::datetime_to_ob_time(value, NULL, ob_time))) { LOG_WARN("fail to convert timestamp to ob time", K(ret)); } else { @@ -1301,34 +1307,37 @@ int ObJsonBin:: deserialize_json_value(const char *data, break; } case ObJsonNodeType::J_OPAQUE: { - ObJBVerType vertype = *reinterpret_cast(data); - if (vertype == ObJBVerType::J_OPAQUE_V0) { + if (node_vertype == ObJBVerType::J_OPAQUE_V0) { + int64_t pos = pos_; + ObString str_data; uint64_t need_len = sizeof(uint16_t) + sizeof(uint64_t) + sizeof(uint8_t); - ObObjType 
field_type = static_cast(*reinterpret_cast(data + sizeof(uint8_t))); - uint64_t val_length = *reinterpret_cast(data + sizeof(uint8_t) + sizeof(uint16_t)); + ObObjType field_type = ObObjType::ObNullType; + uint64_t val_length = 0; void *buf = allocator_->alloc(sizeof(ObJsonOpaque)); if (buf == NULL) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for opaque json node", K(ret)); - } else if (length < need_len) { + } else if (OB_FAIL(cursor_->read_i16(pos + sizeof(uint8_t), reinterpret_cast(&field_type)))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length), K(need_len), K(val_length)); + LOG_WARN("read_i64 fail", K(ret), K(pos), K(need_len), K(val_length)); + } else if (OB_FAIL(cursor_->read_i64(pos + sizeof(uint8_t) + sizeof(uint16_t), reinterpret_cast(&val_length)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("read_i64 fail", K(ret), K(pos), K(need_len), K(val_length)); } else if (val_length == 0) { - LOG_DEBUG("empty opaque in json binary", K(val_length), K(field_type), K(length)); + LOG_DEBUG("empty opaque in json binary", K(val_length), K(field_type), K(pos)); ObString empty_value(0, NULL); ObJsonOpaque *empy_opa_node = new(buf)ObJsonOpaque(empty_value, field_type); json_tree = static_cast(empy_opa_node); } else { - if (length < need_len + val_length) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length), K(need_len), K(val_length)); + if (OB_FAIL(cursor_->get(pos + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint64_t), val_length, str_data))) { + LOG_WARN("get data fail", K(ret), K(pos), K(need_len), K(val_length)); } else { void *str_buf = allocator_->alloc(val_length); if (str_buf == NULL) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for data buf", K(ret), K(val_length)); } else { - MEMCPY(str_buf, data + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint64_t), val_length); + MEMCPY(str_buf, str_data.ptr(), str_data.length()); ObString value(val_length, 
reinterpret_cast(str_buf)); ObJsonOpaque *node = new(buf)ObJsonOpaque(value, field_type); json_tree = static_cast(node); @@ -1338,7 +1347,7 @@ int ObJsonBin:: deserialize_json_value(const char *data, } else { // other version process ret = OB_NOT_SUPPORTED; - LOG_WARN("invalid ver type.", K(ret), K(vertype)); + LOG_WARN("invalid ver type.", K(ret), K(node_vertype)); } break; } @@ -1352,18 +1361,17 @@ int ObJsonBin:: deserialize_json_value(const char *data, LOG_WARN("fail to alloc memory for str json node", K(ret)); } else { int64_t val = 0; - int64_t pos = 0; - + int64_t pos = pos_; + ObString str_data; pos += sizeof(uint8_t); - if (OB_FAIL(serialization::decode_vi64(data, length, pos, &val))) { + if (OB_FAIL(cursor_->decode_vi64(pos, &val))) { LOG_WARN("decode str length failed.", K(ret)); - } else if (length < pos + val) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length), K(pos), K(val)); + } else if (OB_FAIL(cursor_->get(pos, val, str_data))) { + LOG_WARN("get data fail", K(ret), K(pos), K(val)); } else { uint64_t str_length = static_cast(val); if (str_length == 0) { - LOG_DEBUG("empty string in json binary", K(str_length), K(pos), K(length)); + LOG_DEBUG("empty string in json binary", K(str_length), K(pos)); ObJsonString *empty_str_node = new(buf)ObJsonString(NULL, 0); json_tree = static_cast(empty_str_node); } else { @@ -1372,7 +1380,7 @@ int ObJsonBin:: deserialize_json_value(const char *data, ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for data buf", K(ret), K(str_length)); } else { - MEMCPY(str_buf, data + pos, str_length); + MEMCPY(str_buf, str_data.ptr(), str_data.length()); ObJsonORawString *node = new(buf)ObJsonORawString(reinterpret_cast(str_buf), str_length, node_type); json_tree = static_cast(node); @@ -1390,18 +1398,17 @@ int ObJsonBin:: deserialize_json_value(const char *data, LOG_WARN("fail to alloc memory for str json node", K(ret)); } else { int64_t val = 0; - int64_t pos = 0; - + int64_t 
pos = pos_; + ObString str_data; pos += sizeof(uint8_t); - if (OB_FAIL(serialization::decode_vi64(data, length, pos, &val))) { + if (OB_FAIL(cursor_->decode_vi64(pos, &val))) { LOG_WARN("decode str length failed.", K(ret)); - } else if (length < pos + val) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length), K(pos), K(val)); + } else if (OB_FAIL(cursor_->get(pos, val, str_data))) { + LOG_WARN("get data fail", K(ret), K(pos), K(val)); } else { uint64_t str_length = static_cast(val); if (str_length == 0) { - LOG_DEBUG("empty string in json binary", K(str_length), K(pos), K(length)); + LOG_DEBUG("empty string in json binary", K(str_length), K(pos)); ObJsonString *empty_str_node = new(buf)ObJsonString(NULL, 0); json_tree = static_cast(empty_str_node); } else { @@ -1410,7 +1417,7 @@ int ObJsonBin:: deserialize_json_value(const char *data, ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory for data buf", K(ret), K(str_length)); } else { - MEMCPY(str_buf, data + pos, str_length); + MEMCPY(str_buf, str_data.ptr(), str_data.length()); ObObjType field_type = node_type == ObJsonNodeType::J_ODAYSECOND ? 
ObIntervalYMType : ObIntervalDSType; ObJsonOInterval *node = new(buf)ObJsonOInterval(reinterpret_cast(str_buf), str_length, field_type); @@ -1434,81 +1441,38 @@ int ObJsonBin:: deserialize_json_value(const char *data, return ret; } -int ObJsonBin::deserialize_json_object_v0(const char *data, uint64_t length, ObJsonObject *object) +int ObJsonBin::deserialize_json_object_v0(ObJsonObject *object) { INIT_SUCC(ret); - bool with_key_dict = false; // TODO how to judge key dict - uint64_t offset = 0; - uint8_t node_type, type, obj_size_type; - uint64_t count, obj_size; - if (length <= OB_JSON_BIN_OBJ_HEADER_LEN) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to access data for length is not enough.", K(ret), K(length)); - } else { - parse_obj_header(data, offset, node_type, type, obj_size_type, count, obj_size); - object->set_serialize_size(obj_size); - uint64_t type_size = ObJsonVar::get_var_size(type); - uint64_t key_entry_size = type_size * 2; - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - - const char *key_entry = (data + offset); - const char *val_entry = (key_entry + key_entry_size * count); - uint8_t v_type = static_cast(JBLS_UINT8); - if (offset + key_entry_size * count + val_entry_size * count > length) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to access data for length is not enough.", - K(ret), K(length), K(offset), K(key_entry_size), K(val_entry_size), K(count)); - } - for (uint64_t i = 0; OB_SUCC(ret) && i < count; i++) { - uint64_t key_offset, key_len, value_offset, val_type; - if (OB_FAIL(ObJsonVar::read_var(key_entry + key_entry_size * i, type, &key_offset))) { - LOG_WARN("failed to read key offset", K(ret)); - } else if (OB_FAIL(ObJsonVar::read_var(key_entry + key_entry_size * i + type_size, type, &key_len))) { - LOG_WARN("failed to read key len", K(ret)); - } else if (OB_FAIL(ObJsonVar::read_var(val_entry + val_entry_size * i, type, &value_offset))) { - LOG_WARN("failed to read val offset", K(ret)); - } else if 
(OB_FAIL(ObJsonVar::read_var(val_entry + val_entry_size * i + type_size, v_type, &val_type))) { - LOG_WARN("failed to read val type", K(ret)); - } else if (key_offset >= length) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to access data for length is not enough.", K(ret), K(length), K(key_offset)); - } else { - // TODO if with key dict, read key from dict - // to consider, add option to controll need alloc or not - void *key_buf = nullptr; - if (key_len > 0) { - key_buf = allocator_->alloc(key_len); - if (key_buf == NULL) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to alloc memory for data buf", K(ret)); - } - } - if (OB_SUCC(ret)) { - MEMCPY(key_buf, data + key_offset, key_len); - ObString key(key_len, reinterpret_cast(key_buf)); - const char *val = data + value_offset; - ObJsonNode *node = NULL; - ret = deserialize_json_value(val, length - value_offset, val_type, value_offset, node, type); - if (OB_SUCC(ret)) { - if (OB_FAIL(object->add(key, node, false, true, false))) { - LOG_WARN("failed to add node to obj", K(ret)); - } - } else { - LOG_WARN("failed to deserialize child node.", K(ret), K(i), K(val_type)); - } - } - } + uint64_t element_count = this->element_count(); + object->set_serialize_size(obj_size()); + ObJsonBin child_bin(allocator_); + for (uint64_t i = 0; OB_SUCC(ret) && i < element_count; i++) { + ObJsonNode *node = nullptr; + ObString ori_key; + ObString key; + if (OB_FAIL(get_key_in_object(i, ori_key))) { + LOG_WARN("get_key_in_object fail", K(ret), K(i)); + } else if (OB_FAIL(ob_write_string(*allocator_, ori_key, key))) { + LOG_WARN("ob_write_string fail", K(ret), K(i), K(ori_key)); + } else if (OB_FAIL(get_value(i, child_bin))) { + LOG_WARN("get child value fail", K(ret)); + } else if (OB_FAIL(child_bin.deserialize_json_value(node))) { + LOG_WARN("deserialize child node fail", K(ret), K(i), K(child_bin)); + } else if (OB_FAIL(object->add(key, node, false, true, false, is_schema_))) { + LOG_WARN("add node to obj fail", K(ret), K(i)); 
} } return ret; } -int ObJsonBin::deserialize_json_object(const char *data, uint64_t length, ObJsonObject *object, ObJBVerType vertype) +int ObJsonBin::deserialize_json_object(ObJsonObject *object) { INIT_SUCC(ret); + ObJBVerType vertype = get_vertype(); switch(vertype) { case ObJBVerType::J_OBJECT_V0: { - ret = deserialize_json_object_v0(data, length, object); + ret = deserialize_json_object_v0(object); break; } default: { @@ -1520,54 +1484,32 @@ int ObJsonBin::deserialize_json_object(const char *data, uint64_t length, ObJson return ret; } -int ObJsonBin::deserialize_json_array_v0(const char *data, uint64_t length, ObJsonArray *array) +int ObJsonBin::deserialize_json_array_v0(ObJsonArray *array) { INIT_SUCC(ret); - uint64_t offset = 0; - uint8_t node_type, type, obj_size_type; - uint64_t count, obj_size; - if (length <= OB_JSON_BIN_OBJ_HEADER_LEN) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to access data for length is not enough.", K(ret), K(length)); - } else { - parse_obj_header(data, offset, node_type, type, obj_size_type, count, obj_size); - array->set_serialize_size(obj_size); - uint64_t type_size = ObJsonVar::get_var_size(type); - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - - const char *val_entry = (data + offset); - uint8_t v_type = static_cast(JBLS_UINT8); - if (offset + val_entry_size * count > length) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to access data for length is not enough.", - K(ret), K(length), K(offset), K(val_entry_size), K(count)); - } - for (uint64_t i = 0; OB_SUCC(ret) && i < count; i++){ - uint64_t val_offset, val_type; - if (OB_FAIL(ObJsonVar::read_var(val_entry + val_entry_size * i, type, &val_offset))) { - LOG_WARN("failed to read val offset", K(ret), K(i), K(val_entry_size), K(type)); - } else if (OB_FAIL(ObJsonVar::read_var(val_entry + val_entry_size * i + type_size, v_type, &val_type))) { - LOG_WARN("failed to read val type", K(ret), K(i), K(val_entry_size), K(type)); - } else { - const char *val = data + 
val_offset; - ObJsonNode *node = NULL; - if (OB_FAIL(deserialize_json_value(val, length - val_offset, val_type, val_offset, node, type))) { - LOG_WARN("failed to deserialize child node", K(ret), K(i), K(val_type), K(val_offset)); - } else if (OB_FAIL(array->append(node))) { - LOG_WARN("failed to append node to array", K(ret)); - } - } + uint64_t element_count = this->element_count(); + array->set_serialize_size(this->obj_size()); + ObJsonBin child_bin(allocator_); + for (uint64_t i = 0; OB_SUCC(ret) && i < element_count; i++) { + ObJsonNode *node = nullptr; + if (OB_FAIL(get_value(i, child_bin))) { + LOG_WARN("get_value fail", K(ret), K(i)); + } else if (OB_FAIL(child_bin.deserialize_json_value(node))) { + LOG_WARN("failed to deserialize child node", K(ret), K(i), K(child_bin)); + } else if (OB_FAIL(array->append(node))) { + LOG_WARN("failed to append node to array", K(ret), K(i)); } } return ret; } -int ObJsonBin::deserialize_json_array(const char *data, uint64_t length, ObJsonArray *array, ObJBVerType vertype) +int ObJsonBin::deserialize_json_array(ObJsonArray *array) { INIT_SUCC(ret); + ObJBVerType vertype = get_vertype(); switch(vertype) { case ObJBVerType::J_ARRAY_V0: { - ret = deserialize_json_array_v0(data, length, array); + ret = deserialize_json_array_v0(array); break; } default: { @@ -1579,207 +1521,94 @@ int ObJsonBin::deserialize_json_array(const char *data, uint64_t length, ObJsonA return ret; } -int ObJsonBin::raw_binary(ObString &buf) const +// get need size when rebuild or serialize from tree for un-inline node +int ObJsonBin::get_area_size(uint64_t& size) const { INIT_SUCC(ret); - if (OB_ISNULL(curr_.ptr())) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json binary ptr is null.", K(ret)); + ObJsonNodeType node_type = json_type(); + if (OB_JSON_TYPE_IS_INLINE(get_type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("inline type can not call this", K(ret), K(get_type())); } else { - buf.assign_ptr(curr_.ptr(), curr_.length()); - } - return ret; -} - -int 
ObJsonBin::get_max_offset(const char* data, ObJsonNodeType cur_node, uint64_t& max_offset) const -{ - INIT_SUCC(ret); - uint8_t cur_node_type = static_cast(cur_node); - if (OB_JSON_TYPE_IS_INLINE(cur_node_type)) { - max_offset = 1; - } else if (!(cur_node == ObJsonNodeType::J_OBJECT || cur_node == ObJsonNodeType::J_ARRAY)) { - if (ObJsonVerType::is_opaque_or_string(cur_node)) { - int64_t decode_pos = 1; - int64_t val = 0; - if (OB_FAIL(serialization::decode_vi64(data, curr_.length() - (data - curr_.ptr()), decode_pos, &val))) { - LOG_WARN("decode slength failed.", K(ret)); - } else { - max_offset = decode_pos; - max_offset += val; + switch (node_type) { + case ObJsonNodeType::J_NULL: { + size = 1; + break; } - } else if (ObJsonBaseUtil::is_time_type(cur_node)) { - if (cur_node == ObJsonNodeType::J_TIME || - cur_node == ObJsonNodeType::J_DATE || - cur_node == ObJsonNodeType::J_ORACLEDATE) { - max_offset = sizeof(int32_t); - } else { - max_offset = sizeof(uint64_t); + // [precision(int16_t)][scale(int16_t)][value] + case ObJsonNodeType::J_DECIMAL: + case ObJsonNodeType::J_ODECIMAL: { + size = meta_.bytes_; + break; } - } else if (cur_node == ObJsonNodeType::J_DECIMAL || - cur_node == ObJsonNodeType::J_ODECIMAL) { - ObPrecision prec = -1; - ObScale scale = -1; - int64_t pos = 0; - number::ObNumber number; - if (OB_FAIL(serialization::decode_i16(data, curr_.length() - (data - curr_.ptr()), pos, &prec))) { - LOG_WARN("fail to deserialize decimal precision.", K(ret), K(data - curr_.ptr()), K(curr_.length())); - } else if (OB_FAIL(serialization::decode_i16(data, curr_.length() - (data - curr_.ptr()), pos, &scale))) { - LOG_WARN("fail to deserialize decimal scale.", K(ret), K(data - curr_.ptr()), K(curr_.length())); - } else if (OB_FAIL(number.deserialize(data, curr_.length() - (data - curr_.ptr()), pos))) { - LOG_WARN("failed to deserialize decimal data", K(ret)); - } else { - max_offset = pos; + case ObJsonNodeType::J_INT: + case ObJsonNodeType::J_OINT: { + size = 
meta_.bytes_; + break; } - } else if (cur_node == ObJsonNodeType::J_INT || - cur_node == ObJsonNodeType::J_UINT || - cur_node == ObJsonNodeType::J_OINT || - cur_node == ObJsonNodeType::J_OLONG) { - int64_t val = 0; - int64_t pos = 0; - if (OB_FAIL(serialization::decode_vi64(data, curr_.length() - (data - curr_.ptr()), pos, &val))) { - LOG_WARN("decode int val failed.", K(ret)); - } else { - max_offset = pos; + case ObJsonNodeType::J_UINT: + case ObJsonNodeType::J_OLONG: { + size = meta_.bytes_; + break; } - } else if (cur_node == ObJsonNodeType::J_OFLOAT) { - max_offset = sizeof(float); - } else if (cur_node == ObJsonNodeType::J_DOUBLE || - cur_node == ObJsonNodeType::J_ODOUBLE) { - max_offset = sizeof(double); - } else if (cur_node == ObJsonNodeType::J_NULL || cur_node == ObJsonNodeType::J_BOOLEAN) { - max_offset = 1; - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get length failed.", K(ret), K(cur_node)); - } - } else { - uint8_t node_type, type, obj_size_type; - uint64_t count, obj_size, offset = 0; - parse_obj_header(data, offset, node_type, type, obj_size_type, count, obj_size); - ObJsonNodeType cur_node = ObJsonVerType::get_json_type(static_cast(node_type)); - - uint64_t val_offset, val_len, max_val_offset = offset; - uint8_t entry_size = ObJsonVar::get_var_size(type); - uint8_t max_offset_type; - bool is_first_uninline = true; - bool is_continuous = (reinterpret_cast(data))->is_continuous_; - - max_offset = max_val_offset; - - for (int64_t i = count - 1; OB_SUCC(ret) && i >= 0 && is_first_uninline; --i) { - val_offset = offset + (entry_size + sizeof(uint8_t)) * i; - if (cur_node == ObJsonNodeType::J_OBJECT) { - val_offset += count * (entry_size * 2); + case ObJsonNodeType::J_DOUBLE: + case ObJsonNodeType::J_ODOUBLE: { + size = sizeof(double); + break; } - const char* val_offset_ptr = data + val_offset; - uint64_t node_offset; - - node_type = static_cast(*static_cast(val_offset_ptr + entry_size)); - if (OB_JSON_TYPE_IS_INLINE(node_type)) { - if 
(max_val_offset < val_offset_ptr + 1 - data) { - max_val_offset = val_offset_ptr + 1 - data; - max_offset_type = node_type; - } - } else if (OB_FAIL(ObJsonVar::read_var(val_offset_ptr, type, &node_offset))) { - LOG_WARN("get max offset failed.", K(ret), K(type)); - } else if (max_val_offset < node_offset) { - max_val_offset = node_offset; - max_offset_type = node_type; - if (is_continuous) { - is_first_uninline = false; - } + case ObJsonNodeType::J_OFLOAT: { + size = sizeof(float); + break; } - } - - if (OB_SUCC(ret)) { - if (max_val_offset > offset) { - uint64_t node_max_offset = 0; - if (!OB_JSON_TYPE_IS_INLINE(node_type) && - OB_FAIL(get_max_offset(data + max_val_offset, static_cast(max_offset_type), node_max_offset))) { - LOG_WARN("get max offset failed.", K(ret), K(cur_node)); - } else { - max_val_offset += node_max_offset; - } + // string type : [vertype(uint8_t)][length(var uint64_t)][data] + // length is var_size encoding + // element_count_ store data length + case ObJsonNodeType::J_OBINARY: + case ObJsonNodeType::J_OOID: + case ObJsonNodeType::J_ORAWHEX: + case ObJsonNodeType::J_ORAWID: + case ObJsonNodeType::J_ODAYSECOND: + case ObJsonNodeType::J_OYEARMONTH: + case ObJsonNodeType::J_STRING: { + uint64_t str_len = get_element_count(); + size = OB_JSON_BIN_VALUE_TYPE_LEN + serialization::encoded_length_vi64(str_len) + str_len; + break; } - if (max_val_offset < obj_size) { - max_offset = obj_size; - } else { - max_offset = max_val_offset; + case ObJsonNodeType::J_OBJECT: + case ObJsonNodeType::J_ARRAY: { + size = obj_size(); + break; } - } - } - - return ret; -} - -int ObJsonBin::get_use_size(uint64_t& used_size) const -{ - INIT_SUCC(ret); - int32_t stk_len = stack_size(stack_buf_); - const char* data = curr_.ptr() + pos_; - ObJBVerType ver_type = static_cast(*reinterpret_cast(data)); - ObJsonNodeType json_type = static_cast(ver_type); - if (stk_len == 0) { - used_size = curr_.length(); - } else { - uint64_t max_offset = 0; - if 
(OB_FAIL(get_max_offset(data, json_type, max_offset))) { - LOG_WARN("get max offset.", K(ret)); - } else { - used_size = max_offset; - if (curr_.length() - pos_ < used_size) { - used_size = curr_.length() - pos_; + case ObJsonNodeType::J_BOOLEAN: { + size = sizeof(bool); + break; } - } - } - return ret; -} - -int ObJsonBin::raw_binary(ObString &buf, ObIAllocator *allocator) const -{ - INIT_SUCC(ret); - ObIAllocator * allocator_ptr = (allocator == NULL) ? allocator_ : allocator; - uint8_t type = OB_JSON_TYPE_GET_INLINE(type_); // dst type take off inline mask - ObJBVerType vertype = static_cast(type); - ObJsonNodeType node_type = ObJsonVerType::get_json_type(vertype); - if (node_type == ObJsonNodeType::J_ARRAY || - node_type == ObJsonNodeType::J_OBJECT || - ObJsonVerType::is_opaque_or_string(vertype)) { - ret = raw_binary_at_iter(buf); - } else if (OB_ISNULL(allocator_ptr)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("alloctor is null.", K(ret)); - } else { - void *result_jbuf = allocator_ptr->alloc(sizeof(ObJsonBuffer)); - if (OB_ISNULL(result_jbuf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate jbuf", K(ret)); - } else { - ObJsonBuffer* result = static_cast(new(result_jbuf)ObJsonBuffer(allocator_ptr)); - if (OB_FAIL(result->append(reinterpret_cast(&type), sizeof(uint8_t)))) { - LOG_WARN("failed to serialize jsonbin append type_", K(ret)); - } else { - int64_t append_len = curr_.length() - pos_; - if (append_len <= 0) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid append len.", K(ret), K(append_len), K(pos_), K(curr_)); - } else { - if (bytes_ * OB_JSON_BIN_REBUILD_THRESHOLD < append_len || OB_JSON_TYPE_IS_INLINE(type_)) { - // free space over 30% or inline type, do rebuild - ObJsonBuffer& jbuf = *result; - if (OB_FAIL(rebuild_json_value(curr_.ptr() + pos_, curr_.length() - pos_, type_, type, uint_val_, jbuf))) { - LOG_WARN("failed to rebuild inline value", K(ret)); - } - } else { - // free space less than 30%, do append - if 
(OB_FAIL(result->append(curr_.ptr() + pos_, curr_.length() - pos_))) { - LOG_WARN("failed to copy data to result", K(ret), K(curr_.length() - pos_)); - } - } - if (OB_SUCC(ret)) { - buf.assign_ptr(result->ptr(), result->length()); - } - } + case ObJsonNodeType::J_DATE: + case ObJsonNodeType::J_ORACLEDATE: { + size = sizeof(int32_t); + break; + } + case ObJsonNodeType::J_TIME: + case ObJsonNodeType::J_DATETIME: + case ObJsonNodeType::J_TIMESTAMP: + case ObJsonNodeType::J_ODATE: + case ObJsonNodeType::J_OTIMESTAMP: + case ObJsonNodeType::J_OTIMESTAMPTZ: { + size = sizeof(int64_t); + break; + } + // opaque type : [vertype(uint8_t)][ObObjType(uint16_t)][length(uint64_t)][data] + // length is fix_size encoding + // element_count_ store data length + case ObJsonNodeType::J_OPAQUE: { + uint64_t str_len = get_element_count(); + size = OB_JSON_BIN_VALUE_TYPE_LEN + sizeof(uint16_t) + sizeof(uint64_t) + str_len; + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid node type", K(ret), K(node_type), K(get_type())); + break; } } } @@ -1789,17 +1618,205 @@ int ObJsonBin::raw_binary(ObString &buf, ObIAllocator *allocator) const int ObJsonBin::raw_binary_at_iter(ObString &buf) const { INIT_SUCC(ret); - uint64_t used_size = 0; - if (OB_ISNULL(curr_.ptr())) { + uint8_t type = OB_JSON_TYPE_GET_INLINE(get_type()); + ObJBVerType vertype = static_cast(type); + ObJsonNodeType node_type = ObJsonVerType::get_json_type(vertype); + ObJsonBuffer result(allocator_); + if (OB_ISNULL(allocator_)) { ret = OB_ERR_NULL_VALUE; - LOG_WARN("json binary ptr is null.", K(ret)); - } else if (pos_ >= curr_.length()) { - ret = OB_ERROR_OUT_OF_RANGE; - LOG_WARN("json binary iter pos invalid", K(ret), K(pos_), K(curr_.length())); - } else if (OB_FAIL(get_use_size(used_size))) { - LOG_WARN("get use size failed", K(ret)); + LOG_WARN("alloctor is null.", K(ret)); + } else if (! 
(node_type == ObJsonNodeType::J_ARRAY || node_type == ObJsonNodeType::J_OBJECT || ObJsonVerType::is_opaque_or_string(vertype)) + && OB_FAIL(result.append(reinterpret_cast(&type), sizeof(uint8_t)))) { + LOG_WARN("failed to serialize jsonbin append type_", K(ret)); + } else if (OB_FAIL(rebuild_json_value(result))) { + LOG_WARN("failed to rebuild inline value", K(ret)); } else { - buf.assign_ptr(curr_.ptr() + pos_, used_size); + // need ensure memory not release + result.get_result_string(buf); + } + return ret; +} + +// ObJsonBin may be not continous when partial update +// calling get_serialize_size get need size when rebuilding, +int ObJsonBin::get_serialize_size(uint64_t &size) const +{ + INIT_SUCC(ret); + uint64_t real_size = 0; + ObJBVerType vertype = get_vertype(); + if ((ObJBVerType::J_ARRAY_V0 == vertype || ObJBVerType::J_OBJECT_V0 == vertype)) { + bool is_obj_type = json_type() == ObJsonNodeType::J_OBJECT; + uint64_t element_count = this->element_count(); + int64_t total_child_size = 0; + ObJsonBin child; + for (uint64_t i = 0; OB_SUCC(ret) && i < element_count; i++) { + uint64_t key_offset = 0; + uint64_t key_len = 0; + uint64_t child_size = 0; + if (ObJBVerType::J_OBJECT_V0 == vertype && OB_FAIL(get_key_entry(i, key_offset, key_len))) { + LOG_WARN("get_key_entry fail", K(ret), K(i)); + } else if (OB_FAIL(get_value(i, child))) { + LOG_WARN("get_value fail", K(ret), K(i)); + } else if (! child.is_inline_vertype() && OB_FAIL(child.get_serialize_size(child_size))) { + LOG_WARN("get child value size fail", K(ret), K(i)); + } else { + total_child_size += key_len + child_size; + } + } + if (OB_FAIL(ret)) { + } else if (obj_size_var_type() != entry_var_type()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("var type invalid", K(ret), K(meta_)); + } else { + uint8_t old_entry_var_type = entry_var_type(); + real_size = OB_JSON_BIN_HEADER_LEN + + element_count_var_size() + + entry_var_size() + + (is_obj_type ? 
element_count * (entry_var_size() * 2) : 0) + + element_count * (entry_var_size() + OB_JSON_BIN_VALUE_TYPE_LEN) + + total_child_size; + uint8_t new_entry_var_type = ObJsonVar::get_var_type(real_size); + if (OB_FAIL(extend_entry_var_type(is_obj_type, element_count, real_size, old_entry_var_type, new_entry_var_type, real_size))) { + LOG_WARN("extend_entry_var_type fail", K(ret), K(meta_), K(is_obj_type), K(element_count), K(real_size), K(old_entry_var_type), K(new_entry_var_type)); + } + } + } else if (OB_FAIL(get_area_size(real_size))) { + LOG_WARN("get_area_size fail", K(ret), K(meta_)); + } + + if (OB_SUCC(ret)) { + size = real_size; + } + return ret; +} + +// if var type not enough, need extend var type +int ObJsonBin::extend_entry_var_type( + const bool is_obj_type, + const uint64_t element_count, + const uint64_t old_size, + uint8_t old_entry_var_type, + uint8_t &new_entry_var_type, + uint64_t &new_size) const +{ + INIT_SUCC(ret); + new_entry_var_type = ObJsonVar::get_var_type(old_size); + new_size = old_size; + static const int64_t OB_JSONBIN_CONVERGEN_TIME = 3; + for (int i = 0; i < OB_JSONBIN_CONVERGEN_TIME && new_entry_var_type > old_entry_var_type; ++i) { + uint8_t entry_var_size_inc = ObJsonVar::get_var_size(new_entry_var_type) - ObJsonVar::get_var_size(old_entry_var_type); + // plus for obj_size + new_size += entry_var_size_inc; + // puus for value entry + new_size += element_count * entry_var_size_inc; + // plus key entry if is object + new_size += (is_obj_type ? element_count * (entry_var_size_inc * 2) : 0); + old_entry_var_type = new_entry_var_type; + new_entry_var_type = ObJsonVar::get_var_type(new_size); + } + return ret; +} + +int ObJsonBin::get_used_bytes(uint64_t &size) const +{ + INIT_SUCC(ret); + if (! 
is_at_root()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("should call at root", K(ret), K(pos_), K(node_stack_.size())); + } else { + size = cursor_->get_length(); + } + return ret; +} + +int ObJsonBin::get_value_binary(ObString &out) const +{ + INIT_SUCC(ret); + uint64_t area_size = 0; + uint64_t total_len = cursor_->get_length(); + // must no extend segment + if (OB_NOT_NULL(ctx_) && ctx_->extend_seg_offset_ != 0 && ctx_->extend_seg_offset_ != total_len) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support", K(ret), K(pos_), K(total_len), KPC(this)); + } else if (OB_FAIL(get_area_size(area_size))) { + LOG_WARN("get_area_size", K(ret), K(pos_), KPC(this)); + } else if (OB_FAIL(cursor_->get(pos_, area_size, out))) { + LOG_WARN("cursor get_data fail", K(ret), K(pos_), K(area_size), KPC(this)); + } + return ret; +} + +int ObJsonBin::get_raw_binary(ObString &buf, ObIAllocator *allocator) const +{ + INIT_SUCC(ret); + ObIAllocator * allocator_ptr = (allocator == NULL) ? allocator_ : allocator; + uint8_t type = OB_JSON_TYPE_GET_INLINE(get_type()); // dst type take off inline mask + ObJBVerType vertype = static_cast(type); + ObJsonNodeType node_type = ObJsonVerType::get_json_type(vertype); + ObJsonBuffer result(allocator_ptr); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + bool is_rebuild = true; + if (node_type == ObJsonNodeType::J_ARRAY || node_type == ObJsonNodeType::J_OBJECT) { + if (nullptr == update_ctx && is_at_root()) { + if (OB_FAIL(cursor_->get_data(buf))) { + LOG_WARN("get_data fail", K(ret)); + } else { + is_rebuild = false; + } + } else if (OB_FAIL(add_doc_header_v0(result))) { + LOG_WARN("add_doc_header_v0 fail", K(ret)); + } else if (OB_FAIL(rebuild_json_value(result))) { + LOG_WARN("failed to rebuild inline value", K(ret)); + } else if (OB_FAIL(set_doc_header_v0(result, result.length()))) { + LOG_WARN("set_doc_header_v0 fail", K(ret)); + } + } else { + // for scalar type, need add type byte except string type + // so have to use ObStringBuffer + if 
(!ObJsonVerType::is_opaque_or_string(vertype) && + OB_FAIL(result.append(reinterpret_cast(&vertype), sizeof(uint8_t)))) { + LOG_WARN("failed to serialize json tree at append used size", K(ret), K(result.length())); + } else if (OB_FAIL(rebuild_json_value(result))) { + LOG_WARN("failed to rebuild inline value", K(ret)); + } + } + + if (OB_SUCC(ret) && is_rebuild) { + // need ensure memory not release + result.get_result_string(buf); + } + return ret; +} + +int ObJsonBin::get_raw_binary_v0(ObString &buf, ObIAllocator *allocator) const +{ + INIT_SUCC(ret); + ObIAllocator * allocator_ptr = (allocator == NULL) ? allocator_ : allocator; + uint8_t type = OB_JSON_TYPE_GET_INLINE(get_type()); // dst type take off inline mask + ObJBVerType vertype = static_cast(type); + ObJsonNodeType node_type = ObJsonVerType::get_json_type(vertype); + ObJsonBuffer result(allocator_ptr); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + bool is_rebuild = true; + if (node_type == ObJsonNodeType::J_ARRAY || node_type == ObJsonNodeType::J_OBJECT) { + if (OB_FAIL(cursor_->get_data(buf))) { + LOG_WARN("cursor get_data fail", K(ret)); + } else { + buf.assign_ptr(buf.ptr() + pos_, buf.length() - pos_); + is_rebuild = false; + } + } else { + if (!ObJsonVerType::is_opaque_or_string(vertype) && + OB_FAIL(result.append(reinterpret_cast(&vertype), sizeof(uint8_t)))) { + LOG_WARN("failed to serialize json tree at append used size", K(ret), K(result.length())); + } else if (OB_FAIL(rebuild_json_value(result))) { + LOG_WARN("failed to rebuild inline value", K(ret)); + } + } + + if (OB_SUCC(ret) && is_rebuild) { + // need ensure memory not release + result.get_result_string(buf); } return ret; } @@ -1807,194 +1824,354 @@ int ObJsonBin::raw_binary_at_iter(ObString &buf) const int ObJsonBin::get_free_space(size_t &space) const { INIT_SUCC(ret); - uint64_t actual_size = curr_.length(); + uint64_t actual_size = cursor_->get_length(); uint64_t used_size = 0; - - uint8_t node_type = 
*reinterpret_cast(curr_.ptr()); - ObJBVerType node_type_val = static_cast(OB_JSON_TYPE_GET_INLINE(node_type)); - if (ObJsonVerType::get_json_type(node_type_val) != ObJsonNodeType::J_ARRAY && - ObJsonVerType::get_json_type(node_type_val) != ObJsonNodeType::J_OBJECT) { + if (0 == actual_size) { space = 0; + } else if (! is_at_root()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("should call at root", K(ret), K(pos_), K(node_stack_.size())); + } else if (OB_FAIL(get_serialize_size(used_size))) { + LOG_WARN("get_serialize_size fail", K(ret)); + } else if (used_size > actual_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("length invalid.", K(ret), K(actual_size), K(used_size)); } else { - uint8_t type, obj_size_type; - uint64_t count, obj_size, offset = 0; - parse_obj_header(curr_.ptr(), offset, node_type, type, obj_size_type, count, obj_size); - - used_size = obj_size; - if (used_size > actual_size) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("length invalid.", K(ret), K(actual_size), K(used_size)); - } else { - space = actual_size - used_size; - } + space = actual_size - (used_size + pos_); } + return ret; +} +int ObJsonBin::init_cursor(const ObString& data) +{ + INIT_SUCC(ret); + if (OB_NOT_NULL(ctx_) && (OB_NOT_NULL(ctx_->update_ctx_))) { + if (! 
data.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data should empty", K(ret), KPC(ctx_), K(data)); + } else if (OB_ISNULL(ctx_->update_ctx_->cursor_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update_ctx cursor is null or data not empty", K(ret), KPC(ctx_), KPC(ctx_->update_ctx_), K(data)); + } else { + cursor_ = ctx_->update_ctx_->cursor_; + } + } else if (OB_FAIL(local_cursor_.init(data))) { + cursor_ = &local_cursor_; + } return ret; } // reset iter to root int ObJsonBin::reset_iter() +{ + node_stack_.reset(); + return reset(0); +} + +int ObJsonBin::reset(const uint8_t type, const int64_t offset, const uint8_t value_entry_var_type) { INIT_SUCC(ret); - char *ptr = curr_.ptr(); - if (OB_ISNULL(ptr)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json binary ptr is null.", K(ret)); + if (OB_FAIL(reset(type, local_cursor_.data(), offset, value_entry_var_type, ctx_))) { + LOG_WARN("reset fail", K(ret), K(type), K(offset), K(value_entry_var_type)); + } + return ret; +} + +int ObJsonBin::reset(const int64_t offset) +{ + INIT_SUCC(ret); + if (OB_FAIL(reset(local_cursor_.data(), offset, ctx_))) { + LOG_WARN("reset fail", K(ret), K(offset)); + } + return ret; +} + +int ObJsonBin::reset_child(ObJsonBin &child, const int64_t child_offset) const +{ + INIT_SUCC(ret); + if (OB_FAIL(child.reset(local_cursor_.data(), child_offset, ctx_))) { + LOG_WARN("reset fail", K(ret), K(child_offset)); + } + return ret; +} + +int ObJsonBin::reset_child( + ObJsonBin &child, + const uint8_t child_type, + const int64_t child_offset, + const uint8_t value_entry_var_type) const +{ + INIT_SUCC(ret); + if (OB_FAIL(child.reset(child_type, local_cursor_.data(), child_offset, value_entry_var_type, ctx_))) { + LOG_WARN("reset fail", K(ret), K(child_type), K(child_offset), K(value_entry_var_type)); + } + return ret; +} + +// before this called, need set cursor +int ObJsonBin::parse_type_() +{ + return cursor_->read_i8(pos_, reinterpret_cast(&meta_.type_)); +} + +int ObJsonBin::parse_doc_header_() +{ + 
INIT_SUCC(ret); + if (OB_FAIL(parse_type_())) { + LOG_WARN("parse_type fail", K(ret)); + } else if (is_doc_header_v0(meta_.type_)) { + if (OB_FAIL(init_ctx())) { + LOG_WARN("init_ctx v0 fail", K(ret)); + } + } + return ret; +} + +int ObJsonBin::skip_type_byte_() +{ + INIT_SUCC(ret); + ObJBVerType vertype = get_vertype(); + if (ObJsonVerType::is_array(vertype) + || ObJsonVerType::is_object(vertype) + || ObJsonVerType::is_opaque_or_string(vertype)) { } else { - // parse first byte - type_ = *reinterpret_cast(ptr); - pos_ = 0; - stack_reset(stack_buf_); - ObJBVerType node_vertype = static_cast(OB_JSON_TYPE_GET_INLINE(type_)); - ObJsonNodeType node_type = ObJsonVerType::get_json_type(node_vertype); - if (!(node_type == ObJsonNodeType::J_ARRAY || - node_type == ObJsonNodeType::J_OBJECT || - ObJsonVerType::is_opaque_or_string(node_vertype))) { - pos_ += sizeof(uint8_t); - } - - if (OB_FAIL(set_curr_by_type(pos_, 0, type_))) { - LOG_WARN("falied to set root obj", K(ret), K(pos_), K(type_)); - } + pos_ += sizeof(uint8_t); } return ret; } -int64_t ObJsonBin::to_string(char *buf, int64_t len) const +bool ObJsonBin::is_empty_data() const { - int64_t pos = 0; - databuff_printf(buf, len, pos, "is_alloc=%d type=%u pos=%ld " - "element_count=%lu bytes=%lu field_type=%d int_val=%ld uint_val=%lu " - "double_val=%lf", - is_alloc_, type_, pos_, element_count_, bytes_, field_type_, - int_val_, uint_val_, double_val_); - return pos; + return nullptr == cursor_ || cursor_->get_length() <= 0; } -int ObJsonBin::move_iter(ObJsonBuffer& stack, uint32_t start) +int ObJsonBin::reset(const ObString &buffer, int64_t offset, ObJsonBinCtx *ctx) { INIT_SUCC(ret); - uint32_t depth = stack_size(stack); - uint64_t offset = 0; - ObJBNodeMeta node_meta; - curr_.assign_ptr(result_.ptr(), result_.length()); - char* data = result_.ptr(); + pos_ = offset; + ctx_ = ctx; + meta_.reset(); // notice : this clear meta_, all set meta info should after this - stack_at(stack, start, node_meta); - data += 
node_meta.offset_; - offset = node_meta.offset_; - - uint8_t node_type, type, obj_size_type; - uint64_t count, obj_size; - - for (uint32_t idx = 0; OB_SUCC(ret) && idx < depth; ++idx) { - stack_at(stack, idx, node_meta); - node_meta.offset_ = data - result_.ptr(); - parse_obj_header(data, offset, node_type, type, obj_size_type, count, obj_size); - if (ObJsonVerType::is_array(static_cast(node_type)) || - ObJsonVerType::is_object(static_cast(node_type))) { - uint64_t type_size = ObJsonVar::get_var_size(type); - uint64_t key_entry_size = 2 * type_size; - uint64_t val_entry_size = type_size + sizeof(uint8_t); - char* val_entry = data + offset + (val_entry_size * node_meta.idx_); - if (ObJsonVerType::is_object(static_cast(node_type))) { - val_entry += count * key_entry_size; - } - uint64_t val_offset; - if (OB_FAIL(ObJsonVar::read_var(val_entry, type, &val_offset))) { - LOG_WARN("falied to read value offset", K(ret), K(node_type)); - } else { - stack_update(stack, idx, node_meta); - data += val_offset; - offset = 0; - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("falied to parse, invalid node type", K(ret), K(node_type)); - } - } - - if (OB_SUCC(ret)) { - stack_back(stack, node_meta); - if (OB_FAIL(set_curr_by_type(node_meta.offset_, 0, node_meta.ver_type_))) { - LOG_WARN("falied to set curr type", K(ret), K(node_type)); - } + if (OB_FAIL(init_cursor(buffer))) { + LOG_WARN("init_cursor fail", K(ret)); + } else if (is_empty_data()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("buf or len is empty", K(ret), K(offset), K(buffer)); + } else if (OB_FAIL(parse_doc_header_())) { + LOG_WARN("parse_doc_header_ fail", K(ret)); + } else if (OB_FAIL(parse_type_())) { + LOG_WARN("parse_type_ fail", K(ret)); + } else if (OB_FAIL(skip_type_byte_())) { + LOG_WARN("skip_type_byte_ fail", K(ret)); + } else if (OB_FAIL(init_bin_data())) { + LOG_WARN("init_bin_data fail", K(ret), K(pos_), K(meta_.type_)); } return ret; } + +int ObJsonBin::reset( + const uint8_t type, + const ObString 
&buffer, + const int64_t offset, + const uint8_t value_entry_var_type, + ObJsonBinCtx *ctx) +{ + INIT_SUCC(ret); + pos_ = offset; + ctx_ = ctx; + meta_.reset(); // notice : this clear meta_, all set meta info should after this + meta_.type_ = type; + meta_.entry_size_ = value_entry_var_type; + + if (OB_FAIL(init_cursor(buffer))) { + LOG_WARN("init_cursor fail", K(ret)); + } else if (is_empty_data()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("buf or len is empty", K(ret), K(type), K(offset), K(value_entry_var_type), K(buffer)); + } else if (OB_FAIL(init_bin_data())) { + LOG_WARN("falied to set root obj", K(ret), K(pos_), K(type)); + } else if (meta_.type_ != type) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("input type not match parsed type", K(ret), K(type), K(meta_)); + } + return ret; +} + + +int ObJsonBin::init_ctx() +{ + INIT_SUCC(ret); + if (nullptr == ctx_) { + if (OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is null", K(ret), K(ctx_)); + } else if (OB_ISNULL(ctx_ = OB_NEWx(ObJsonBinCtx, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc ctx fail", K(ret), K(sizeof(ObJsonBinCtx))); + } else { + is_alloc_ctx_ = true; + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(parse_doc_header_v0())) { + LOG_WARN("parse doc header fail", K(ret)); + } + return ret; +} + // move iter to its parent int ObJsonBin::move_parent_iter() { INIT_SUCC(ret); ObJBNodeMeta curr_parent; - if (OB_FAIL(stack_back(stack_buf_, curr_parent, true))) { - LOG_WARN("fail to pop back from parent", K(ret), K(stack_size(stack_buf_))); - } else if (curr_parent.offset_ >= curr_.length()) { + if (OB_FAIL(node_stack_.back(curr_parent, true))) { + LOG_WARN("fail to pop back from parent", K(ret), K(node_stack_.size())); + } else if (OB_FAIL(reset(curr_parent.offset_))) { + LOG_WARN("failed to move iter to parent", K(ret), K(pos_), K(curr_parent)); + } + return ret; +} + +int ObJsonBin::get_parent(ObIJsonBase *& parent) const +{ + INIT_SUCC(ret); + ObJsonBin 
*parent_bin = nullptr; + // if not root, parent stack should not be null + // Otherwise, cann't get the correct return value by get_parent() + if (!is_at_root() && node_stack_.size() <= 0) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("fail to move iter", K(curr_parent.offset_), K(stack_size(stack_buf_)), K(curr_.length())); + LOG_WARN("non-root node, but node_stack is empty", K(ret), K(meta_), K(pos_), KPC(this)); + } else if (node_stack_.size() <= 0) { + } else if (OB_FAIL(create_new_binary(nullptr, parent_bin))) { + LOG_WARN("create_new_binary fail", K(ret)); + } else if (OB_FAIL(parent_bin->node_stack_.copy(this->node_stack_))) { + LOG_WARN("copy node stack fail", K(ret)); + } else if (OB_FAIL(parent_bin->move_parent_iter())) { + LOG_WARN("move parent fail", K(ret)); } else { - // father must be a container - char *ptr = curr_.ptr() + curr_parent.offset_; - ObJsonBinHeader *obj_header = reinterpret_cast(ptr); - type_ = obj_header->type_; - pos_ = curr_parent.offset_; - if (OB_FAIL(set_curr_by_type(pos_, 0, type_))) { - LOG_WARN("failed to move iter to parent", K(ret), K(pos_), K(type_)); - } + parent = parent_bin; + } + return ret; +} + + +int ObJsonBin::init_string_node_v0() +{ + INIT_SUCC(ret); + int64_t str_len = 0; + int64_t offset = pos_ + sizeof(uint8_t); + ObString data; + if (OB_FAIL(cursor_->decode_vi64(offset, &str_len))) { + LOG_WARN("decode string length fail", K(ret), K(pos_), K(offset), K(str_len)); + } else if (OB_FAIL(cursor_->get(offset, str_len, data))) { + LOG_WARN("get string data fail", K(ret), K(pos_), K(offset), K(str_len)); + } else { + meta_.set_element_count(static_cast(str_len)); + meta_.bytes_ = offset - pos_ + str_len; + meta_.str_data_offset_ = offset - pos_; + data_ = data.ptr(); + } + return ret; +} + +int ObJsonBin::init_string_node() +{ + INIT_SUCC(ret); + if (ObJBVerType::J_STRING_V0 == get_vertype()) { + ret = init_string_node_v0(); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("parse string type invlaid vertype.", K(ret), 
K(meta_)); + } + return ret; +} + +int ObJsonBin::init_opaque_node_v0() +{ + INIT_SUCC(ret); + int64_t offset = sizeof(uint8_t); + int64_t str_len = 0; + ObString data; + // [vertype(uint8_t)][ObObjType(uint16_t)][length(uint64_t)][data] + if (OB_FAIL(cursor_->read_i16(pos_ + offset, reinterpret_cast(&meta_.field_type_)))) { + LOG_WARN("read_u16 fail", K(ret), K(pos_), K(offset), K(str_len)); + } else if (OB_FALSE_IT(offset += sizeof(uint16_t))) { + } else if (OB_FAIL(cursor_->read_i64(pos_ + offset, &str_len))) { + LOG_WARN("read_u16 fail", K(ret), K(pos_), K(offset), K(str_len)); + } else if (OB_FALSE_IT(offset += sizeof(uint64_t))) { + } else if (OB_FAIL(cursor_->get(pos_ + offset, str_len, data))) { + LOG_WARN("get data fail", K(ret), K(pos_), K(offset), K(str_len)); + } else { + meta_.set_element_count(str_len); + meta_.bytes_ = offset + str_len; + meta_.str_data_offset_ = offset; + data_ = data.ptr(); + } + return ret; +} + +int ObJsonBin::init_opaque_node() +{ + INIT_SUCC(ret); + if (ObJBVerType::J_OPAQUE_V0 == get_vertype()) { + ret = init_opaque_node_v0(); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("parse string type invlaid vertype.", K(ret), K(meta_)); } return ret; } // inlined will reuse value entry offset which length is type_size -int ObJsonBin::set_curr_by_type(int64_t new_pos, uint64_t val_offset, uint8_t type, uint8_t entry_size) +int ObJsonBin::init_bin_data() { INIT_SUCC(ret); - char *ptr = curr_.ptr(); - data_ = ptr + new_pos; - element_count_ = 1; // scalar is 1, container is acutual k-v pairs number - bool is_inlined = OB_JSON_TYPE_IS_INLINE(type); - if (!is_inlined && new_pos >= curr_.length()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("new pos invalid.", K(ret), K(curr_.length()), K(new_pos)); - } else { - ObJBVerType node_vertype = static_cast(OB_JSON_TYPE_GET_INLINE(type)); - ObJsonNodeType node_type = ObJsonVerType::get_json_type(node_vertype); + meta_.set_element_count(1); // scalar is 1, container is acutual k-v pairs number + bool 
is_inlined = is_inline_vertype(); + { + ObJsonNodeType node_type = json_type(); switch (node_type) { case ObJsonNodeType::J_NULL: { - bytes_ = is_inlined ? 0 : 1; + meta_.bytes_ = is_inlined ? 0 : 1; break; } case ObJsonNodeType::J_DECIMAL: case ObJsonNodeType::J_ODECIMAL: { ObPrecision prec = -1; ObScale scale = -1; - int64_t pos = 0; - if (OB_FAIL(serialization::decode_i16(data_, curr_.length() - new_pos, pos, &prec))) { - LOG_WARN("fail to deserialize decimal precision.", K(ret), K(new_pos), K(curr_.length())); - } else if (OB_FAIL(serialization::decode_i16(data_, curr_.length() - new_pos, pos, &scale))) { - LOG_WARN("fail to deserialize decimal scale.", K(ret), K(new_pos), K(pos), K(curr_.length())); - } else if (OB_FAIL(number_.deserialize(data_, curr_.length() - new_pos, pos))) { - LOG_WARN("failed to deserialize decimal data", K(ret), K(new_pos), K(pos), K(curr_.length())); + int64_t pos = pos_; + if (OB_FAIL(cursor_->decode_i16(pos, &prec))) { + LOG_WARN("fail to deserialize decimal precision.", K(ret), K(pos)); + } else if (OB_FAIL(cursor_->decode_i16(pos, &scale))) { + LOG_WARN("fail to deserialize decimal scale.", K(ret), K(pos), K(pos)); + } else if (OB_FAIL(cursor_->deserialize(pos, &number_))) { + LOG_WARN("failed to deserialize decimal data", K(ret), K(pos), K(pos)); } else { prec_ = prec; scale_ = scale; - bytes_ = pos; + meta_.bytes_ = pos - pos_; } break; } case ObJsonNodeType::J_INT: case ObJsonNodeType::J_OINT: { if (is_inlined) { - int_val_ = ObJsonVar::var_uint2int(val_offset, entry_size); - bytes_ = 0; + uint64_t inline_val = 0; + if (OB_FAIL(ObJsonVar::read_var(cursor_, pos_, meta_.entry_size_, reinterpret_cast(&inline_val)))) { + LOG_WARN("read inline value fail", K(ret), K(meta_)); + } else { + int_val_ = ObJsonVar::var_uint2int(inline_val, meta_.entry_size_); + meta_.bytes_ = 0; + } } else { int64_t val = 0; - int64_t pos = 0; - if (OB_FAIL(serialization::decode_vi64(data_, curr_.length() - new_pos, pos, &val))) { + int64_t pos = pos_; + 
if (OB_FAIL(cursor_->decode_vi64(pos, &val))) { LOG_WARN("decode int val failed.", K(ret)); } else { int_val_ = val; - bytes_ = pos; + meta_.bytes_ = pos - pos_; } } break; @@ -2002,122 +2179,91 @@ int ObJsonBin::set_curr_by_type(int64_t new_pos, uint64_t val_offset, uint8_t ty case ObJsonNodeType::J_UINT: case ObJsonNodeType::J_OLONG: { if (is_inlined) { - uint_val_ = static_cast(val_offset); - bytes_ = 0; + if (OB_FAIL(ObJsonVar::read_var(cursor_, pos_, meta_.entry_size_, &uint_val_))) { + LOG_WARN("read inline value fail", K(ret), K(meta_)); + } else { + meta_.bytes_ = 0; + } } else { int64_t val = 0; - int64_t pos = 0; - if (OB_FAIL(serialization::decode_vi64(data_, curr_.length() - new_pos, pos, &val))) { + int64_t pos = pos_; + if (OB_FAIL(cursor_->decode_vi64(pos, &val))) { LOG_WARN("decode uint val failed.", K(ret)); } else { - uint64_t uval = static_cast(val); - uint_val_ = uval; - bytes_ = pos; + uint_val_ = static_cast(val); + meta_.bytes_ = pos - pos_; } } break; } case ObJsonNodeType::J_DOUBLE: case ObJsonNodeType::J_ODOUBLE: { - double_val_ = *reinterpret_cast(data_); - bytes_ = sizeof(double); + if (OB_FAIL(cursor_->read_double(pos_, &double_val_))) { + LOG_WARN("read_double fail", K(ret)); + } else { + meta_.bytes_ = sizeof(double); + } break; } case ObJsonNodeType::J_OFLOAT: { - float_val_ = *reinterpret_cast(data_); - bytes_ = sizeof(float); + if (OB_FAIL(cursor_->read_float(pos_, &float_val_))) { + LOG_WARN("read_float fail", K(ret)); + } else { + meta_.bytes_ = sizeof(float); + } break; } case ObJsonNodeType::J_STRING: { - int64_t val = 0; - int64_t pos = 0; - ObJBVerType vertype = *reinterpret_cast(data_); - if (vertype == ObJBVerType::J_STRING_V0) { - pos += sizeof(uint8_t); - if (OB_FAIL(serialization::decode_vi64(data_, curr_.length() - new_pos, pos, &val))) { - LOG_WARN("decode string length failed.", K(ret)); - } else if (pos + val > curr_.length() - new_pos) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for str.", 
K(ret), K(curr_.length()), K(new_pos), K(pos), K(val)); - } else { - uint64_t length = static_cast(val); - element_count_ = length; - bytes_ = length + pos; - data_ = data_ + pos; - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("parse string type invlaid vertype.", K(ret), K(vertype)); + if (OB_FAIL(init_string_node())) { + LOG_WARN("init_string_node fail", K(ret)); } break; } case ObJsonNodeType::J_OBJECT: case ObJsonNodeType::J_ARRAY: { - int64_t left_len = curr_.length() - new_pos; - if (left_len <= OB_JSON_BIN_OBJ_HEADER_LEN) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for obj header.", K(ret), K(curr_.length()), K(new_pos)); - } else if (node_vertype == ObJBVerType::J_ARRAY_V0 || node_vertype == ObJBVerType::J_OBJECT_V0) { - // different version process - ObJsonBinObjHeader *header = reinterpret_cast(data_); - left_len -= OB_JSON_BIN_OBJ_HEADER_LEN; - uint64_t count_size = ObJsonVar::get_var_size(header->count_size_); - uint64_t obj_size_size = ObJsonVar::get_var_size(header->obj_size_size_); - if (left_len < count_size + obj_size_size) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for obj var size.", - K(ret), K(left_len), K(count_size), K(obj_size_size)); - } else { - if (OB_FAIL(ObJsonVar::read_var(header->used_size_, header->count_size_, &element_count_))) { - LOG_WARN("fail to read count var.", K(ret), K(header->count_size_)); - } else if (OB_FAIL(ObJsonVar::read_var(header->used_size_ + count_size, header->obj_size_size_, &bytes_))) { - LOG_WARN("fail to read obj_size var.", K(ret), K(header->obj_size_size_)); - } else { - left_len -= count_size; - left_len -= obj_size_size; - uint64_t entry_size = ObJsonVar::get_var_size(header->entry_size_); - uint64_t kv_entry_len = element_count_ * (entry_size + 1); // val_entry - if (node_type == ObJsonNodeType::J_OBJECT) { - kv_entry_len += element_count_ * (entry_size * 2); // key_entry - } - if (left_len < kv_entry_len) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data 
length is not enough for obj kv entry.", - K(ret), K(left_len), K(kv_entry_len), K(element_count_), K(entry_size)); - } - } - } + if (OB_FAIL(init_meta())) { + LOG_WARN("init meta fail", K(ret)); } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid ver type.", K(ret), K(node_vertype)); + meta_.bytes_ = meta_.obj_size(); } break; } case ObJsonNodeType::J_BOOLEAN: { - uint_val_ = is_inlined ? static_cast(val_offset) : static_cast(*data_); - bytes_ = is_inlined ? 0 : 1; + if (is_inlined) { + if (OB_FAIL(ObJsonVar::read_var(cursor_, pos_, meta_.entry_size_, &uint_val_))) { + LOG_WARN("read inline value fail", K(ret), K(meta_)); + } else { + meta_.bytes_ = 0; + } + } else { + bool val = false; + if (OB_FAIL(cursor_->read_bool(pos_, &val))) { + LOG_WARN("read_float fail", K(ret)); + } else { + meta_.bytes_ = sizeof(bool); + uint_val_ = val; + } + } break; } case ObJsonNodeType::J_DATE: case ObJsonNodeType::J_ORACLEDATE: { - if (sizeof(int32_t) > curr_.length() - new_pos) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for date.", K(ret), K(curr_.length()), K(new_pos)); + int32_t val = 0; + if (OB_FAIL(cursor_->read_i32(pos_, &val))) { + LOG_WARN("read_id32 fail", K(ret), K(pos_)); } else { - field_type_ = ObDateType; - int_val_ = *reinterpret_cast(data_); - bytes_ = sizeof(int32_t); + meta_.field_type_ = ObDateType; + int_val_ = val; + meta_.bytes_ = sizeof(int32_t); } break; } case ObJsonNodeType::J_TIME: { - if (sizeof(int64_t) > curr_.length() - new_pos) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for time.", K(ret), K(curr_.length()), K(new_pos)); + if (OB_FAIL(cursor_->read_i64(pos_, &int_val_))) { + LOG_WARN("read_id32 fail", K(ret), K(pos_)); } else { - field_type_ = ObTimeType; - int_val_ = *reinterpret_cast(data_); - bytes_ = sizeof(int64_t); + meta_.field_type_ = ObTimeType; + meta_.bytes_ = sizeof(int64_t); } break; } @@ -2125,49 +2271,26 @@ int ObJsonBin::set_curr_by_type(int64_t new_pos, uint64_t val_offset, 
uint8_t ty case ObJsonNodeType::J_ODATE: case ObJsonNodeType::J_OTIMESTAMP: case ObJsonNodeType::J_OTIMESTAMPTZ: { - if (sizeof(int64_t) > curr_.length() - new_pos) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for datetime.", K(ret), K(curr_.length()), K(new_pos)); + if (OB_FAIL(cursor_->read_i64(pos_, &int_val_))) { + LOG_WARN("read_id32 fail", K(ret), K(pos_)); } else { - field_type_ = ObJsonBaseUtil::get_time_type(node_type); - int_val_ = *reinterpret_cast(data_); - bytes_ = sizeof(int64_t); + meta_.field_type_ = ObJsonBaseUtil::get_time_type(node_type); + meta_.bytes_ = sizeof(int64_t); } break; } case ObJsonNodeType::J_TIMESTAMP: { - if (sizeof(int64_t) > curr_.length() - new_pos) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for timestamp.", K(ret), K(curr_.length()), K(new_pos)); + if (OB_FAIL(cursor_->read_i64(pos_, &int_val_))) { + LOG_WARN("read_id32 fail", K(ret), K(pos_)); } else { - field_type_ = ObTimestampType; - int_val_ = *reinterpret_cast(data_); - bytes_ = sizeof(int64_t); + meta_.field_type_ = ObTimestampType; + meta_.bytes_ = sizeof(int64_t); } break; } case ObJsonNodeType::J_OPAQUE: { - ObJBVerType vertype = *reinterpret_cast(data_); - if (vertype == ObJBVerType::J_OPAQUE_V0) { - char* data = data_ + sizeof(uint8_t); - if (sizeof(uint16_t) + sizeof(uint64_t) + sizeof(uint8_t) > curr_.length() - new_pos) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for opaque len.", K(ret), K(curr_.length()), K(new_pos)); - } else { - field_type_ = static_cast(*reinterpret_cast(data)); - element_count_ = *reinterpret_cast(data + sizeof(uint16_t)); - if (element_count_ + sizeof(uint16_t) + sizeof(uint64_t) + sizeof(uint8_t) > curr_.length() - new_pos) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for opaque val.", K(ret), - K(curr_.length()), K(new_pos), K(element_count_)); - } else { - bytes_ = sizeof(uint16_t) + sizeof(uint64_t) + sizeof(uint8_t) + element_count_; - data_ = 
data_ + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint64_t); - } - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("parse opaque type invlaid vertype.", K(ret), K(vertype)); + if (OB_FAIL(init_opaque_node())) { + LOG_WARN("init_opaque_node fail", K(ret)); } break; } @@ -2189,9 +2312,9 @@ int ObJsonBin::element(size_t index) if (node_type != ObJsonNodeType::J_ARRAY && node_type != ObJsonNodeType::J_OBJECT) { ret = OB_OBJ_TYPE_ERROR; LOG_WARN("wrong node_type.", K(ret), K(node_type)); - } else if (index >= element_count_) { + } else if (index >= get_element_count()) { ret = OB_OUT_OF_ELEMENT; - LOG_WARN("index out of range.", K(ret), K(index), K(element_count_)); + LOG_WARN("index out of range.", K(ret), K(index), K(get_element_count())); } else { if (node_type == ObJsonNodeType::J_ARRAY) { ret = get_element_in_array(index); @@ -2219,7 +2342,7 @@ int ObJsonBin::lookup_index(const ObString &key, size_t *idx) const ObString key_iter; bool is_found = false; int64_t low = 0; - int64_t high = element_count_ - 1; + int64_t high = get_element_count() - 1; // do binary search while (OB_SUCC(ret) && low <= high) { int64_t mid = low + (high - low) / 2; @@ -2243,6 +2366,42 @@ int ObJsonBin::lookup_index(const ObString &key, size_t *idx) const return ret; } + +// find first position that greater than key +int ObJsonBin::lookup_insert_postion(const ObString &key, size_t &idx) const +{ + INIT_SUCC(ret); + ObJsonNodeType node_type = this->json_type(); + if (node_type != ObJsonNodeType::J_OBJECT) { + ret = OB_OBJ_TYPE_ERROR; + LOG_WARN("wrong node_type.", K(ret), K(node_type)); + } + + ObJsonKeyCompare comparator; + ObString key_iter; + int64_t low = 0; + int64_t high = get_element_count() - 1; + // do binary search, find last key that less or equal to key + while (OB_SUCC(ret) && low <= high) { + int64_t mid = low + (high - low) / 2; + if (OB_FAIL(get_key_in_object(mid, key_iter))) { + LOG_WARN("fail to get key.", K(ret), K(mid), K(low), K(high)); + } else { + int 
compare_result = comparator.compare(key_iter, key); + if (compare_result > 0) { + high = mid - 1; + } else { + low = mid + 1; + } + } + } + // low is the lower_bound, +1 is upper_bound + if (OB_SUCC(ret)) { + idx = low; + } + return ret; +} + // mover iter to the value of key int ObJsonBin::lookup(const ObString &key) { @@ -2264,77 +2423,50 @@ int ObJsonBin::lookup(const ObString &key) return ret; } -void ObJsonBin::parse_obj_header(const char *data, uint64_t &offset, - uint8_t &node_type, uint8_t &type, uint8_t& obj_size_type, uint64_t &count, uint64_t &obj_size) const -{ - const ObJsonBinObjHeader *header = reinterpret_cast(data + offset); - node_type = header->type_; - offset += OB_JSON_BIN_OBJ_HEADER_LEN; - type = header->entry_size_; - obj_size_type = header->obj_size_size_; - ObJsonVar::read_var(data + offset, header->count_size_, &count); - offset += ObJsonVar::get_var_size(header->count_size_); - ObJsonVar::read_var(data + offset, header->obj_size_size_, &obj_size); - offset += ObJsonVar::get_var_size(header->obj_size_size_); -} - -int ObJsonBin::get_element_in_array_v0(size_t index, char **get_addr_only) +int ObJsonBin::get_element_v0(size_t index, uint64_t *get_addr_only) { INIT_SUCC(ret); uint64_t offset = pos_; - char *data = curr_.ptr(); - uint8_t node_type, type, obj_size_type; - uint64_t count, obj_size; - parse_obj_header(data, offset, node_type, type, obj_size_type, count, obj_size); - uint64_t type_size = ObJsonVar::get_var_size(type); - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); + uint64_t value_offset = 0; + uint8_t value_type = 0; + uint64_t value_entry_offset = get_value_entry_offset(index); - const char *val_entry = (data + offset); - uint64_t val_offset, val_type; - uint8_t v_type = static_cast(JBLS_UINT8); - if (OB_FAIL(ObJsonVar::read_var(val_entry + val_entry_size * index, type, &val_offset))) { - LOG_WARN("failed to read val offset", K(ret), K(index), K(val_entry_size), K(type)); - } else if 
(OB_FAIL(ObJsonVar::read_var(val_entry + val_entry_size * index + type_size, v_type, &val_type))) { - LOG_WARN("failed to read val type", K(ret), K(index), K(val_entry_size), K(v_type)); - } else { - char *val = data + pos_ + val_offset; - if (OB_NOT_NULL(get_addr_only)) { - if (OB_JSON_TYPE_IS_INLINE(static_cast(val_type))) { - // for inline, set addr to val_offset - *get_addr_only = data + offset + val_entry_size * index; - } else { - *get_addr_only = val; - } - } else { - type_ = static_cast(val_type); - ObJBNodeMeta path_node(node_type, obj_size_type, type, index, pos_, obj_size); - if (OB_NOT_NULL(allocator_) && OB_FAIL(stack_push(stack_buf_, path_node))) { - LOG_WARN("failed to push parent pos.", K(ret), K(pos_), K(stack_size(stack_buf_))); - } else { - if (OB_JSON_TYPE_IS_INLINE(type_)) { - // for inline, set pos_ to val_offset - pos_ = offset + val_entry_size * index; - } else { - pos_ = pos_ + val_offset; - } - if (OB_FAIL(set_curr_by_type(pos_, val_offset, val_type, type))) { - LOG_WARN("failed to move iter to sub obj.", K(ret), K(index)); - } - } + if (OB_FAIL(get_value_entry(index, value_offset, value_type))) { + LOG_WARN("get_value_entry fail", K(index)); + } else if (OB_JSON_TYPE_IS_INLINE(value_type)) { + offset = pos_ + value_entry_offset; + } else if (is_forward_v0(value_type)) { + offset = get_extend_value_offset(value_offset); + if (OB_FAIL(get_extend_value_type(offset, value_type))) { + LOG_WARN("get_extend_value_type fail", K(ret), K(index), K(value_offset)); + } else if (! 
need_type_prefix(value_type)) { + offset += sizeof(uint8_t); } + } else { + offset = pos_ + value_offset; + } + + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(get_addr_only)) { + *get_addr_only = offset; + } else if (OB_NOT_NULL(allocator_) && !is_seek_only_ + && OB_FAIL(node_stack_.push(ObJBNodeMeta(get_type(), obj_size_var_type(), entry_var_type(), index, pos_, obj_size())))) { + LOG_WARN("failed to push parent pos.", K(ret), K(pos_), K(node_stack_.size())); + } else if (OB_FAIL(reset(value_type, offset, entry_var_type()))) { + LOG_WARN("failed to move iter to sub obj.", K(ret), K(index), K(offset), K(value_type)); } return ret; } -int ObJsonBin::get_element_in_array(size_t index, char **get_addr_only) +int ObJsonBin::get_element_in_array(size_t index, uint64_t *get_addr_only) { INIT_SUCC(ret); - ObJBVerType vertype = *reinterpret_cast(curr_.ptr() + pos_); + ObJBVerType vertype = get_vertype(); switch (vertype) { case ObJBVerType::J_ARRAY_V0: { - ret = get_element_in_array_v0(index, get_addr_only); + ret = get_element_v0(index, get_addr_only); break; } default: @@ -2347,63 +2479,13 @@ int ObJsonBin::get_element_in_array(size_t index, char **get_addr_only) return ret; } -int ObJsonBin::get_element_in_object_v0(size_t i, char **get_addr_only) +int ObJsonBin::get_element_in_object(size_t i, uint64_t *get_addr_only) { INIT_SUCC(ret); - uint64_t offset = pos_; - char *data = curr_.ptr(); - uint8_t node_type, type, obj_size_type; - uint64_t count, obj_size; - parse_obj_header(data, offset, node_type, type, obj_size_type, count, obj_size); - uint64_t type_size = ObJsonVar::get_var_size(type); - uint64_t key_entry_size = type_size * 2; - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - - const char *key_entry = (data + offset); - const char *val_entry = (key_entry + key_entry_size * count); - uint64_t value_offset, val_type; - uint8_t v_type = static_cast(JBLS_UINT8); - if (OB_FAIL(ObJsonVar::read_var(val_entry + val_entry_size * i, type, &value_offset))) { - 
LOG_WARN("failed to read val offset", K(ret), K(i), K(val_entry_size), K(type)); - } else if (OB_FAIL(ObJsonVar::read_var(val_entry + val_entry_size * i + type_size, v_type, &val_type))) { - LOG_WARN("failed to read val type", K(ret), K(i), K(val_entry_size), K(v_type)); - } else { - char *val = data + pos_ + value_offset; - if (OB_NOT_NULL(get_addr_only)) { - if (OB_JSON_TYPE_IS_INLINE(static_cast(val_type))) { - // for inline, set addr to val_offset - *get_addr_only = data + offset + key_entry_size * count + val_entry_size * i; - } else { - *get_addr_only = val; - } - } else { - type_ = static_cast(val_type); - ObJBNodeMeta path_node(node_type, obj_size_type, type, i, pos_, obj_size); - if (OB_NOT_NULL(allocator_) && OB_FAIL(stack_push(stack_buf_, path_node))) { - LOG_WARN("failed to push parent pos.", K(ret), K(pos_), K(stack_size(stack_buf_))); - } else { - if (OB_JSON_TYPE_IS_INLINE(type_)) { - // for inline, set pos_ to val_offset - pos_ = offset + key_entry_size * count + val_entry_size * i; - } else { - pos_ = pos_ + value_offset; - } - if (OB_FAIL(set_curr_by_type(pos_, value_offset, val_type, type))) { - LOG_WARN("failed to move iter to sub obj.", K(ret), K(i)); - } - } - } - } - return ret; -} - -int ObJsonBin::get_element_in_object(size_t i, char **get_addr_only) -{ - INIT_SUCC(ret); - ObJBVerType vertype = *reinterpret_cast(curr_.ptr() + pos_); + ObJBVerType vertype = get_vertype(); switch (vertype) { case ObJBVerType::J_OBJECT_V0: { - ret = get_element_in_object_v0(i, get_addr_only); + ret = get_element_v0(i, get_addr_only); break; } default: { @@ -2418,23 +2500,12 @@ int ObJsonBin::get_element_in_object(size_t i, char **get_addr_only) int ObJsonBin::get_key_in_object_v0(size_t i, ObString &key) const { INIT_SUCC(ret); - uint64_t offset = pos_; - const char *data = curr_.ptr(); - uint8_t node_type, type, obj_size_type; - uint64_t count, obj_size; - parse_obj_header(data, offset, node_type, type, obj_size_type, count, obj_size); - uint64_t type_size = 
ObJsonVar::get_var_size(type); - uint64_t key_entry_size = type_size * 2; - - const char *key_entry = (data + offset); - uint64_t key_offset, key_len; - if (OB_FAIL(ObJsonVar::read_var(key_entry + key_entry_size * i, type, &key_offset))) { - LOG_WARN("failed to read key offset", K(ret)); - } else if (OB_FAIL(ObJsonVar::read_var(key_entry + key_entry_size * i + type_size, type, &key_len))) { - LOG_WARN("failed to read key len", K(ret)); - } else { - const char *key_val = data + pos_ + key_offset; - key.assign_ptr(key_val, key_len); + uint64_t key_offset = 0; + uint64_t key_len = 0; + if (OB_FAIL(get_key_entry(i, key_offset, key_len))) { + LOG_WARN("get_key_entry fail", K(ret), K(i), K(get_element_count())); + } else if (OB_FAIL(cursor_->get(pos_ + key_offset, key_len, key))) { + LOG_WARN("get_key_data fail", K(ret), K(i), K(get_element_count()), K(key_offset), K(key_len)); } return ret; } @@ -2443,7 +2514,7 @@ int ObJsonBin::get_key_in_object_v0(size_t i, ObString &key) const int ObJsonBin::get_key_in_object(size_t i, ObString &key) const { INIT_SUCC(ret); - ObJBVerType vertype = *reinterpret_cast(curr_.ptr() + pos_); + ObJBVerType vertype = get_vertype(); switch (vertype) { case ObJBVerType::J_OBJECT_V0: { ret = get_key_in_object_v0(i, key); @@ -2458,421 +2529,6 @@ int ObJsonBin::get_key_in_object(size_t i, ObString &key) const return ret; } -int ObJsonBin::estimate_need_rebuild_kv_entry(ObJsonBuffer &result, ObJsonBuffer& origin_stack, ObJsonBuffer& update_stack, - uint32_t& top_pos, bool& need_rebuild) -{ - INIT_SUCC(ret); - need_rebuild = false; - uint64_t new_offset = result.length(); - if (OB_FAIL(stack_copy(origin_stack, update_stack))) { - LOG_WARN("failed to copy path stack", K(ret), K(origin_stack.length())); - } else { - top_pos = stack_size(update_stack) - 1; - for (int idx = top_pos; idx >= 0; --idx) { - ObJBNodeMeta path_node; - stack_at(update_stack, idx, path_node); - uint8_t curr_node_offset_type = path_node.entry_type_; - uint8_t new_offset_type = 
ObJsonVar::get_var_type(new_offset - path_node.offset_ + path_node.obj_size_); - if (new_offset_type > curr_node_offset_type) { - need_rebuild = true; - top_pos = idx; - path_node.entry_type_ = new_offset_type; - stack_update(update_stack, idx, path_node); - } - } - } - return ret; -} - -int ObJsonBin::estimate_need_rebuild(ObJsonBuffer& update_stack, int64_t size_change, - int32_t pos, uint32_t& top_pos, bool& need_rebuild) -{ - INIT_SUCC(ret); - if (OB_FAIL(stack_copy(stack_buf_, update_stack))) { - LOG_WARN("failed to copy path stack", K(ret), K(stack_buf_.length())); - } else { - top_pos = stack_size(update_stack) - 1; - // if pos == 0, from last do scan, else from pos - if (pos == 0) { - pos = stack_size(update_stack) - 1; - } else if (pos > top_pos) { - ret = OB_ERROR_OUT_OF_RANGE; - LOG_WARN("calc rebuild failed", K(pos), K(top_pos)); - } else { - top_pos = pos; - } - for (int64_t idx = pos; size_change && stack_size(update_stack) > 0 && idx >= 0; idx--) { - ObJBNodeMeta path_node; - stack_at(update_stack, idx, path_node); - - int64_t new_data_size = path_node.obj_size_ + size_change; - uint8_t new_var_type = ObJsonVar::get_var_type(new_data_size); - if (new_var_type > path_node.size_type_ || new_var_type > path_node.entry_type_) { - top_pos = idx; - path_node.entry_type_ = new_var_type; - need_rebuild = true; - } - path_node.obj_size_ = new_data_size; - path_node.size_type_ = new_var_type; - stack_update(update_stack, idx, path_node); - } - - // if top pos == 0, do rebuild - if (top_pos != 0 && need_rebuild) { - ObJsonBuffer nw_stack(update_stack.get_allocator()); - uint32_t tmp_pos = top_pos; - estimate_need_rebuild_kv_entry(result_, update_stack, nw_stack, tmp_pos, need_rebuild); - if (top_pos > tmp_pos) { - top_pos = tmp_pos; - } - stack_copy(nw_stack, update_stack); - } - } - return ret; -} - -int ObJsonBin::rebuild_with_meta(const char *data, uint64_t length, ObJsonBuffer& old_stack, ObJsonBuffer& new_meta, - uint32_t min, uint32_t max, ObJsonBuffer 
&result, uint32_t depth) -{ - INIT_SUCC(ret); - if (min <= max) { - ObJBNodeMeta old_node; - ObJBNodeMeta new_node; - stack_at(old_stack, min, old_node); - stack_at(new_meta, min, new_node); - - // use transform matrix function later - if ((old_node.ver_type_ == ObJBVerType::J_ARRAY_V0 || old_node.ver_type_ == ObJBVerType::J_OBJECT_V0) && - (new_node.ver_type_ == ObJBVerType::J_ARRAY_V0 || new_node.ver_type_ == ObJBVerType::J_OBJECT_V0)) { - int64_t st_pos = result.length(); - uint64_t offset = old_node.offset_; - uint8_t node_type, var_type, obj_size_type; - uint64_t count, obj_size; - - const ObJsonBinObjHeader *header = reinterpret_cast(data + offset); - // parsing header using v0 format - parse_obj_header(data, offset, node_type, var_type, obj_size_type, count, obj_size); - - ObJsonBinHeader new_header = *header; - new_header.entry_size_ = new_node.entry_type_; - new_header.obj_size_size_ = new_node.size_type_; - new_header.is_continuous_ = 1; - uint64_t new_count_size = ObJsonVar::get_var_size(header->count_size_); - uint64_t new_type_size = ObJsonVar::get_var_size(new_node.entry_type_); - uint64_t new_key_entry_size = new_type_size * 2; - uint64_t new_val_entry_size = new_type_size + sizeof(uint8_t); - uint64_t reserve_entry_size = count * new_val_entry_size; - - if (ObJsonVerType::is_object(static_cast(old_node.ver_type_))) { - reserve_entry_size += count * new_key_entry_size; - } - - // rebuild using latest format - // copy obj header, key entry, val entry, key(if need) - uint64_t type_size = ObJsonVar::get_var_size(var_type); - uint64_t key_entry_size = type_size * 2; - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - uint64_t meta_len = (offset - old_node.offset_) + key_entry_size * count + val_entry_size * count; - const char *old_val_entry = data + offset; - - if (ObJsonVerType::is_object(static_cast(old_node.ver_type_))) { - old_val_entry += key_entry_size * count; - } - - uint64_t new_val_entry_offset; - if 
(OB_FAIL(result.append(reinterpret_cast(&new_header), OB_JSON_BIN_HEADER_LEN))) { - LOG_WARN("failed to append header", K(ret)); - } else if (OB_FAIL(ObJsonVar::append_var(count, new_header.count_size_, result))) { - LOG_WARN("failed to append count", K(ret)); - } else if (OB_FAIL(ObJsonVar::append_var(new_node.obj_size_, new_node.size_type_, result))) { - LOG_WARN("failed to append obj size", K(ret)); - } else if (OB_FAIL(result.reserve(reserve_entry_size))) { - LOG_WARN("failed to reserve mem", K(ret), K(reserve_entry_size)); - } else if (ObJsonVerType::is_object(static_cast(old_node.ver_type_))) { - if (length < meta_len) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for obj.", K(ret), K(length), K(meta_len)); - } else { - // reserve key entry array - uint64_t new_key_entry_offset = result.length() - st_pos; - new_val_entry_offset = new_key_entry_offset + count * new_key_entry_size; - result.set_length(result.length() + reserve_entry_size); - - // using latest bin format - uint64_t key_offset, key_len; - const char *key_entry = (data + offset); - const char *last_key_offset_ptr = key_entry + key_entry_size * (count - 1); - const char *last_key_len_ptr = key_entry + key_entry_size * (count - 1) + type_size; - - // get last key offest and len - if (OB_FAIL(ObJsonVar::read_var(last_key_offset_ptr, var_type, &key_offset))) { - LOG_WARN("failed to read key offset", K(ret)); - } else if (OB_FAIL(ObJsonVar::read_var(last_key_len_ptr, var_type, &key_len))) { - LOG_WARN("failed to read key len", K(ret)); - } else { - for (int i = 0; OB_SUCC(ret) && i < count; ++i) { - uint64_t new_key_offset = result.length() - st_pos; - char* new_key_entry = result.ptr() + st_pos + new_key_entry_offset; - if (OB_FAIL(ObJsonVar::read_var(key_entry, var_type, &key_offset))) { - LOG_WARN("failed to read key offset.", K(ret)); - } else if (OB_FAIL(ObJsonVar::read_var(key_entry + type_size, var_type, &key_len))) { - LOG_WARN("failed to read key len.", K(ret)); - } else 
if (OB_FAIL(ObJsonVar::set_var(new_key_offset, new_node.entry_type_, new_key_entry))) { - LOG_WARN("failed to set key len.", K(ret)); - } else if (OB_FAIL(ObJsonVar::set_var(key_len, new_node.entry_type_, new_key_entry + new_type_size))) { - LOG_WARN("failed to set key len.", K(ret)); - } else if (OB_FAIL(result.append(data + key_offset, key_len))) { - LOG_WARN("failed to apend key.", K(ret)); - } else { - // append key entry [key-offset][key-len] - new_key_entry_offset += new_key_entry_size; - key_entry += key_entry_size; - } - } - } - } - } - - // reserve value entry array - - if (OB_SUCC(ret) && ObJsonVerType::is_array(static_cast(old_node.ver_type_))) { - new_val_entry_offset = result.length() - st_pos; - result.set_length(result.length() + reserve_entry_size); - } - - // process value - for (uint64_t i = 0; OB_SUCC(ret) && i < count; i++) { - uint64_t new_val_offset = result.length() - st_pos; - uint64_t val_offset, val_type; - uint8_t v_type = static_cast(JBLS_UINT8); - char* new_val_entry = result.ptr() + st_pos + new_val_entry_offset + i * new_val_entry_size; - if (OB_FAIL(ObJsonVar::read_var(old_val_entry + val_entry_size * i, var_type, &val_offset))) { - LOG_WARN("failed to read val offset", K(ret)); - } else if (OB_FAIL(ObJsonVar::read_var(old_val_entry + val_entry_size * i + type_size, v_type, &val_type))) { - LOG_WARN("failed to read val type", K(ret)); - } else if (OB_FAIL(ObJsonVar::set_var(val_offset, new_node.entry_type_, new_val_entry))) { - LOG_WARN("failed to set val offset", K(ret)); - } else if (OB_FAIL(ObJsonVar::set_var(val_type, v_type, new_val_entry + new_type_size))) { - LOG_WARN("failed to set val type", K(ret)); - } else { - uint8_t type = static_cast(val_type); - if (!OB_JSON_TYPE_IS_INLINE(type)) { - if (i == old_node.idx_) { - if (min < max) { - ret = rebuild_with_meta(data, length - val_offset, stack_buf_, new_meta, min+1, max, result); - } else { - ret = rebuild_json_value(data + old_node.offset_ + val_offset, length - val_offset, 
type, type, val_offset, result); - } - } else { - ret = rebuild_json_value(data + val_offset + old_node.offset_, length - val_offset, type, type, val_offset, result); - } - if (OB_SUCC(ret)) { - // fill value offset - new_val_entry = result.ptr() + st_pos + new_val_entry_offset + i * new_val_entry_size; - if (OB_FAIL(ObJsonVar::set_var(new_val_offset, new_node.entry_type_, new_val_entry))) { - LOG_WARN("failed to set val offset.", K(ret), K(i), K(new_val_offset), K(var_type)); - } - } else { - LOG_WARN("rebuild child node failed.", K(ret), K(i), K(val_type)); - } - } - } - } - - if (OB_SUCC(ret)) { - char* obj_size_ptr = result.ptr() + st_pos + OB_JSON_BIN_HEADER_LEN + new_count_size; - uint64_t actual_obj_size = result.length() - st_pos; - if (ObJsonVar::get_var_type(actual_obj_size) > new_node.size_type_) { - if (depth < OB_JSON_BIN_MAX_SERIALIZE_TIME) { - result.set_length(st_pos); - new_node.size_type_ = ObJsonVar::get_var_type(actual_obj_size); - stack_update(new_meta, min, new_node); - ret = rebuild_with_meta(data, length, stack_buf_, new_meta, min, max, result_, depth + 1); - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("rebuild failed sub obj size too large.", K(ret), K(actual_obj_size), K(new_node.size_type_)); - } - } else if (OB_FAIL(ObJsonVar::set_var(result.length() - st_pos, new_node.size_type_, obj_size_ptr))) { - LOG_WARN("rebuild failed set obj size.", K(ret), K(actual_obj_size)); - } else { - new_node.obj_size_ = actual_obj_size; - new_node.offset_ = st_pos; - stack_update(new_meta, min, new_node); - } - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to rebuild with meta.", K(ret)); - } - } - return ret; -} - -int ObJsonBin::update_parents(int64_t size_change, bool is_continous) -{ - INIT_SUCC(ret); - uint32_t stack_len = stack_size(stack_buf_); - char *data = result_.ptr(); - ObJsonBuffer new_stack(allocator_); - uint32_t top_pos; - int32_t update_begin_pos = -1; - bool need_rebuild = false; - bool is_rebuild = false; - if 
(OB_FAIL(estimate_need_rebuild(new_stack, size_change, 0, top_pos, need_rebuild))) { - LOG_WARN("failed calc new stack.", K(ret), K(stack_len)); - } else if (size_change < 0 || !need_rebuild) { - // no need rebuild, just do update data_size - update_begin_pos = stack_len - 1; - } else { - ObJBNodeMeta old_node, new_node; - stack_at(stack_buf_, top_pos, old_node); - stack_at(new_stack, top_pos, new_node); - if (old_node.ver_type_ == ObJBVerType::J_OBJECT_V0 || old_node.ver_type_ == ObJBVerType::J_ARRAY_V0) { - if (OB_FAIL(result_.reserve(new_node.obj_size_)) || - OB_FAIL(rebuild_with_meta(data, new_node.obj_size_, stack_buf_, new_stack, top_pos, stack_len-1, result_))) { - LOG_WARN("failed to rebuild.", K(ret)); - } else { - curr_.assign_ptr(result_.ptr(), result_.length()); - if (top_pos != 0) { - ObJBNodeMeta upper_node, path_node, old_node; - stack_at(new_stack, top_pos, path_node); - stack_at(new_stack, top_pos - 1, upper_node); - update_offset(upper_node.offset_, upper_node.idx_, path_node.offset_); - update_begin_pos = top_pos - 1; - size_change = path_node.obj_size_ - old_node.obj_size_; - if (OB_FAIL(move_iter(new_stack, 0))) { - reset_iter(); - LOG_WARN("failed to move iter.", K(ret)); - } else { - stack_copy(new_stack, stack_buf_); - } - } else { - is_rebuild = true; - ObJsonBuffer tmp_buf(allocator_); - ObJBNodeMeta root; - stack_at(new_stack, 0, root); - char* data = result_.ptr(); - uint64_t data_length = result_.length() - root.offset_; - if (OB_FAIL(rebuild_with_meta(data, data_length, stack_buf_, new_stack, 0, stack_len - 1, tmp_buf))) { - LOG_WARN("failed to rebuild all obj.", K(ret)); - } else { - result_.reuse(); - if (OB_FAIL(result_.append(tmp_buf.ptr(), tmp_buf.length()))) { - LOG_WARN("failed to copy result.", K(ret)); - } else if (OB_FAIL(move_iter(new_stack, 0))) { - reset_iter(); - LOG_WARN("failed to move iter.", K(ret)); - } else { - stack_copy(new_stack, stack_buf_); - } - } - } - } - } else { - ret = OB_ERR_UNEXPECTED; - 
LOG_WARN("failed to update obj size.", K(ret)); - } - } - - for (int i = update_begin_pos; OB_SUCC(ret) && !is_rebuild && i >= 0; i--) { - data = result_.ptr(); - ObJBNodeMeta path_node; - stack_at(stack_buf_, i, path_node); - if (path_node.ver_type_ == ObJBVerType::J_OBJECT_V0 || path_node.ver_type_ == ObJBVerType::J_ARRAY_V0) { - ObJsonBinHeader *header = reinterpret_cast(data + path_node.offset_); - if (!is_continous) { // set highest bit for append update, parent is not continuos - header->is_continuous_ = 0; - } - uint64_t obj_size; - uint64_t offset = ObJsonVar::get_var_size(header->count_size_); - if (OB_FAIL(ObJsonVar::read_var(header->used_size_ + offset, header->obj_size_size_, &obj_size))) { - LOG_WARN("failed to read obj size.", K(ret), K(header->obj_size_size_)); - } else { - obj_size += size_change; - if (OB_FAIL(ObJsonVar::set_var(obj_size, header->obj_size_size_, header->used_size_ + offset))) { - LOG_WARN("failed to set new obj size.", K(ret), K(obj_size), K(header->obj_size_size_)); - } - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to update obj size.", K(ret)); - } - } - - return ret; -} - - -int ObJsonBin::update_offset(uint64_t parent_offset, uint64_t idx, uint64_t value_offset) -{ - INIT_SUCC(ret); - char* data = result_.ptr() + parent_offset; - ObJBVerType vertype = *reinterpret_cast(data + parent_offset); - - if (vertype == ObJBVerType::J_OBJECT_V0 || vertype == ObJBVerType::J_ARRAY_V0) { - uint64_t offset = 0; - uint8_t var_type, node_type, obj_size_type; - uint64_t count, obj_size; - const ObJsonBinObjHeader *header = reinterpret_cast(data); - // parsing header using v0 format - parse_obj_header(data, offset, node_type, var_type, obj_size_type, count, obj_size); - - uint64_t type_size = ObJsonVar::get_var_size(var_type); - uint64_t key_entry_size = type_size * 2; - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - - char* value_offset_ptr = data + offset + idx * val_entry_size; - if (vertype == 
ObJBVerType::J_OBJECT_V0) { - value_offset_ptr += key_entry_size * count; - } - if (ObJsonVar::set_var(value_offset, var_type, value_offset_ptr)) { - LOG_WARN("failed: set var.", K(ret), K(var_type)); - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed: wrong node vertype.", K(ret), K(vertype)); - } - return ret; -} - -bool ObJsonBin::is_discontinuous() const -{ - ObJsonBinObjHeader *header = reinterpret_cast(data_); - return (header->is_continuous_ == 0); -} - -int ObJsonBin::get_update_val_ptr(ObJsonBin *new_value_bin, char *&val, uint64_t &len, ObJsonBuffer &str) -{ - INIT_SUCC(ret); - ObJsonNodeType new_value_type = new_value_bin->json_type(); - switch (new_value_type) { - case ObJsonNodeType::J_ARRAY: // non-continue array or object do rebuild first - case ObJsonNodeType::J_OBJECT: { - size_t free_space; - if (OB_FAIL(new_value_bin->get_free_space(free_space))) { - LOG_WARN("failed to get free space. ", K(ret)); - break; - } else { - if (free_space > 0 || new_value_bin->is_discontinuous()) { - if (OB_FAIL(new_value_bin->rebuild_at_iter(str))) { - LOG_WARN("rebuild failed", K(ret)); - } else { - val = str.ptr(); - len = str.length(); - } - break; - } - } - } - default: { - val = new_value_bin->curr_.ptr() + new_value_bin->pos_; - len = new_value_bin->get_used_bytes(); - break; - } - } - return ret; -} - int ObJsonBin::update(const ObString &key, ObJsonBin *new_value) { INIT_SUCC(ret); @@ -2890,18 +2546,19 @@ int ObJsonBin::insert(const ObString &key, ObJsonBin *new_value, int64_t pos) INIT_SUCC(ret); ObJsonNodeType cur_node_type = json_type(); ObJBVerType ver_type = get_vertype(); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); if (key.empty()) { ret = OB_ERR_JSON_DOCUMENT_NULL_KEY; LOG_WARN("key is NULL", K(ret)); } else if (OB_ISNULL(new_value)) { ret = OB_ERR_NULL_VALUE; LOG_WARN("input json binary is null.", K(ret)); + } else if (OB_ISNULL(update_ctx)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("update_ctx is null", K(ret)); } else if 
(cur_node_type != ObJsonNodeType::J_OBJECT) { ret = OB_OBJ_TYPE_ERROR; LOG_WARN("wrong node_type.", K(ret), K(cur_node_type)); - } else if (!is_alloc_) { - ret = OB_ERR_READ_ONLY; - LOG_WARN("json binary is read only.", K(ret), K(is_alloc_)); } else if (ver_type == ObJBVerType::J_OBJECT_V0) { if (OB_FAIL(insert_v0(pos, key, new_value))) { LOG_WARN("json binary add object failed.", K(ret), K(ver_type)); @@ -2918,15 +2575,16 @@ int ObJsonBin::insert(ObJsonBin *new_value, int64_t pos) INIT_SUCC(ret); ObJsonNodeType cur_node_type = json_type(); ObJBVerType ver_type = this->get_vertype(); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); if (OB_ISNULL(new_value)) { ret = OB_ERR_NULL_VALUE; LOG_WARN("input json binary is null.", K(ret)); + } else if (OB_ISNULL(update_ctx)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("update_ctx is null", K(ret)); } else if (cur_node_type != ObJsonNodeType::J_ARRAY) { ret = OB_OBJ_TYPE_ERROR; LOG_WARN("wrong node_type.", K(ret), K(cur_node_type)); - } else if (!is_alloc_) { - ret = OB_ERR_READ_ONLY; - LOG_WARN("json binary is read only.", K(ret), K(is_alloc_)); } else if (ver_type == ObJBVerType::J_ARRAY_V0) { ObString key; if (OB_FAIL(insert_v0(pos, key, new_value))) { @@ -2954,18 +2612,19 @@ int ObJsonBin::update(int index, ObJsonBin *new_value) INIT_SUCC(ret); ObJsonNodeType cur_node_type = this->json_type(); ObJBVerType ver_type = this->get_vertype(); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); if (OB_ISNULL(new_value)) { ret = OB_ERR_NULL_VALUE; LOG_WARN("input json binary is null.", K(ret)); + } else if (OB_ISNULL(update_ctx)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("update_ctx is null", K(ret)); } else if (cur_node_type != ObJsonNodeType::J_ARRAY && cur_node_type != ObJsonNodeType::J_OBJECT) { ret = OB_OBJ_TYPE_ERROR; LOG_WARN("wrong node_type.", K(ret), K(cur_node_type)); - } else if (index >= element_count_) { + } else if (index >= get_element_count()) { ret = OB_OUT_OF_ELEMENT; - LOG_WARN("idx out of range.", K(ret), 
K(index), K(element_count_)); - } else if (!is_alloc_) { - ret = OB_ERR_READ_ONLY; - LOG_WARN("json binary is read only.", K(ret), K(is_alloc_)); + LOG_WARN("idx out of range.", K(ret), K(index), K(get_element_count())); } else if (ver_type == ObJBVerType::J_ARRAY_V0 || ver_type == ObJBVerType::J_OBJECT_V0) { if (OB_FAIL(update_v0(index, new_value))) { LOG_WARN("json binary update v0 failed.", K(ret), K(ver_type)); @@ -2977,579 +2636,392 @@ int ObJsonBin::update(int index, ObJsonBin *new_value) return ret; } -int ObJsonBin::insert_internal_v0(ObJBNodeMeta& meta, int64_t pos, const ObString &key, ObJsonBin *new_value, ObJsonBuffer& result) +int ObJsonBin::rebuild_child_key( + const int64_t index, + const ObString& child_key, + const int64_t key_offset, + ObJsonBuffer& result) { INIT_SUCC(ret); - int64_t st_pos = result.length(); - uint64_t offset = pos_; - uint8_t node_type, var_type, obj_size_type; - uint64_t count, obj_size; + if (OB_FAIL(set_key_entry(index, key_offset, child_key.length()))) { + LOG_WARN("set_key_entry fail", K(ret), K(child_key)); + } else if (OB_FAIL(result.append(child_key))) { + LOG_WARN("append key fail", K(ret)); + } + return ret; +} + +int ObJsonBin::rebuild_child( + const int64_t index, + const ObJsonBin& child_value, + const int64_t value_offset, + ObJsonBuffer& result) +{ + INIT_SUCC(ret); + uint8_t value_type = 0; + bool is_update_inline = false; + if (OB_FAIL(try_update_inline(index, &child_value, is_update_inline))) { + LOG_WARN("try_update_inline fail", K(ret), K(index)); + } else if (is_update_inline) { + LOG_DEBUG("try_update_inline success", K(index)); + } else if (OB_FALSE_IT(value_type = OB_JSON_TYPE_GET_INLINE(child_value.get_type()))) { + } else if (OB_FAIL(set_value_entry(index, value_offset, value_type))) { + LOG_WARN("set_value_entry fail", K(ret), K(value_offset), K(value_type)); + } else if (OB_FAIL(child_value.rebuild_json_value(result))) { + LOG_WARN("rebuild_json_value fail", K(ret), K(index)); + } + return ret; +} + 
+// insert new element to current bin will cause rebuilding current bin +int ObJsonBin::rebuild_with_new_insert_value(int64_t index, const ObString &new_key, ObJsonBin *new_value, ObStringBuffer &result) const +{ + INIT_SUCC(ret); + int64_t start_pos = result.length(); bool is_obj_type = json_type() == ObJsonNodeType::J_OBJECT; + ObJsonBinMeta meta; + ObJsonBin dst_bin; + uint64_t element_count = get_element_count(); - char* data = result_.ptr(); - const ObJsonBinHeader *header = reinterpret_cast(data + offset); - // parsing header using v0 format - parse_obj_header(data, offset, node_type, var_type, obj_size_type, count, obj_size); - - uint64_t length = obj_size; - ObJsonBinHeader new_header; - new_header.entry_size_ = meta.entry_type_; - new_header.obj_size_size_ = meta.size_type_; - new_header.is_continuous_ = 1; - new_header.count_size_ = ObJsonVar::get_var_type(count + 1); - new_header.type_ = meta.ver_type_; - uint64_t new_count_size = ObJsonVar::get_var_size(new_header.count_size_); - uint64_t new_type_size = ObJsonVar::get_var_size(new_header.entry_size_); - uint64_t new_key_entry_size = new_type_size * 2; - uint64_t new_val_entry_size = new_type_size + sizeof(uint8_t); - uint64_t reserve_entry_size = (count + 1) * new_val_entry_size; - - if (is_obj_type) { - reserve_entry_size += (count + 1) * new_key_entry_size; - } - - // rebuild using meta format - // copy obj header, key entry, val entry, key(if need) - uint64_t type_size = ObJsonVar::get_var_size(var_type); - uint64_t key_entry_size = type_size * 2; - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - uint64_t meta_len = (offset - pos_) + val_entry_size * count; - uint64_t old_val_entry_offset = offset; - - if (is_obj_type) { - old_val_entry_offset += key_entry_size * count; - meta_len += key_entry_size * count; - } - - uint64_t new_val_entry_offset; - if (OB_FAIL(result.reserve(meta.obj_size_))) { - LOG_WARN("failed to reserve mem", K(ret), K(meta.obj_size_)); - } else if 
(OB_FAIL(result.append(reinterpret_cast(&new_header), OB_JSON_BIN_HEADER_LEN))) { - LOG_WARN("failed to append header", K(ret)); - } else if (OB_FAIL(ObJsonVar::append_var(count + 1, new_header.count_size_, result))) { - LOG_WARN("failed to append count", K(ret)); - } else if (OB_FAIL(ObJsonVar::append_var(meta.obj_size_, new_header.obj_size_size_, result))) { - LOG_WARN("failed to append obj size", K(ret)); - } else if (is_obj_type) { - if (length < meta_len) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for obj.", K(ret), K(length), K(meta_len)); - } else { - // reserve key entry array - uint64_t new_key_entry_offset = result.length() - st_pos; - new_val_entry_offset = new_key_entry_offset + (count + 1) * new_key_entry_size; - result.set_length(result.length() + reserve_entry_size); - - // using latest bin format - uint64_t key_offset, key_len; - const char *key_entry = (result_.ptr() + offset); - bool is_inserted = false; - for (int i = 0; OB_SUCC(ret) && i <= count; i++) { - uint64_t new_key_offset = result.length() - st_pos; - char* new_key_entry = result.ptr() + st_pos + new_key_entry_offset; - if ((i == pos || i == count) && !is_inserted) { - if (OB_FAIL(ObJsonVar::set_var(new_key_offset, meta.entry_type_, new_key_entry))) { - LOG_WARN("failed to set key len.", K(ret)); - } else if (OB_FAIL(ObJsonVar::set_var(key.length(), meta.entry_type_, new_key_entry + new_type_size))) { - LOG_WARN("failed to set key len.", K(ret)); - } else if (OB_FAIL(result.append(key.ptr(), key.length()))) { - LOG_WARN("failed to apend key.", K(ret)); - } - is_inserted = true; - } else if (OB_FAIL(ObJsonVar::read_var(key_entry, var_type, &key_offset))) { - LOG_WARN("failed to read key offset.", K(ret)); - } else if (OB_FAIL(ObJsonVar::read_var(key_entry + type_size, var_type, &key_len))) { - LOG_WARN("failed to read key len.", K(ret)); - } else if (OB_FAIL(ObJsonVar::set_var(new_key_offset, meta.entry_type_, new_key_entry))) { - LOG_WARN("failed to set key len.", 
K(ret)); - } else if (OB_FAIL(ObJsonVar::set_var(key_len, meta.entry_type_, new_key_entry + new_type_size))) { - LOG_WARN("failed to set key len.", K(ret)); - } else if (OB_FAIL(result.append(data + key_offset, key_len))) { - LOG_WARN("failed to apend key.", K(ret)); - } else { - key_entry += key_entry_size; - } - - new_key_entry_offset += new_key_entry_size; - } - } + if (OB_FAIL(calc_size_with_insert_new_value(new_key, new_value, meta))) { + LOG_WARN("calc size fail", K(ret)); + } else if (OB_FAIL(meta.to_header(result))) { + LOG_WARN("to obj header fail", K(ret)); + } else if (OB_FAIL(dst_bin.reset(result.string(), 0, nullptr))) { + LOG_WARN("reset bin fail", K(ret), K(meta)); } else { - new_val_entry_offset = result.length() - st_pos; - result.set_length(result.length() + reserve_entry_size); + index = ((index == OB_JSON_INSERT_LAST || index > element_count) ? element_count : index); } - - // process value - char* new_val_entry; - char* old_val_entry; - bool is_inserted = false; - for (uint64_t i = 0; OB_SUCC(ret) && i <= count; ++i) { - new_val_entry = result.ptr() + st_pos + new_val_entry_offset; - old_val_entry = result_.ptr() + old_val_entry_offset; - uint64_t new_val_offset = result.length() - st_pos; - uint64_t val_offset, val_type; - uint8_t v_type = static_cast(JBLS_UINT8); - if ((i == pos || i == count) && !is_inserted) { - is_inserted = true; - int64_t tmp_new_val_entry_offset = st_pos + new_val_entry_offset; - if (!try_update_inline(new_value, meta.entry_type_, &tmp_new_val_entry_offset, result)) { - ObJsonBuffer str(allocator_); - char *new_val_ptr = NULL; - uint64_t new_val_length = 0; - if (OB_FAIL(get_update_val_ptr(new_value, new_val_ptr, new_val_length, str))) { - LOG_WARN("failed to get update val ptr", K(ret)); - } else if (OB_FAIL(result.append(new_val_ptr, new_val_length))) { - LOG_WARN("failed to append new value. 
", K(ret)); - } else if (OB_FAIL(ObJsonVar::set_var(new_val_offset, meta.entry_type_, new_val_entry))) { - LOG_WARN("failed to set val offset", K(ret)); - } else if (OB_FAIL(ObJsonVar::set_var(new_value->get_vertype(), v_type, new_val_entry + new_type_size))) { - LOG_WARN("failed to set val type", K(ret)); - } - } - } else if (OB_FAIL(ObJsonVar::read_var(old_val_entry, var_type, &val_offset))) { - LOG_WARN("failed to read val offset", K(ret)); - } else if (OB_FAIL(ObJsonVar::read_var(old_val_entry + type_size, v_type, &val_type))) { - LOG_WARN("failed to read val type", K(ret)); - } else if (OB_FAIL(ObJsonVar::set_var(val_offset, meta.entry_type_, new_val_entry))) { - LOG_WARN("failed to set val offset", K(ret)); - } else if (OB_FAIL(ObJsonVar::set_var(val_type, v_type, new_val_entry + new_type_size))) { - LOG_WARN("failed to set val type", K(ret)); - } else { - uint8_t type = static_cast(val_type); - if (!OB_JSON_TYPE_IS_INLINE(type)) { - if (OB_SUCC(rebuild_json_value(result_.ptr() + val_offset + pos_, length - val_offset, type, type, val_offset, result))) { - // fill value offset - new_val_entry = result.ptr() + st_pos + new_val_entry_offset; - if (OB_FAIL(ObJsonVar::set_var(new_val_offset, meta.entry_type_, new_val_entry))) { - LOG_WARN("failed to set val offset.", K(ret), K(i), K(new_val_offset), K(var_type)); - } - } else { - LOG_WARN("rebuild child node failed.", K(ret), K(i), K(val_type)); - } - } - old_val_entry_offset += val_entry_size; + // key entry + for (int i = 0; OB_SUCC(ret) && is_obj_type && i < index; i++) { + ObString src_key; + uint64_t key_offset = result.length() - start_pos; + if (OB_FAIL(get_key(i, src_key))) { + LOG_WARN("get_key from src_bin fail", K(ret), K(i)); + } else if (OB_FAIL(dst_bin.rebuild_child_key(i, src_key, key_offset, result))) { + LOG_WARN("set_key fail", K(ret), K(src_key)); + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), 0))) { + } + } + if (OB_SUCC(ret) && is_obj_type) { + uint64_t key_offset = 
result.length() - start_pos; + if (OB_FAIL(dst_bin.rebuild_child_key(index, new_key, key_offset, result))) { + LOG_WARN("set_key fail", K(ret), K(new_key)); + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), 0))) { + } + } + for (int i = index; OB_SUCC(ret) && is_obj_type && i < element_count; i++) { + ObString src_key; + uint64_t key_offset = result.length() - start_pos; + if (OB_FAIL(get_key(i, src_key))) { + LOG_WARN("get_key from src_bin fail", K(ret), K(i)); + } else if (OB_FAIL(dst_bin.rebuild_child_key(i + 1, src_key, key_offset, result))) { + LOG_WARN("set_key fail", K(ret), K(src_key)); + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), 0))) { + } + } + // value entry + ObJsonBin child_value; + for (int i = 0; OB_SUCC(ret) && i < index; i++) { + uint64_t value_offset = result.length() - start_pos; + if (OB_FAIL(get_value(i, child_value))) { + LOG_WARN("get child value fail", K(ret), K(i)); + } else if (OB_FAIL(dst_bin.rebuild_child(i, child_value, value_offset, result))) { + LOG_WARN("rebuild_child fail", K(ret), K(i)); + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), 0))) { } - - new_val_entry_offset += new_val_entry_size; } - if (OB_SUCC(ret)) { - char* obj_size_ptr = result.ptr() + st_pos + OB_JSON_BIN_HEADER_LEN + new_count_size; - uint64_t actual_obj_size = result.length() - st_pos; - if (OB_FAIL(ObJsonVar::set_var(actual_obj_size, meta.size_type_, obj_size_ptr))) { - LOG_WARN("rebuild failed set obj size.", K(ret), K(actual_obj_size)); - } else { - meta.obj_size_ = actual_obj_size; - meta.offset_ = st_pos; + uint64_t value_offset = result.length() - start_pos; + if (OB_FAIL(dst_bin.rebuild_child(index, *new_value, value_offset, result))) { + LOG_WARN("try_update_inline fail", K(ret), K(index)); + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), 0))) { } } + for (int i = index; OB_SUCC(ret) && i < element_count; i++) { + uint64_t value_offset = result.length() - start_pos; + if (OB_FAIL(get_value(i, 
child_value))) { + LOG_WARN("get child value fail", K(ret), K(i)); + } else if (OB_FAIL(dst_bin.rebuild_child(i + 1, child_value, value_offset, result))) { + LOG_WARN("try_update_inline fail", K(ret), K(i)); + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), 0))) { + } + } + if (OB_FAIL(ret)) { + } else if (dst_bin.obj_size() < (result.length() - start_pos)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("obj_size incorrect", K(ret), "bin", dst_bin.obj_size(), "buf", result.length(), K(start_pos)); + } else if (dst_bin.obj_size() == (result.length() - start_pos)) { // if equal, just skip + } else if (OB_FAIL(dst_bin.set_obj_size(result.length() - start_pos))) { + LOG_WARN("set obj_size fail", K(ret), "bin", dst_bin.obj_size(), "buf", result.length(), K(start_pos)); + } return ret; } -int ObJsonBin::insert_v0(int64_t pos, const ObString &key, ObJsonBin *new_value) +int ObJsonBin::calc_size_with_insert_new_value(const ObString &new_key, const ObJsonBin *new_value, ObJsonBinMeta &new_meta) const { INIT_SUCC(ret); - bool is_exist = false; bool is_obj_type = json_type() == ObJsonNodeType::J_OBJECT; + uint64_t new_serial_size = 0; + uint64_t old_serial_size = 0; + uint64_t new_value_serial_size = 0; + uint64_t old_element_count = this->element_count(); + uint64_t new_element_count = old_element_count + 1; + uint8_t old_count_var_type = element_count_var_type(); + uint8_t new_count_var_type = ObJsonVar::get_var_type(new_element_count); + uint8_t old_entry_var_type = entry_var_type(); - if (is_obj_type) { - size_t idx; - if (OB_SUCC(lookup_index(key, &idx))) { - if (OB_FAIL(update_v0(idx, new_value))) { - LOG_WARN("failed: key exist, do update failed.", K(idx), K(key)); - } else { - is_exist = true; - } - } else if (ret == OB_SEARCH_NOT_FOUND) { - ret = OB_SUCCESS; + if (OB_FAIL(get_serialize_size(old_serial_size))) { + LOG_WARN("get_serialize_size fail", K(ret)); + } else if (OB_FAIL(new_value->get_serialize_size(new_value_serial_size))) { + 
LOG_WARN("get_serialize_size for new_value fail", K(ret)); + } else { + // plus value_size and value_entry size + new_serial_size = old_serial_size + new_value_serial_size + (ObJsonVar::get_var_size(old_entry_var_type) + OB_JSON_BIN_VALUE_TYPE_LEN); + // plus key size and key_entry size + new_serial_size += (is_obj_type ? (new_key.length() + (ObJsonVar::get_var_size(old_entry_var_type) * 2)) : 0); + // plus count size if changed + new_serial_size += (new_count_var_type > old_count_var_type ? (ObJsonVar::get_var_size(new_count_var_type) - ObJsonVar::get_var_size(old_count_var_type)) : 0); + + // may change value entry var type and obj_size_var_type + // entry_var_type calc from obj_size + // so entry_var_type generaly same as obj_size_var_type + uint8_t new_entry_var_type = ObJsonVar::get_var_type(new_serial_size); + uint8_t new_obj_size_type = new_entry_var_type; + + if (OB_FAIL(extend_entry_var_type(is_obj_type, new_element_count, new_serial_size, old_entry_var_type, new_entry_var_type, new_serial_size))) { + LOG_WARN("extend_entry_var_type fail", K(ret), K(is_obj_type), K(new_element_count), K(new_serial_size), K(old_entry_var_type), K(new_entry_var_type)); } else { - LOG_WARN("failed: lookup key internel error.", K(ret), K(key)); - } - } + new_obj_size_type = new_entry_var_type; - if (OB_SUCC(ret) && !is_exist) { - // 1. 
seek index value offset and value type - uint64_t offset = pos_; - char *data = result_.ptr(); - uint8_t node_type, var_type, obj_size_type, old_entry_type; - uint64_t count, obj_size; - parse_obj_header(data, offset, node_type, var_type, obj_size_type, count, obj_size); - uint64_t type_size = ObJsonVar::get_var_size(var_type); - uint64_t key_entry_size = type_size * 2; - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - - uint64_t new_obj_size = obj_size + new_value->get_used_bytes(); - uint8_t new_count_type = ObJsonVar::get_var_type(count + 1); - uint8_t new_entry_type = ObJsonVar::get_var_type(new_obj_size); - old_entry_type = var_type; - new_obj_size += (ObJsonVar::get_var_size(ObJsonVar::get_var_type(count + 1)) - - ObJsonVar::get_var_size(ObJsonVar::get_var_type(count))); - new_obj_size += (count + 1) * (ObJsonVar::get_var_size(new_entry_type) - ObJsonVar::get_var_size(old_entry_type)); - if (is_obj_type) { - new_obj_size += 2 * (count + 1) * (ObJsonVar::get_var_size(new_entry_type) - ObJsonVar::get_var_size(old_entry_type)); - } - - // cause entry type are relevent - uint8_t old_obj_size_type = ObJsonVar::get_var_type(obj_size); - uint8_t new_obj_size_type = ObJsonVar::get_var_type(new_obj_size); - static const int64_t OB_JSONBIN_CONVERGEN_TIME = 3; - for (int i = 0; i < OB_JSONBIN_CONVERGEN_TIME && new_obj_size_type >old_obj_size_type; ++i) { - old_entry_type = new_entry_type; - new_entry_type = ObJsonVar::get_var_type(new_obj_size); - new_obj_size += (count + 1) * (ObJsonVar::get_var_size(new_entry_type) - ObJsonVar::get_var_size(old_entry_type)); - if (is_obj_type) { - new_obj_size += 2 * (count + 1) * (ObJsonVar::get_var_size(new_entry_type) - ObJsonVar::get_var_size(old_entry_type)); - } - old_obj_size_type = new_obj_size_type; - new_obj_size_type = ObJsonVar::get_var_type(new_obj_size); - new_entry_type = new_obj_size_type; - } - - int32_t stk_len = stack_size(stack_buf_); - ObJsonBuffer nw_stack(allocator_); - uint32_t top_pos = stk_len; - 
bool rebuild_all = false; - ObJBNodeMeta node; - if (stk_len > 0) { - bool rebuild = false; - if (OB_FAIL(estimate_need_rebuild(nw_stack, new_obj_size - obj_size, 0, top_pos, rebuild))) { - LOG_WARN("failed: calc update objsize rebuild meta.", K(ret), K(stk_len)); - } else if (!rebuild && OB_FAIL(estimate_need_rebuild_kv_entry(result_, stack_buf_, nw_stack, top_pos, rebuild))) { - LOG_WARN("failed: calc update entry type rebuild meta.", K(ret), K(stk_len)); - } else if (rebuild) { - if (top_pos == 0) { - rebuild_all = true; - } else { - stack_at(stack_buf_, top_pos, node); - // node.obj-size + new_obj_size - obj_size => new object size - // node.obj-size + new_obj_size - obj_size + new_object_size => the total hole size - rebuild_all = (node.obj_size_ + new_obj_size - obj_size + new_obj_size) + - result_.length() > result_.length() * OB_JSON_BIN_REBUILD_THRESHOLD; - } - } - } else { - rebuild_all = true; - } - - uint64_t st_pos = result_.length(); - node.entry_type_ = new_entry_type; - node.size_type_ = new_obj_size_type; - node.ver_type_ = get_vertype(); - node.obj_size_ = new_obj_size; - if (OB_FAIL(insert_internal_v0(node, pos, key, new_value, result_))) { - LOG_WARN("failed: insert value with meta.", K(ret), K(pos)); - } else if (stk_len == 0) { - char* new_start = result_.ptr() + node.offset_; - MEMMOVE(result_.ptr(), new_start, node.obj_size_); - result_.set_length(node.obj_size_); - element_count_++; - bytes_ = node.obj_size_; - } else if (OB_FAIL(stack_push(nw_stack, node)) || OB_FAIL(stack_push(stack_buf_, node))) { - LOG_WARN("failed: stack push.", K(ret), K(pos)); - } else if (rebuild_all) { - ObJsonBuffer tmp_buf(allocator_); - if (OB_FAIL(rebuild_with_meta(result_.ptr(), result_.length(), stack_buf_, nw_stack, top_pos, stk_len, tmp_buf))) { - LOG_WARN("failed: rebuild with meta.", K(ret)); - } else if (OB_FAIL(result_.set_length(0)) || OB_FAIL(result_.append(tmp_buf.ptr(), tmp_buf.length()))) { - LOG_WARN("failed: copy new data.", K(ret)); - } else if 
(OB_FAIL(move_iter(nw_stack, 0))) { - LOG_WARN("failed: move iter.", K(ret)); - } else if (OB_FAIL(stack_copy(nw_stack, stack_buf_))) { - LOG_WARN("failed: stack copy.", K(ret)); - } else if (OB_FAIL(move_parent_iter())) { - LOG_WARN("failed: move iter.", K(ret)); - } - } else { - stack_at(nw_stack, top_pos, node); - if (OB_FAIL(result_.reserve(node.obj_size_))) { - LOG_WARN("failed: reserve mem.", K(ret)); - } else if (OB_FAIL(rebuild_with_meta(result_.ptr(), result_.length(), stack_buf_, nw_stack, top_pos, stk_len, result_))) { - LOG_WARN("failed: rebuild with meta.", K(ret)); - } else if (OB_FAIL(move_iter(nw_stack, top_pos))) { - LOG_WARN("failed: move iter.", K(ret)); - } else if (OB_FAIL(stack_copy(nw_stack, stack_buf_))) { - LOG_WARN("failed: stack copy.", K(ret)); - } else if (OB_FAIL(move_parent_iter())) { - LOG_WARN("failed: move iter.", K(ret)); - } else { - ObJBNodeMeta upper_node, path_node; - stack_at(nw_stack, top_pos -1, upper_node); - stack_at(nw_stack, top_pos, path_node); - if (OB_FAIL(update_offset(upper_node.offset_, upper_node.idx_, path_node.offset_))) { - LOG_WARN("failed: update offset.", K(ret)); - } - } - } - - if (OB_SUCC(ret)) { - curr_.assign_ptr(result_.ptr(), result_.length()); + new_meta.set_type(get_vertype(), false); + new_meta.set_element_count(new_element_count); + new_meta.set_element_count_var_type(new_count_var_type); + new_meta.set_obj_size(new_serial_size); + new_meta.set_obj_size_var_type(new_obj_size_type); + new_meta.set_entry_var_type(new_entry_var_type); + new_meta.set_is_continuous(true); + new_meta.calc_entry_array(); } } return ret; } -int ObJsonBin::update_v0(int index, ObJsonBin *new_value) +// element count not change, just replace with new value at some position +int ObJsonBin::calc_size_with_new_value(const ObJsonBin *old_value, const ObJsonBin *new_value, ObJsonBinMeta &new_meta) const { INIT_SUCC(ret); - ObJsonNodeType cur_node_type = this->json_type(); - ObJBVerType ver_type = this->get_vertype(); + bool 
is_obj_type = json_type() == ObJsonNodeType::J_OBJECT; + uint64_t new_serial_size = 0; + uint64_t old_serial_size = 0; + uint64_t old_value_serial_size = 0; + uint64_t new_value_serial_size = 0; + uint8_t old_entry_var_type = entry_var_type(); - // 1. seek index value offset and value type - uint64_t offset = pos_; - char *data = result_.ptr(); - uint8_t node_type, var_type, obj_size_type; - uint64_t count, obj_size; - parse_obj_header(data, offset, node_type, var_type, obj_size_type, count, obj_size); - uint64_t type_size = ObJsonVar::get_var_size(var_type); - uint64_t key_entry_size = type_size * 2; - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - if (OB_JSON_TYPE_GET_INLINE(node_type) == static_cast(ObJsonNodeType::J_OBJECT)) { - offset += key_entry_size * count; // if type is object, need to move over key entry size - } - int64_t val_entry_offset = offset + val_entry_size * index; - bool need_update_parent = false; - // 2. first try inline new_value - if (!try_update_inline(new_value, var_type, &val_entry_offset, result_)) { - // 3. check curr is inlined - char *val_offset = (data + val_entry_offset); - uint8_t *val_type = reinterpret_cast(val_offset + type_size); - uint64_t val_type_offset = val_entry_offset + type_size; - bool is_inlined = OB_JSON_TYPE_IS_INLINE(*val_type); - // 4. check curr bytes and new_value bytes - if (OB_FAIL(this->element(index))) { - LOG_WARN("move iter to child failed.", K(ret), K(index)); + if (OB_FAIL(get_serialize_size(old_serial_size))) { + LOG_WARN("get_serialize_size fail", K(ret)); + } else if (! 
old_value->is_inline_vertype() && OB_FAIL(old_value->get_serialize_size(old_value_serial_size))) { + LOG_WARN("get_serialize_size for new_value fail", K(ret)); + } else if (OB_FAIL(new_value->get_serialize_size(new_value_serial_size))) { + LOG_WARN("get_serialize_size for new_value fail", K(ret)); + } else { + // plus value_size and value_entry size + new_serial_size = old_serial_size - old_value_serial_size + new_value_serial_size; + + // may change value entry var type and obj_size_var_type + // entry_var_type calc from obj_size + // so entry_var_type generaly same as obj_size_var_type + uint8_t new_entry_var_type = ObJsonVar::get_var_type(new_serial_size); + uint8_t new_obj_size_type = new_entry_var_type; + + if (OB_FAIL(extend_entry_var_type(is_obj_type, this->element_count(), new_serial_size, old_entry_var_type, new_entry_var_type, new_serial_size))) { + LOG_WARN("extend_entry_var_type fail", K(ret), K(meta_), K(is_obj_type), K(this->element_count()), K(new_serial_size), K(old_entry_var_type), K(new_entry_var_type)); } else { - ObJsonBin *new_value_bin = new_value; - // local is not discontinuous, can not do inplace update - bool can_do_inplace = true; - if (this->json_type() == ObJsonNodeType::J_ARRAY || this->json_type() == ObJsonNodeType::J_OBJECT) { - can_do_inplace = (this->is_discontinuous() == false); - } - int64_t bytes_changed = is_inlined ? new_value_bin->get_used_bytes() : - (new_value_bin->get_used_bytes() - this->get_used_bytes()); - if (bytes_changed <= 0 && can_do_inplace) { - // 5. 
do inplace update - ObJsonBuffer str(allocator_); - char *new_val_ptr = NULL; - uint64_t new_val_length = 0; - if (OB_FAIL(get_update_val_ptr(new_value_bin, new_val_ptr, new_val_length, str))) { - LOG_WARN("failed to get update val ptr", K(ret)); - } else { - char *val_ptr = data + pos_; - MEMCPY(val_ptr, new_val_ptr, new_val_length); - *val_type = static_cast(new_value_bin->json_type()); - } - str.reset(); - } else { - // new value bytes are bigger then curr bytes, do append - // 5. do append update - uint64_t new_val_offset = result_.length(); - ObJBNodeMeta path_node; - stack_back(stack_buf_, path_node); - uint8_t new_var_type = ObJsonVar::get_var_type(new_val_offset - path_node.offset_); - if (new_var_type > var_type) { - ObJsonBuffer nw_stack(allocator_); - uint32_t top_pos = stack_size(stack_buf_) - 1; - bool rebuild = false; - if (OB_FAIL(estimate_need_rebuild_kv_entry(result_, stack_buf_, nw_stack, top_pos, rebuild))) { - LOG_WARN("failed to estimate. ", K(ret)); - } else { - stack_at(nw_stack, top_pos, path_node); - char* rebuild_data = result_.ptr() + path_node.offset_; - if (OB_FAIL(result_.reserve(path_node.obj_size_)) || - OB_FAIL(rebuild_with_meta(rebuild_data, result_.length() - path_node.offset_, stack_buf_, nw_stack, top_pos, - stack_size(nw_stack) - 1, result_))) { - LOG_WARN("failed to rebuild with meta.", K(ret), K(top_pos)); - } else { - ObJBNodeMeta new_top_meta; - stack_at(nw_stack, top_pos, new_top_meta); - stack_at(stack_buf_, top_pos, path_node); + new_obj_size_type = new_entry_var_type; - int64_t meta_change = new_top_meta.obj_size_ - path_node.obj_size_; - if (top_pos != 0) { - ObJBNodeMeta upper_node, path_node; - stack_at(nw_stack, top_pos -1, upper_node); - stack_at(nw_stack, top_pos, path_node); - if (OB_FAIL(update_offset(upper_node.offset_, upper_node.idx_, path_node.offset_))) { - LOG_WARN("failed update offset.", K(ret), K(path_node.offset_), K(upper_node.offset_)); - } else { - stack_update(nw_stack, top_pos - 1, upper_node); - 
while (top_pos >= stack_size(stack_buf_) - 1) { - stack_back(stack_buf_, path_node, true); - } - - if (OB_FAIL(update_parents(meta_change, false))) { - LOG_WARN("failed update parent.", K(ret), K(meta_change)); - } else { - curr_.assign_ptr(result_.ptr(), result_.length()); - stack_at(stack_buf_, 0, path_node); - stack_update(nw_stack, 0, path_node); - if (OB_FAIL(move_iter(nw_stack))) { - LOG_WARN("failed to locate new pos.", K(ret)); - } else { - stack_copy(nw_stack, stack_buf_); - } - } - } - } else { - ObJsonBuffer tmp_buf(allocator_); - ObJBNodeMeta root; - stack_at(nw_stack, 0, root); - char* data = result_.ptr(); - uint64_t data_length = result_.length() - root.offset_; - if (OB_FAIL(rebuild_with_meta(data, data_length, stack_buf_, nw_stack, 0, stack_size(nw_stack) - 1, tmp_buf))) { - LOG_WARN("failed to rebuild all obj.", K(ret)); - } else { - result_.reuse(); - if (OB_FAIL(result_.append(tmp_buf.ptr(), tmp_buf.length()))) { - LOG_WARN("failed to copy result.", K(ret)); - } else if (OB_FAIL(move_iter(nw_stack, 0))) { - reset_iter(); - LOG_WARN("failed to move iter.", K(ret)); - } else { - stack_copy(nw_stack, stack_buf_); - } - } - } - } - } - } - data = result_.ptr(); - new_val_offset = result_.length(); - if (OB_SUCC(ret)) { - ObJsonBuffer str(allocator_); - char *new_val_ptr = NULL; - uint64_t new_val_length = 0; - if (OB_FAIL(get_update_val_ptr(new_value_bin, new_val_ptr, new_val_length, str))) { - LOG_WARN("failed to get update val ptr", K(ret)); - } else { - char *val_ptr = data + pos_; - // do append for simple type - if (OB_FAIL(result_.append(new_val_ptr, new_val_length))) { - LOG_WARN("failed to append new value. 
", K(ret)); - } else { // after result_ append, may do realloc, should refresh curr_ - curr_.assign_ptr(result_.ptr(), result_.length()); - } - } - str.reset(); - } - // update val_offset and val_type - if (OB_SUCC(ret)) { - ObJBNodeMeta path_node; - stack_back(stack_buf_, path_node); - int64_t parent_pos = path_node.offset_; - data = result_.ptr(); - - type_size = ObJsonVar::get_var_size(path_node.entry_type_); - key_entry_size = type_size * 2; - val_entry_size = (type_size + sizeof(uint8_t)); - uint64_t count_size = ObJsonVar::get_var_size(ObJsonVar::get_var_type(count)); - uint64_t obj_size_size = ObJsonVar::get_var_size(path_node.size_type_); - - val_offset = data + path_node.offset_ + OB_JSON_BIN_HEADER_LEN + count_size + obj_size_size; - val_offset += val_entry_size * index; - - ObJBVerType vertype = *reinterpret_cast(data + parent_pos); - if (ObJsonVerType::get_json_type(vertype) == ObJsonNodeType::J_OBJECT) { - val_offset += key_entry_size * count; - } - if (OB_FAIL(ObJsonVar::set_var(new_val_offset - parent_pos, path_node.entry_type_, val_offset))) { - LOG_WARN("failed to set new value offset.", K(ret)); - } else { - uint8_t *val_type_ptr = reinterpret_cast(val_offset) + ObJsonVar::get_var_size(path_node.entry_type_); - *val_type_ptr = static_cast(new_value_bin->json_type()); - } - } - } - // update parent obj size - if (OB_SUCC(ret)) { - update_parents(bytes_changed, false); - if (OB_FAIL(this->move_parent_iter())) { // move curr iter back to parent - LOG_WARN("failed to move iter back to parent.", K(ret)); - } - } + new_meta.set_type(get_vertype(), false); + new_meta.set_element_count(this->element_count()); + new_meta.set_element_count_var_type(element_count_var_type()); + new_meta.set_obj_size(new_serial_size); + new_meta.set_obj_size_var_type(new_obj_size_type); + new_meta.set_entry_var_type(new_entry_var_type); + new_meta.set_is_continuous(true); + new_meta.calc_entry_array(); } } return ret; } + +// current node value entry size not enough store 
new_value offset, so need rebuild current node. +// if current node's parent value entry size not enough store, rebuild parent too. +// for simple, rebuild one by one +int ObJsonBin::rebuild_with_new_value(int64_t index, ObJsonBin *new_value, ObStringBuffer &result) const +{ + INIT_SUCC(ret); + int64_t start_pos = result.length(); + bool is_obj_type = json_type() == ObJsonNodeType::J_OBJECT; + uint64_t element_count = get_element_count(); + ObJsonBinMeta meta; + ObJsonBin dst_bin; + ObJsonBin child_value; + if (OB_FAIL(get_value(index, child_value))) { + LOG_WARN("get child value fail", K(ret), K(index)); + } else if (OB_FAIL(calc_size_with_new_value(&child_value, new_value, meta))) { + LOG_WARN("calc size fail", K(ret)); + } else if (OB_FAIL(meta.to_header(result))) { + LOG_WARN("to obj header fail", K(ret)); + } else if (OB_FAIL(dst_bin.reset(result.string(), 0, nullptr))) { + LOG_WARN("reset bin fail", K(ret), K(meta)); + } else { + index = ((index == OB_JSON_INSERT_LAST || index > element_count) ? 
element_count : index); + } + // key entry + for (int i = 0; OB_SUCC(ret) && is_obj_type && i < element_count; i++) { + ObString src_key; + uint64_t key_offset = result.length() - start_pos; + if (OB_FAIL(get_key(i, src_key))) { + LOG_WARN("get_key from src_bin fail", K(ret), K(i)); + } else if (OB_FAIL(dst_bin.rebuild_child_key(i, src_key, key_offset, result))) { + LOG_WARN("set_key fail", K(ret), K(src_key)); + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), 0))) { + } + } + // value entry + for (int i = 0; OB_SUCC(ret) && i < element_count; i++) { + uint64_t value_offset = result.length() - start_pos; + uint8_t value_type = 0; + // bool is_update_inline = false; + if (i == index) { + if (OB_FAIL(dst_bin.rebuild_child(index, *new_value, value_offset, result))) { + LOG_WARN("rebuild_child fail", K(ret), K(index)); + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), 0))) { + } + } else if (OB_FAIL(get_value(i, child_value))) { + LOG_WARN("get child value fail", K(ret), K(i)); + } else if (OB_FAIL(dst_bin.rebuild_child(i, child_value, value_offset, result))) { + LOG_WARN("rebuild_child fail", K(ret), K(i)); + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), 0))) { + } + } + if (OB_FAIL(ret)) { + } else if (dst_bin.obj_size() < (result.length() - start_pos)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("obj_size incorrect", K(ret), "bin", dst_bin.obj_size(), "buf", result.length(), K(start_pos)); + } else if (dst_bin.obj_size() == (result.length() - start_pos)) { // if equal, just skip + } else if (OB_FAIL(dst_bin.set_obj_size(result.length() - start_pos))) { + LOG_WARN("set obj_size fail", K(ret), "bin", dst_bin.obj_size(), "buf", result.length(), K(start_pos)); + } + return ret; +} + +int ObJsonBin::reset_root(const ObString &data) +{ + INIT_SUCC(ret); + ObString header_data; + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + if (OB_ISNULL(update_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update_ctx is null", K(ret), K(pos_), 
K(meta_)); + } else if (node_stack_.size() > 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current node is not root", K(ret), K(pos_), K(meta_)); + } else if (OB_FAIL(cursor_->set(pos_, data))) { + LOG_WARN("set_data fail", K(ret), K(pos_)); + } else if (OB_FAIL(cursor_->get_for_write(0, sizeof(ObJsonBinDocHeader), header_data))) { + LOG_WARN("get header data fail", K(ret), K(pos_)); + } else if (OB_FAIL(set_doc_header_v0(header_data, cursor_->get_length()))) { + LOG_WARN("set_doc_header_v0 fail", K(ret), K(pos_)); + } else if (OB_FAIL(reset_iter())) { + LOG_WARN("reset_iter fail", K(ret), K(pos_)); + } else { + update_ctx->is_rebuild_all_ = true; + } + return ret; +} + /* remove child node at index, execute when iter locates at parent node */ -int ObJsonBin::remove_v0(size_t index) +int ObJsonBin::object_remove_v0(size_t index) { INIT_SUCC(ret); - ObJsonNodeType node_type = this->json_type(); - ObJBVerType ver_type = this->get_vertype(); - // 1. move into element index, get used bytes - uint64_t used_bytes; - if (OB_FAIL(this->element(index))) { - LOG_WARN("failed to get element ", K(index), K(ret)); + uint64_t old_elem_count = element_count(); + uint64_t new_elem_count = old_elem_count - 1; + ObJBVerType vertype = this->get_vertype(); + ObString header_data; + int64_t header_len = get_value_entry_offset(this->element_count()); + if (OB_FAIL(record_remove_offset(index))) { + LOG_WARN("record_remove_offset fail", K(ret), K(index)); + } else if (OB_FAIL(cursor_->get_for_write(pos_, header_len, header_data))) { + LOG_WARN("get_for_write fail", K(ret), K(index), K(header_len), K(pos_), K(meta_)); } else { - used_bytes = this->get_used_bytes(); - if (OB_FAIL(this->move_parent_iter())) { - LOG_WARN("failed to move back to parent ", K(ret)); + // move object key entry + uint64_t curr_key_entry_offset = get_key_entry_offset(index); + uint64_t next_key_entry_offset = get_key_entry_offset(index + 1); + uint64_t key_entry_move_len = get_key_entry_offset(old_elem_count) - 
next_key_entry_offset; + MEMMOVE(header_data.ptr() + curr_key_entry_offset, header_data.ptr() + next_key_entry_offset, key_entry_move_len); + + // [key_entry_start_offset, key_entry_end_offset][...][value_entry_start_offset, ..][curr_value_entry_offset][next_value_entry_offset, value_entry_end_offset] + uint64_t key_entry_end_offset = get_key_entry_offset(new_elem_count); + uint64_t value_entry_start_offset = get_value_entry_offset(0); + uint64_t value_entry_end_offset = get_value_entry_offset(old_elem_count); // old_elem_end + uint64_t curr_value_entry_offset = get_value_entry_offset(index); + uint64_t next_value_entry_offset = get_value_entry_offset(index + 1); + // move prev value entries + uint64_t value_entry_prev_move_len = curr_value_entry_offset - value_entry_start_offset; + MEMMOVE(header_data.ptr() + key_entry_end_offset, header_data.ptr() + value_entry_start_offset, value_entry_prev_move_len); + + // move next value entries + uint64_t value_entry_next_move_len = value_entry_end_offset - next_value_entry_offset; + MEMMOVE(header_data.ptr() + key_entry_end_offset + value_entry_prev_move_len, header_data.ptr() + next_value_entry_offset, value_entry_next_move_len); + if (OB_FAIL(set_element_count(new_elem_count))) { + LOG_WARN("set element count fail", K(ret), K(new_elem_count), K(old_elem_count), K(index)); + } else if (OB_FAIL(reset(pos_))) { + LOG_WARN("reset fail", K(ret)); } } - // 2. 
seek index key entry and value entry, do memmove - if (OB_SUCC(ret)) { - uint64_t offset = pos_; - char *data = result_.ptr(); - uint8_t node_type, var_type, obj_size_type; - uint64_t count, obj_size; - parse_obj_header(data, offset, node_type, var_type, obj_size_type, count, obj_size); - uint64_t type_size = ObJsonVar::get_var_size(var_type); - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - uint64_t extend_offset = 0; - // if it's an object, first remove the key entry - if (OB_JSON_TYPE_GET_INLINE(node_type) == static_cast(ObJsonNodeType::J_OBJECT)) { - uint64_t key_entry_size = type_size * 2; - char *curr_key_entry = data + offset + key_entry_size * index; - char *next_key_entry = curr_key_entry + key_entry_size; - uint64_t len = key_entry_size * (count - index - 1) + val_entry_size * index; - MEMMOVE(curr_key_entry, next_key_entry, len); - // Adjust offset of value entry - offset += key_entry_size * (count - 1); - extend_offset = key_entry_size; - } - int64_t val_entry_offset = offset + val_entry_size * index; - char *curr_val_entry = data + val_entry_offset; - char *next_val_entry = curr_val_entry + val_entry_size + extend_offset; - uint64_t len = val_entry_size * (count - index - 1); - MEMMOVE(curr_val_entry, next_val_entry, len); + return ret; +} - // update obj size and count for curr iter node - int64_t bytes_changed = 0 - used_bytes - val_entry_size - extend_offset; - ObJsonBinObjHeader *header = reinterpret_cast(data + pos_); +int ObJsonBin::array_remove_v0(size_t index) +{ + INIT_SUCC(ret); + uint64_t old_elem_count = element_count(); + uint64_t new_elem_count = old_elem_count - 1; + ObJBVerType vertype = this->get_vertype(); + int node_stack_size = node_stack_.size(); + ObJsonBuffer new_bin_str(allocator_); + ObJBNodeMeta parent_node_meta; + ObJsonBin new_bin; - uint64_t obj_count; - if (OB_FAIL(ObJsonVar::read_var(header->used_size_, header->count_size_, &obj_count))) { - LOG_WARN("fail to read header count.", K(ret), 
KP(header->used_size_), K(header->count_size_)); - } else { - obj_count -= 1; - if (OB_FAIL(ObJsonVar::set_var(obj_count, header->count_size_, header->used_size_))) { - LOG_WARN("fail to set header count.", K(ret), K(obj_count), KP(header->used_size_), K(header->count_size_)); - } else { - // update elememt count - element_count_ -= 1; - } - } - - if (OB_SUCC(ret)) { - uint64_t new_obj_size; - uint64_t count_var_size = ObJsonVar::get_var_size(header->count_size_); - if (OB_FAIL(ObJsonVar::read_var(header->used_size_ + count_var_size, header->obj_size_size_, &new_obj_size))) { - LOG_WARN("fail to read header obj size.", K(ret), KP(header->used_size_), K(header->obj_size_size_)); - } else { - new_obj_size += bytes_changed; - if (OB_FAIL(ObJsonVar::set_var(new_obj_size, header->obj_size_size_, header->used_size_ + count_var_size))) { - LOG_WARN("fail to set header obj size.", K(ret), K(new_obj_size), - KP(header->used_size_), K(header->obj_size_size_)); - } else { - // update obj size for parents - update_parents(bytes_changed, true); - } - } + // move value entry + uint64_t curr_value_entry_offset = get_value_entry_offset(index); + uint64_t next_value_entry_offset = get_value_entry_offset(index + 1); + uint64_t value_entry_move_len = get_value_entry_offset(old_elem_count) - next_value_entry_offset; + // MEMMOVE(data + curr_value_entry_offset, data + next_value_entry_offset, value_entry_move_len); + if (OB_FAIL(cursor_->move_data(pos_ + curr_value_entry_offset, pos_ + next_value_entry_offset, value_entry_move_len))) { + LOG_WARN("move_data fail", K(ret), K(pos_), K(curr_value_entry_offset), K(next_value_entry_offset), K(value_entry_move_len), K(index)); + } else if (OB_FAIL(set_element_count(new_elem_count))) { + LOG_WARN("set element count fail", K(ret), K(new_elem_count), K(old_elem_count), K(index)); + } else if (OB_FAIL(reset(pos_))) { + LOG_WARN("reset fail", K(ret)); + } else if (OB_FAIL(rebuild_json_array(new_bin_str))) { + LOG_WARN("rebuild array fail", 
K(ret), K(pos_), K(meta_)); + } else if (node_stack_size <= 0) { + if (OB_FAIL(reset_root(new_bin_str.string()))) { + LOG_WARN("reset_root fail", K(ret), K(index), K(node_stack_size), K(new_bin_str)); } + } else if (OB_FAIL(node_stack_.back(parent_node_meta))) { + LOG_WARN("get node fail", K(ret), K(node_stack_size)); + } else if (OB_FAIL(new_bin.reset(new_bin_str.string(), 0, nullptr))) { + LOG_WARN("reset fail", K(ret)); + } else if (OB_FAIL(move_parent_iter())) { + LOG_WARN("move_parent_iter fail", K(ret), K(pos_), K(meta_)); + } else if (OB_FAIL(update(parent_node_meta.idx_, &new_bin))) { + LOG_WARN("update parent fail", K(ret), K(meta_), K(pos_), K(parent_node_meta)); + } else if (OB_FAIL(element(parent_node_meta.idx_))) { + LOG_WARN("move child fail", K(ret), K(meta_), K(pos_), K(parent_node_meta)); } return ret; } @@ -3560,20 +3032,23 @@ int ObJsonBin::remove(size_t index) INIT_SUCC(ret); ObJsonNodeType node_type = this->json_type(); ObJBVerType ver_type = this->get_vertype(); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); if (node_type != ObJsonNodeType::J_ARRAY && node_type != ObJsonNodeType::J_OBJECT) { ret = OB_OBJ_TYPE_ERROR; LOG_WARN("wrong node_type.", K(ret), K(node_type)); - } else if (index >= element_count_) { + } else if (index >= get_element_count()) { ret = OB_OUT_OF_ELEMENT; - LOG_WARN("index out of range.", K(ret), K(index), K(element_count_)); - } else if (!is_alloc_) { - ret = OB_ERR_READ_ONLY; - LOG_WARN("json binary is read only.", K(ret), K(is_alloc_)); + LOG_WARN("index out of range.", K(ret), K(index), K(get_element_count())); + } else if (OB_ISNULL(update_ctx)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("update_ctx is null", K(ret)); } else { switch (ver_type) { case ObJBVerType::J_ARRAY_V0: + ret = array_remove_v0(index); + break; case ObJBVerType::J_OBJECT_V0: { - ret = remove_v0(index); + ret = object_remove_v0(index); break; } default: { @@ -3593,8 +3068,6 @@ int ObJsonBin::remove(const ObString &key) ObJsonNodeType node_type = 
this->json_type(); if (node_type != ObJsonNodeType::J_OBJECT) { ret = OB_OBJ_TYPE_ERROR; - } else if (!is_alloc_) { - ret = OB_ERR_READ_ONLY; } else if (OB_FAIL(lookup_index(key, &idx))) { if (ret == OB_SEARCH_NOT_FOUND) { ret = OB_SUCCESS; // if not found, return succ @@ -3610,22 +3083,22 @@ int ObJsonBin::remove(const ObString &key) int ObJsonBin::rebuild() { INIT_SUCC(ret); - if (!is_alloc_ || allocator_ == NULL) { + ObJsonBuffer buffer(allocator_); + ObString new_bin; + if (allocator_ == NULL) { ret = OB_ERR_READ_ONLY; LOG_WARN("json binary is read only.", K(ret)); - } else { - ObJsonBuffer new_bin(allocator_); - if (OB_FAIL(rebuild(new_bin))) { + } else if (node_stack_.size() > 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("not at root", K(ret), K(pos_)); + } else if (OB_FAIL(rebuild(buffer))) { LOG_WARN("rebuild failed.", K(ret)); - } else { - result_.reuse(); - if (OB_FAIL(result_.append(new_bin.ptr(), new_bin.length()))) { - LOG_WARN("append for new result falied", K(ret)); - } else { - curr_.assign_ptr(result_.ptr(), result_.length()); - ret = reset_iter(); - } - } + } else if (OB_FAIL(buffer.get_result_string(new_bin))) { + LOG_WARN("get_result_string fail", K(ret)); + } else if (OB_FAIL(set_current(new_bin, pos_))) { + LOG_WARN("set_current fail", K(ret)); + } else { + ret = reset_iter(); } return ret; } @@ -3634,203 +3107,371 @@ int ObJsonBin::rebuild_at_iter(ObJsonBuffer &buf) { INIT_SUCC(ret); buf.reuse(); - uint8_t ver_type = static_cast(get_vertype()); - ObJsonNodeType node_type = this->json_type(); - if (node_type != ObJsonNodeType::J_ARRAY && node_type != ObJsonNodeType::J_OBJECT) { - ret = OB_OBJ_TYPE_ERROR; - } else if (OB_FAIL(rebuild_json_value(curr_.ptr() + pos_, curr_.length() - pos_, ver_type, ver_type, uint_val_, buf))) { - LOG_WARN("rebuild json binary iter falied, ", K(ret)); + if (OB_FAIL(rebuild_json_value(buf))) { + LOG_WARN("rebuild json binary iter falied", K(ret)); } return ret; } -int ObJsonBin::rebuild_json_process_value_v0(const 
char *data, uint64_t length, const char *old_val_entry, - uint64_t new_val_entry_offset, uint64_t count, uint8_t var_type, int64_t st_pos, ObJsonBuffer &result) const +int ObJsonBin::insert_v0(int64_t index, const ObString &new_key, ObJsonBin *new_value) { INIT_SUCC(ret); - uint64_t type_size = ObJsonVar::get_var_size(var_type); - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - uint8_t v_type = static_cast(JBLS_UINT8); - for (uint64_t i = 0; OB_SUCC(ret) && i < count; i++) { - uint64_t new_val_offset = result.length() - st_pos; - uint64_t val_offset, val_type; - if (OB_FAIL(ObJsonVar::read_var(old_val_entry + val_entry_size * i, var_type, &val_offset))) { - LOG_WARN("failed to read val offset", K(ret)); - } else if (OB_FAIL(ObJsonVar::read_var(old_val_entry + val_entry_size * i + type_size, v_type, &val_type))) { - LOG_WARN("failed to read val type", K(ret)); + bool is_exist = false; + bool is_object_type = json_type() == ObJsonNodeType::J_OBJECT; + size_t idx = index; + if (! 
is_object_type) { + } else if (OB_FAIL(lookup_index(new_key, &idx))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; } else { - uint8_t type = static_cast(val_type); - if (!OB_JSON_TYPE_IS_INLINE(type)) { - ret = rebuild_json_value(data + val_offset, length - val_offset, type, type, val_offset, result); - if (OB_SUCC(ret)) { - // fill value offset - char* new_val_entry = result.ptr() + new_val_entry_offset; - if (OB_FAIL(ObJsonVar::set_var(new_val_offset, var_type, new_val_entry + val_entry_size * i))) { - LOG_WARN("failed to set val offset.", K(ret), K(i), K(new_val_offset), K(var_type)); - } + LOG_WARN("lookup key internel fail", K(ret), K(new_key)); + } + } else if (OB_FAIL(update_v0(idx, new_value))) { + LOG_WARN("key exist, do update fail", K(idx), K(new_key), K(idx)); + } else { + is_exist = true; + } + + if (OB_FAIL(ret) || is_exist) { + } else if (is_object_type && OB_FAIL(lookup_insert_postion(new_key, idx))) { + LOG_WARN("lookup key postion fail", K(ret), K(is_object_type), K(idx), K(new_key)); + } else if (OB_FAIL(insert_recursion(idx, new_key, new_value))) { + LOG_WARN("fail", K(ret)); + } + return ret; +} + + +int ObJsonBin::update_append_v0(int index, ObJsonBin *new_value, bool &is_update_append) +{ + INIT_SUCC(ret); + uint8_t entry_var_type = this->entry_var_type(); + uint8_t value_type = new_value->get_type(); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + ObJsonBuffer &update_buffer = update_ctx->get_tmp_buffer(); + uint64_t start_pos = cursor_->get_length(); + uint64_t value_offset = start_pos - get_extend_seg_offset(); + uint64_t src_value_offset = 0; + uint8_t src_value_type = 0; + + if (entry_var_type < ObJsonVar::get_var_type(value_offset)) { + } else if (OB_FAIL(get_value_entry(index, src_value_offset, src_value_type))) { + LOG_WARN("get_key_entry fail", K(ret), K(index)); + } else if (OB_FAIL(set_value_entry(index, value_offset, J_FORWARD_V0))) { + LOG_WARN("set_value_entry fail", K(ret), K(value_offset), K(value_type)); + 
} else if (! need_type_prefix(value_type) // append-update will record type for all type + && (OB_FAIL(update_buffer.append(reinterpret_cast(&value_type), sizeof(value_type))))) { + LOG_WARN("append type fail", K(ret), K(value_type)); + } else if (OB_FAIL(new_value->rebuild_json_value(update_buffer))) { + LOG_WARN("serialize_json_value fail", K(ret)); + } else if (OB_FAIL(cursor_->append(update_buffer.string()))) { + LOG_WARN("append fail", K(ret)); + } else if (OB_FAIL(record_append_update_offset( + index, + start_pos, + update_buffer.length(), + new_value->get_type()))) { + LOG_WARN("record_append_update_offset fail", K(ret)); + } else { + is_update_append = true; + } + return ret; +} + +// entry var type might get bigger, and affect parents. +// so may need update with tree +int ObJsonBin::update_recursion(int index, ObJsonBin *new_value) +{ + INIT_SUCC(ret); + int node_stack_size = node_stack_.size(); + ObJBNodeMetaStack dup_stack(allocator_); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + ObJsonBuffer& result = update_ctx->get_tmp_buffer(); + uint64_t start_pos = cursor_->get_length(); + uint8_t cur_value_type = get_type(); + // rebuild self with new value + if (OB_FAIL(rebuild_with_new_value(index, new_value, result))) { + LOG_WARN("fail", K(ret)); + } else if (node_stack_size <= 0) { + if (OB_FAIL(reset_root(result.string()))) { + LOG_WARN("reset_root fail", K(ret), K(index), KPC(new_value), K(result)); + } + // set and rebuild parent if need + } else if (OB_FAIL(dup_stack.copy(node_stack_))) { + LOG_WARN("copy node stack fail", K(ret), K(node_stack_size)); + } else { + ObJBNodeMeta parent_node_meta; + ObJsonBuffer tmp_buf(allocator_); + ObJsonBin tmp_bin; + int recursion_end_idx = -1; + for(int i = node_stack_size - 1; OB_SUCC(ret) && i >= 0 && recursion_end_idx == -1; --i) { + uint64_t child_pos = pos_; + tmp_buf.reuse(); + bool is_updated = false; + ObString child_data = result.string(); + if (OB_FAIL(node_stack_.back(parent_node_meta))) { + 
LOG_WARN("get node fail", K(ret), K(node_stack_size), K(i)); + } else if (OB_FAIL(move_parent_iter())) { + LOG_WARN("move fail", K(ret), K(parent_node_meta)); + } else if (parent_node_meta.entry_type_ >= ObJsonVar::get_var_type(start_pos - get_extend_seg_offset())) { + if (OB_FAIL(set_value_entry(parent_node_meta.idx_, start_pos - get_extend_seg_offset(), J_FORWARD_V0))) { + LOG_WARN("set_value_entry fail", K(ret), K(pos_), K(child_pos)); + } else if (OB_FAIL(cursor_->append(child_data))) { + LOG_WARN("append fail", K(ret), K(pos_)); + } else if (OB_FAIL(record_append_update_offset( + parent_node_meta.idx_, + start_pos, + cursor_->get_length() - start_pos, + cur_value_type))) { + LOG_WARN("record_append_update_offset fail", K(ret), K(start_pos), K(parent_node_meta)); } else { - LOG_WARN("rebuild child node failed.", K(ret), K(i), K(val_type)); + recursion_end_idx = i; } + } else if (OB_FAIL(tmp_buf.append(child_data))) { + LOG_WARN("copy buffer fail", K(ret), K(pos_)); + } else if (OB_FAIL(tmp_bin.reset(tmp_buf.string(), 0, nullptr))) { + LOG_WARN("reset fail", K(ret)); + } else if (OB_FALSE_IT(result.reuse())) { + } else if (OB_FAIL(rebuild_with_new_value(parent_node_meta.idx_, &tmp_bin, result))) { + LOG_WARN("fail", K(ret), K(parent_node_meta), K(tmp_bin)); + } else { + cur_value_type = get_type(); + } + } + + if (OB_FAIL(ret)) { + } else if (-1 == recursion_end_idx) { + if (OB_FAIL(reset_root(result.string()))) { + LOG_WARN("reset_root fail", K(ret), K(index), KPC(new_value), K(result)); + } else { + recursion_end_idx = 0; + } + } + + for (int i = recursion_end_idx; OB_SUCC(ret) && i < node_stack_size; ++i) { + if (OB_FAIL(dup_stack.at(i, parent_node_meta))) { + LOG_WARN("get node fail", K(ret), K(node_stack_size), K(i)); + } else if (OB_FAIL(element(parent_node_meta.idx_))) { + LOG_WARN("move back postion fail", K(ret), K(i), K(parent_node_meta)); } } } return ret; } -int ObJsonBin::rebuild_json_process_value(const char *data, uint64_t length, const char 
*old_val_entry, - uint64_t new_val_entry_offset, uint64_t count, uint8_t var_type, int64_t st_pos, - ObJsonBuffer &result, ObJBVerType cur_vertype, ObJBVerType dest_vertype) const +int ObJsonBin::insert_recursion(int index, const ObString &new_key, ObJsonBin *new_value) { INIT_SUCC(ret); - ObJsonNodeType type = ObJsonVerType::get_json_type(cur_vertype); - if (((cur_vertype == ObJBVerType::J_ARRAY_V0 || cur_vertype == ObJBVerType::J_OBJECT_V0) && - (dest_vertype == ObJBVerType::J_ARRAY_V0 || dest_vertype == ObJBVerType::J_OBJECT_V0))) { - ret = rebuild_json_process_value_v0(data, length, old_val_entry, new_val_entry_offset, count, var_type, st_pos, result); - } else { + int node_stack_size = node_stack_.size(); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + uint64_t start_pos = cursor_->get_length(); + ObJBNodeMeta parent_node_meta; + ObJsonBin tmp_bin; + ObJsonBuffer tmp_buf(allocator_); + bool is_updated = false; + uint8_t cur_value_type = get_type(); + if (OB_FAIL(rebuild_with_new_insert_value(index, new_key, new_value, tmp_buf))) { + LOG_WARN("fail", K(ret)); + } else if (node_stack_size <= 0) { + if (OB_FAIL(reset_root(tmp_buf.string()))) { + LOG_WARN("reset_root fail", K(ret), K(index), KPC(new_value), K(tmp_buf)); + } + } else if (OB_FAIL(node_stack_.back(parent_node_meta))) { + LOG_WARN("get node fail", K(ret), K(node_stack_size)); + } else if (OB_FAIL(move_parent_iter())) { + LOG_WARN("move fail", K(ret), K(parent_node_meta)); + } else if (parent_node_meta.entry_type_ >= ObJsonVar::get_var_type(start_pos - get_extend_seg_offset())) { + if (OB_FAIL(set_value_entry(parent_node_meta.idx_, start_pos - get_extend_seg_offset(), J_FORWARD_V0))) { + LOG_WARN("set_value_entry fail", K(ret), K(pos_), K(start_pos)); + } else if (OB_FAIL(cursor_->append(tmp_buf.string()))) { + LOG_WARN("copy buffer fail", K(ret), K(pos_)); + } else if (OB_FAIL(record_append_update_offset( + parent_node_meta.idx_, + start_pos, + cursor_->get_length() - start_pos, + 
cur_value_type))) { + LOG_WARN("record_append_update_offset fail", K(ret), K(parent_node_meta.idx_)); + } else if (OB_FAIL(element(parent_node_meta.idx_))) { + LOG_WARN("move back fail", K(ret), K(parent_node_meta.idx_)); + } + } else if (OB_FAIL(tmp_bin.reset(tmp_buf.string(), 0, nullptr))) { + LOG_WARN("reset fail", K(ret)); + } else if (tmp_bin.obj_size() != tmp_buf.length()) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed:rebuild value.", K(ret), K(cur_vertype), K(dest_vertype)); + LOG_WARN("obj_size incorrect", K(ret), "bin", tmp_bin.obj_size(), "buf", tmp_buf.length()); + } else if (OB_FAIL(update_recursion(parent_node_meta.idx_, &tmp_bin))) { + LOG_WARN("update_recursion fail", K(ret)); + } else if (OB_FAIL(element(parent_node_meta.idx_))) { + LOG_WARN("move back fail", K(ret), K(parent_node_meta.idx_)); } return ret; } -/** - * input data version is v0, rebuilding using latest format -*/ -int ObJsonBin::rebuild_json_obj_v0(const char *data, uint64_t length, ObJsonBuffer &result) const +int ObJsonBin::update_v0(int index, ObJsonBin *new_value) +{ + INIT_SUCC(ret); + uint64_t src_value_offset = 0; + uint8_t src_value_type = 0; + bool is_update_inline = false; + bool is_update_inplace = false; + bool is_update_append = false; + bool is_updated = false; + if (OB_FAIL(get_value_entry(index, src_value_offset, src_value_type))) { + LOG_WARN("get_key_entry fail", K(ret), K(index)); + } else if (OB_FAIL(try_update_inline(index, new_value, is_update_inline))) { + LOG_WARN("try_update_inline fail", K(ret), K(index)); + } else if (is_update_inline) { + LOG_DEBUG("try_update_inline success", K(index)); + if (OB_FAIL(record_inline_update_offset(index))) { + LOG_WARN("record_inline_update_offset fail", K(ret), K(index)); + } + } else if (OB_FAIL(try_update_inplace(index, new_value, is_update_inplace))) { + LOG_WARN("try_update_inplace fail", K(ret), K(index)); + } else if (is_update_inplace) { + LOG_DEBUG("try_update_inplace success", K(index)); + } else if 
(OB_FAIL(update_append_v0(index, new_value, is_update_append))) { + LOG_DEBUG("update_append_v0 fail", K(ret), K(index)); + } else if (is_update_append) { + LOG_DEBUG("is_update_append success", K(index)); + } else if (OB_FAIL(update_recursion(index, new_value))) { + LOG_WARN("fail", K(ret)); + } + return ret; +} + +int ObJsonBin::rebuild_json_object_v0(ObJsonBuffer &result) const { INIT_SUCC(ret); bool with_key_dict = false; - int64_t st_pos = result.length(); + int64_t start_pos = result.length(); uint64_t offset = 0; - uint8_t node_type, var_type, obj_size_type; - uint64_t count, obj_size; + uint64_t element_count = this->element_count(); + ObJsonBin dst_bin; + ObJsonBinMeta meta; + uint64_t obj_size = 0; - const ObJsonBinObjHeader *header = reinterpret_cast(data + offset); - // parsing header using v0 format - parse_obj_header(data, offset, node_type, var_type, obj_size_type, count, obj_size); - - // if v0 is latest version and is_continuous, do copy, else serialize as latest version - if (header->is_continuous_ && ObJBVerType::J_OBJECT_V0 == get_object_vertype()) { // memory is continuous, can do memcopy - if (OB_FAIL(result.append(data, obj_size))) { - LOG_WARN("append obj failed.", K(ret), K(obj_size)); - } + if (OB_FAIL(this->get_serialize_size(obj_size))) { + LOG_WARN("get_serialize_size fail", K(ret), K(meta_)); } else { - // rebuild using latest format - // copy obj header, key entry, val entry, key(if need) - uint64_t type_size = ObJsonVar::get_var_size(var_type); - uint64_t key_entry_size = type_size * 2; - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - uint64_t copy_len = offset + key_entry_size * count + val_entry_size * count; - if (length < copy_len) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for obj.", K(ret), K(length), K(copy_len)); - } else { - if (!with_key_dict) { // without key dict, need copy key - // using latest bin format - uint64_t key_offset, key_len; - const char *key_entry = (data + offset); - 
const char *last_key_offset_ptr = key_entry + key_entry_size * (count - 1); - const char *last_key_len_ptr = key_entry + key_entry_size * (count - 1) + type_size; - // get last key offest and len - if (OB_FAIL(ObJsonVar::read_var(last_key_offset_ptr, var_type, &key_offset))) { - LOG_WARN("failed to read key offset", K(ret)); - } else if (OB_FAIL(ObJsonVar::read_var(last_key_len_ptr, var_type, &key_len))) { - LOG_WARN("failed to read key len", K(ret)); - } else { - copy_len = key_offset + key_len; - if (copy_len > length) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for obj.", K(ret), K(length), K(copy_len)); - } else if (OB_FAIL(result.append(data, copy_len))) { // if format is changed, must do modify - LOG_WARN("failed to append data.", K(ret), K(copy_len)); - } - } - } - } + meta.set_type(get_object_vertype(), false); + meta.set_element_count(element_count); + meta.set_element_count_var_type(ObJsonVar::get_var_type(element_count)); + meta.set_obj_size(obj_size); + meta.set_obj_size_var_type(ObJsonVar::get_var_type(obj_size)); + meta.set_entry_var_type(meta.obj_size_var_type()); + meta.set_is_continuous(true); + meta.calc_entry_array(); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(meta.to_header(result))) { + LOG_WARN("to obj header fail", K(ret)); + } else if (OB_FAIL(dst_bin.reset(result.string(), start_pos, nullptr))) { + LOG_WARN("reset bin fail", K(ret), K(meta)); + } - if (OB_SUCC(ret)) { - // process value - const char *val_entry = (data + offset + key_entry_size * count); - uint64_t new_val_entry_offset = st_pos + offset + key_entry_size * count; - ret = rebuild_json_process_value(data, length, val_entry, new_val_entry_offset, count, var_type, st_pos, result, - ObJBVerType::J_OBJECT_V0, ObJBVerType::J_OBJECT_V0); - if (OB_SUCC(ret)) { - // set new result is continuous, remove highest 1bit - ObJsonBinObjHeader *new_header = reinterpret_cast(result.ptr() + st_pos); - new_header->is_continuous_ = 1; - } else { - LOG_WARN("rebuild child 
node failed.", K(ret)); - } + for (uint64_t i = 0; OB_SUCC(ret) && i < element_count; i++) { + ObString src_key; + uint64_t key_offset = result.length() - start_pos; + if (OB_FAIL(get_key(i, src_key))) { + LOG_WARN("get_key from src_bin fail", K(ret), K(i)); + } else if (OB_FAIL(dst_bin.rebuild_child_key(i, src_key, key_offset, result))) { + LOG_WARN("set_key_entry fail", K(ret), K(src_key)); + // result may realloc, so need ensure point same memory + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), start_pos))) { } } + + ObJsonBin child_value; + for (int i = 0; OB_SUCC(ret) && i < element_count; i++) { + uint64_t value_offset = result.length() - start_pos; + if (OB_FAIL(get_value(i, child_value))) { + LOG_WARN("get child value fail", K(ret), K(i)); + } else if (OB_FAIL(dst_bin.rebuild_child(i, child_value, value_offset, result))) { + LOG_WARN("try_update_inline fail", K(ret), K(i)); + // result may realloc, so need ensure point same memory + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), start_pos))) { + } + } + if (OB_FAIL(ret)) { + } else if (dst_bin.obj_size() < (result.length() - start_pos)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("obj_size incorrect", K(ret), "bin", dst_bin.obj_size(), "buf", result.length(), K(start_pos)); + } else if (dst_bin.obj_size() == (result.length() - start_pos)) { // if equal, just skip + } else if (OB_FAIL(dst_bin.set_obj_size(result.length() - start_pos))) { + LOG_WARN("set obj_size fail", K(ret), "bin", dst_bin.obj_size(), "buf", result.length(), K(start_pos)); + } return ret; } -int ObJsonBin::rebuild_json_obj(const char *data, uint64_t length, ObJsonBuffer &result, - ObJBVerType src_vertype, ObJBVerType dest_vertype) const +int ObJsonBin::rebuild_json_object(ObJsonBuffer &result) const { INIT_SUCC(ret); - if (ObJBVerType::J_OBJECT_V0 == src_vertype && ObJBVerType::J_OBJECT_V0 == dest_vertype) { - ret = rebuild_json_obj_v0(data, length, result); + if (ObJBVerType::J_OBJECT_V0 == get_vertype() && 
ObJBVerType::J_OBJECT_V0 == get_vertype()) { + ret = rebuild_json_object_v0(result); } else { ret = OB_ERR_UNEXPECTED; - LOG_WARN("rebuild json object, invalid vertype.", K(ret), K(src_vertype), K(dest_vertype)); + LOG_WARN("rebuild json object, invalid vertype.", K(ret)); } return ret; } -int ObJsonBin::rebuild_json_array_v0(const char *data, uint64_t length, ObJsonBuffer &result) const +int ObJsonBin::rebuild_json_array_v0(ObJsonBuffer &result) const { INIT_SUCC(ret); - int64_t st_pos = result.length(); + int64_t start_pos = result.length(); uint64_t offset = 0; - uint8_t node_type, var_type, obj_size_type; - uint64_t count, obj_size; - if (length <= OB_JSON_BIN_ARR_HEADER_LEN) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough.", K(ret), K(length)); + uint64_t element_count = this->element_count(); + ObJsonBin dst_bin; + ObJsonBinMeta meta; + uint64_t obj_size = 0; + if (OB_FAIL(this->get_serialize_size(obj_size))) { + LOG_WARN("get_serialize_size fail", K(ret), K(meta_)); } else { - const ObJsonBinObjHeader *header = reinterpret_cast(data + offset); - parse_obj_header(data, offset, node_type, var_type, obj_size_type, count, obj_size); - if (header->is_continuous_) { - if (OB_FAIL(result.append(data, obj_size))) { - LOG_WARN("append array failed.", K(ret), K(obj_size)); - } - } else { - // copy obj header, val entry - uint64_t type_size = ObJsonVar::get_var_size(var_type); - uint64_t val_entry_size = (type_size + sizeof(uint8_t)); - uint64_t copy_len = offset + val_entry_size * count; - if (copy_len > length) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for val entry.", K(ret), K(length), K(copy_len)); - } else if (OB_FAIL(result.append(data, copy_len))) { - LOG_WARN("failed to append data.", K(ret), K(copy_len)); - } else { - // process value - const char *val_entry = (data + offset); - uint64_t new_val_entry_offset = st_pos + offset; - ret = rebuild_json_process_value(data, length, val_entry, new_val_entry_offset, count, 
var_type, st_pos, - result, ObJBVerType::J_ARRAY_V0, ObJBVerType::J_ARRAY_V0); - if (OB_SUCC(ret)) { - // set new result is continuous, remove highest 1bit - ObJsonBinObjHeader *new_header = reinterpret_cast(result.ptr() + st_pos); - new_header->is_continuous_ = 1; - } else { - LOG_WARN("rebuild child node failed.", K(ret)); - } - } + meta.set_type(get_array_vertype(), false); + meta.set_element_count(element_count); + meta.set_element_count_var_type(ObJsonVar::get_var_type(element_count)); + meta.set_obj_size(obj_size); + meta.set_obj_size_var_type(ObJsonVar::get_var_type(obj_size)); + meta.set_entry_var_type(meta.obj_size_var_type()); + meta.set_is_continuous(true); + meta.calc_entry_array(); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(meta.to_header(result))) { + LOG_WARN("to obj header fail", K(ret)); + } else if (OB_FAIL(dst_bin.reset(result.string(), start_pos, nullptr))) { + LOG_WARN("reset bin fail", K(ret), K(meta)); + } + ObJsonBin child_value; + for (int i = 0; OB_SUCC(ret) && i < element_count; i++) { + uint64_t value_offset = result.length() - start_pos; + uint8_t value_type = 0; + bool is_update_inline = false; + if (OB_FAIL(get_value(i, child_value))) { + LOG_WARN("get child value fail", K(ret), K(i)); + } else if (OB_FAIL(dst_bin.rebuild_child(i, child_value, value_offset, result))) { + LOG_WARN("try_update_inline fail", K(ret), K(i)); + // result may realloc, so need ensure point same memory + } else if (OB_FALSE_IT(dst_bin.set_current(result.string(), start_pos))) { } } + if (OB_FAIL(ret)) { + } else if (dst_bin.obj_size() < (result.length() - start_pos)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("obj_size incorrect", K(ret), "bin", dst_bin.obj_size(), "buf", result.length(), K(start_pos)); + } else if (dst_bin.obj_size() == (result.length() - start_pos)) { // if equal, just skip + } else if (OB_FAIL(dst_bin.set_obj_size(result.length() - start_pos))) { + LOG_WARN("set obj_size fail", K(ret), "bin", dst_bin.obj_size(), "buf", result.length(), 
K(start_pos)); + } return ret; } -int ObJsonBin::rebuild_json_array(const char *data, uint64_t length, ObJsonBuffer &result, - ObJBVerType src_vertype, ObJBVerType dest_vertype) const +int ObJsonBin::rebuild_json_array(ObJsonBuffer &result) const { INIT_SUCC(ret); - if (ObJBVerType::J_ARRAY_V0 == src_vertype && ObJBVerType::J_ARRAY_V0 == dest_vertype) { - ret = rebuild_json_array_v0(data, length, result); + if (ObJBVerType::J_ARRAY_V0 == get_vertype() && ObJBVerType::J_ARRAY_V0 == get_vertype()) { + ret = rebuild_json_array_v0(result); } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("rebuild json object.", K(ret)); @@ -3838,42 +3479,26 @@ int ObJsonBin::rebuild_json_array(const char *data, uint64_t length, ObJsonBuffe return ret; } -int ObJsonBin::rebuild_json_value(const char *data, - uint64_t length, - uint8_t src_type, - uint8_t dst_type, - uint64_t inline_data, - ObJsonBuffer &result) const +int ObJsonBin::rebuild_json_value(ObJsonBuffer &result) const { INIT_SUCC(ret); - bool is_src_inlined = OB_JSON_TYPE_IS_INLINE(src_type); - bool is_dst_inlined = OB_JSON_TYPE_IS_INLINE(dst_type); - ObJBVerType src_vertype = static_cast(OB_JSON_TYPE_GET_INLINE(src_type)); - ObJBVerType dest_vertype = static_cast(OB_JSON_TYPE_GET_INLINE(dst_type)); - ObJsonNodeType node_type = ObJsonVerType::get_json_type(src_vertype); + bool is_inlined = OB_JSON_TYPE_IS_INLINE(get_type()); + ObJBVerType vertype = static_cast(OB_JSON_TYPE_GET_INLINE(get_type())); + ObJsonNodeType node_type = ObJsonVerType::get_json_type(vertype); + ObString data; switch (node_type) { case ObJsonNodeType::J_NULL: { - if (!is_dst_inlined) { - if (OB_FAIL(result.append("\0", sizeof(char)))) { - LOG_WARN("failed to rebuild null type.", K(ret)); - } + if (OB_FAIL(result.append("\0", sizeof(char)))) { + LOG_WARN("failed to rebuild null type.", K(ret)); } break; } case ObJsonNodeType::J_DECIMAL: case ObJsonNodeType::J_ODECIMAL: { - ObPrecision prec = -1; - ObScale scale = -1; - number::ObNumber temp_number; - int64_t pos 
= 0; - if (OB_FAIL(serialization::decode_i16(data, length, pos, &prec))) { - LOG_WARN("fail to deserialize decimal precision.", K(ret), K(length)); - } else if (OB_FAIL(serialization::decode_i16(data, length, pos, &scale))) { - LOG_WARN("fail to deserialize decimal scale.", K(ret), K(length), K(prec)); - } else if (OB_FAIL(temp_number.deserialize(data, length, pos))) { - LOG_WARN("failed to deserialize decimal data", K(ret)); - } else { - ret = result.append(data, pos); + if (OB_FAIL(cursor_->get(pos_, meta_.bytes_, data))) { + LOG_WARN("get data fail", K(ret), K(pos_)); + } else if (OB_FAIL(result.append(data))) { + LOG_WARN("failed to append", K(ret), K(pos_)); } break; } @@ -3881,76 +3506,79 @@ int ObJsonBin::rebuild_json_value(const char *data, case ObJsonNodeType::J_OINT: case ObJsonNodeType::J_UINT: case ObJsonNodeType::J_OLONG: { - if (!is_dst_inlined) { - if (is_src_inlined) { - if (OB_FAIL(serialize_json_integer(inline_data, result))) { - LOG_WARN("failed to rebuild serialize integer.", K(ret), K(inline_data)); - } - } else { - int64_t val = 0; - int64_t pos = 0; - if (OB_FAIL(serialization::decode_vi64(data, length, pos, &val))) { - LOG_WARN("decode integer failed.", K(ret), K(length)); - } else if (OB_FAIL(result.append(data, pos))) { - LOG_WARN("failed to append integer date.", K(ret), K(pos)); - } + if (is_inlined) { + if (OB_FAIL(ObJsonBinSerializer::serialize_json_integer(get_uint(), result))) { + LOG_WARN("failed to rebuild serialize integer.", K(ret)); + } + } else { + int64_t val = 0; + int64_t pos = pos_; + if (OB_FAIL(cursor_->decode_vi64(pos, &val))) { + LOG_WARN("decode integer failed.", K(ret), K(pos)); + } else if (OB_FAIL(cursor_->get(pos_, pos - pos_, data))) { + LOG_WARN("get data fail", K(ret), K(pos), K(pos_)); + } else if (OB_FAIL(result.append(data))) { + LOG_WARN("failed to append integer date.", K(ret), K(pos)); } } break; } case ObJsonNodeType::J_DOUBLE: case ObJsonNodeType::J_ODOUBLE: { - if (length < sizeof(double)) { - ret = 
OB_ERR_UNEXPECTED; - LOG_WARN("data length not enough for double.", K(ret), K(length)); - } else { - ret = result.append(data, sizeof(double)); + double val = 0; + if (OB_FAIL(cursor_->get(pos_, sizeof(double), data))) { + LOG_WARN("get data fail", K(ret), K(pos_)); + } else if (OB_FAIL(result.append(data))) { + LOG_WARN("failed to append integer date.", K(ret), K(pos_)); + } + break; + } + case ObJsonNodeType::J_OFLOAT: { + if (OB_FAIL(cursor_->get(pos_, sizeof(float), data))) { + LOG_WARN("get data fail", K(ret), K(pos_)); + } else if (OB_FAIL(result.append(data))) { + LOG_WARN("failed to append integer date.", K(ret), K(pos_)); } break; } case ObJsonNodeType::J_STRING: { int64_t val = 0; - int64_t pos = 0; - if (src_vertype == ObJBVerType::J_STRING_V0) { + int64_t pos = pos_; + if (vertype == ObJBVerType::J_STRING_V0) { pos += sizeof(uint8_t); - if (OB_FAIL(serialization::decode_vi64(data, length, pos, &val))) { - LOG_WARN("fail to decode str length.", K(ret), K(length)); - } else if (length < pos + val) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length is not enough for string.", K(ret), K(length), K(pos), K(val)); - } else { - uint64_t str_length = static_cast(val); - ret = result.append(data, str_length + pos); + if (OB_FAIL(cursor_->decode_vi64(pos, &val))) { + LOG_WARN("fail to decode str length.", K(ret), K(pos)); + } else if (OB_FAIL(cursor_->get(pos_, pos - pos_ + val, data))) { + LOG_WARN("get data fail", K(ret), K(pos_), K(pos), K(val)); + } else if (OB_FAIL(result.append(data))) { + LOG_WARN("failed to append", K(ret), K(pos), K(data)); } } else { ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid string vertype.", K(ret), K(src_vertype)); + LOG_WARN("invalid string vertype.", K(ret), K(vertype)); } break; } case ObJsonNodeType::J_OBJECT: { - ret = rebuild_json_obj(data, length, result, src_vertype, dest_vertype); + ret = rebuild_json_object(result); break; } case ObJsonNodeType::J_ARRAY: { - ret = rebuild_json_array(data, length, result, src_vertype, 
dest_vertype); + ret = rebuild_json_array(result); break; } case ObJsonNodeType::J_BOOLEAN: { - if (!is_dst_inlined) { - if (OB_FAIL(serialize_json_integer(inline_data, result))) { - LOG_WARN("failed to rebuild serialize boolean.", K(ret), K(inline_data)); - } + if (OB_FAIL(ObJsonBinSerializer::serialize_json_integer(get_boolean(), result))) { + LOG_WARN("failed to rebuild serialize boolean.", K(ret)); } break; } case ObJsonNodeType::J_DATE: case ObJsonNodeType::J_ORACLEDATE: { - if (length < sizeof(int32_t)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length not enough for date.", K(ret), K(length)); - } else { - ret = result.append(data, sizeof(int32_t)); + if (OB_FAIL(cursor_->get(pos_, sizeof(int32_t), data))) { + LOG_WARN("get data fail", K(ret), K(pos_)); + } else if (OB_FAIL(result.append(data))) { + LOG_WARN("failed to append integer date.", K(ret), K(pos_)); } break; } @@ -3960,31 +3588,27 @@ int ObJsonBin::rebuild_json_value(const char *data, case ObJsonNodeType::J_ODATE: case ObJsonNodeType::J_OTIMESTAMP: case ObJsonNodeType::J_OTIMESTAMPTZ: { - if (length < sizeof(int64_t)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length not enough for datetime.", K(ret), K(length)); - } else { - ret = result.append(data, sizeof(int64_t)); + if (OB_FAIL(cursor_->get(pos_, sizeof(int64_t), data))) { + LOG_WARN("get data fail", K(ret), K(pos_)); + } else if (OB_FAIL(result.append(data))) { + LOG_WARN("failed to append integer date.", K(ret), K(pos_)); } break; } case ObJsonNodeType::J_OPAQUE: { - if (src_vertype == ObJBVerType::J_OPAQUE_V0) { - if (length < sizeof(uint16_t) + sizeof(uint64_t) + sizeof(uint8_t)) { + int64_t val = 0; + if (vertype == ObJBVerType::J_OPAQUE_V0) { + if (OB_FAIL(cursor_->read_i64(pos_ + sizeof(uint8_t) + sizeof(uint16_t), &val))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length not enough for opaque len.", K(ret), K(length)); - } else { - uint64_t val_len = *reinterpret_cast(data + sizeof(uint8_t) + sizeof(uint16_t)); - if (length < 
sizeof(uint16_t) + sizeof(uint64_t) + val_len) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("data length not enough for opaque val.", K(ret), K(length), K(val_len)); - } else { - ret = result.append(data, val_len + sizeof(uint16_t) + sizeof(uint64_t) + sizeof(uint8_t)); - } + LOG_WARN("data length not enough for opaque len.", K(ret), K(pos_)); + } else if (OB_FAIL(cursor_->get(pos_, sizeof(uint8_t) + sizeof(uint16_t) + sizeof(int64_t) + val, data))) { + LOG_WARN("get data fail", K(ret), K(pos_)); + } else if (OB_FAIL(result.append(data))) { + LOG_WARN("failed to append integer date.", K(ret), K(pos_)); } } else { ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid json opaque vertype.", K(ret), K(src_vertype)); + LOG_WARN("invalid json opaque vertype.", K(ret), K(vertype)); } break; } @@ -3997,81 +3621,208 @@ int ObJsonBin::rebuild_json_value(const char *data, return ret; } -int ObJsonBin::rebuild(ObJsonBuffer &result) +int ObJsonBin::replace_value(const ObString &new_data) { INIT_SUCC(ret); - result.reuse(); - char *ptr = curr_.ptr(); - if (OB_ISNULL(ptr)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("binary is null ptr.", K(ret)); - } else { - uint64_t offset = 0; - uint8_t type = 0; - // first parse header type - type = *reinterpret_cast(ptr); - - // do recursion - if (OB_FAIL(rebuild_json_value(ptr + offset, curr_.length() - offset, type, type, uint_val_, result))) { - LOG_WARN("do rebuild recursion failed.", K(ret), K(type)); - } + uint64_t curr_area_size = 0; + if (this->is_inline_vertype()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("can not replace inline", K(ret)); + } else if (OB_FAIL(get_area_size(curr_area_size))) { + LOG_WARN("get area size fail", K(ret)); + } else if (curr_area_size < new_data.length()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("new_area_size is too large", K(ret), K(curr_area_size), "new_data_length", new_data.length()); + } else if (OB_FAIL(cursor_->set(pos_, new_data))) { + LOG_WARN("set data fail", K(ret), K(pos_), K(curr_area_size), K(new_data)); 
} return ret; } -uint64_t ObJsonBin::get_serialize_size() const +int ObJsonBin::try_update_inplace_in_extend( + int index, + ObJsonBin *new_value, + bool &is_update_inplace) { - uint64_t size = 0; - ObJBVerType ver_type = get_vertype(); - if (ObJsonVerType::is_array(ver_type) - || ObJsonVerType::is_object(ver_type) - || ObJsonVerType::is_opaque_or_string(ver_type)) { - size = get_used_bytes(); + INIT_SUCC(ret); + ObJsonBin child; + ObString new_data; + uint64_t child_area_size = 0; + uint64_t new_area_size = 0; + uint64_t src_value_offset = 0; + uint8_t src_value_type = 0; + uint8_t new_value_type = OB_JSON_TYPE_GET_INLINE(new_value->get_type()); + uint64_t real_offset = 0; + if (OB_FAIL(get_value_entry(index, src_value_offset, src_value_type))) { + LOG_WARN("get_value_entry fail", K(ret), K(index)); + } else if (! is_forward_v0(src_value_type)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not in extend", K(ret), K(index), K(src_value_type), K(src_value_offset)); + } else if (OB_FALSE_IT(real_offset = get_extend_value_offset(src_value_offset))) { + } else if (OB_FAIL(get_value(index, child))) { + LOG_WARN("get child fail", K(ret), K(index)); + } else if (OB_FAIL(child.get_area_size(child_area_size))) { + LOG_WARN("get area size fail", K(ret)); + } else if (OB_FAIL(new_value->get_area_size(new_area_size))) { + LOG_WARN("get area size fail", K(ret)); + } else if (! 
need_type_prefix(new_value_type)) { + if (child_area_size + child.pos_ - real_offset < new_area_size + OB_JSON_BIN_VALUE_TYPE_LEN) { // skip + } else if (OB_FAIL(new_value->get_value_binary(new_data))) { + LOG_WARN("get_value_binary fail", K(ret)); + } else if (new_data.length() != new_area_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("new data length not equal to new_area_size", K(new_data.length()), K(new_area_size)); + } else if (OB_FAIL(cursor_->write_i8(real_offset, new_value_type))) { + LOG_WARN("write type byte fail", K(ret), K(index), K(src_value_offset), K(real_offset), K(child.get_type()), K(new_value_type)); + } else if (OB_FAIL(cursor_->set(real_offset + OB_JSON_BIN_VALUE_TYPE_LEN, new_data))) { + LOG_WARN("set value fail", K(ret), K(index), K(new_data)); + } else if (OB_FAIL(record_extend_inplace_update_offset( + index, real_offset, new_area_size + OB_JSON_BIN_VALUE_TYPE_LEN, new_value->get_type()))) { + LOG_WARN("record_extend_inplace_update_offset fail", K(ret)); + } else { + is_update_inplace = true; + } + } else if (child_area_size + child.pos_ - real_offset < new_area_size) { // skip + } else if (OB_FAIL(new_value->get_value_binary(new_data))) { + LOG_WARN("get_value_binary fail", K(ret)); + } else if (new_data.length() != new_area_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("new data length not equal to new_area_size", K(new_data.length()), K(new_area_size)); + } else if (OB_FAIL(cursor_->set(real_offset, new_data))) { + LOG_WARN("set value fail", K(ret), K(index), K(new_data)); + } else if (OB_FAIL(record_extend_inplace_update_offset( + index, real_offset, new_area_size, new_value->get_type()))) { + LOG_WARN("record_extend_inplace_update_offset fail", K(ret)); } else { - size = 1 /*vertype byte*/ + get_used_bytes(); + is_update_inplace = true; } - return size; + return ret; +} + +int ObJsonBin::try_update_inplace( + int index, + ObJsonBin *new_value, + bool &is_update_inplace) +{ + INIT_SUCC(ret); + ObJsonBin child; + ObString new_data; + 
uint64_t child_area_size = 0; + uint64_t new_area_size = 0; + uint64_t src_value_offset = 0; + uint8_t src_value_type = 0; + uint8_t new_value_type = OB_JSON_TYPE_GET_INLINE(new_value->get_type()); + if (OB_FAIL(get_value_entry(index, src_value_offset, src_value_type))) { + LOG_WARN("get_value_entry fail", K(ret), K(index)); + } else if (is_forward_v0(src_value_type)) { + if (OB_FAIL(try_update_inplace_in_extend(index, new_value, is_update_inplace))) { + LOG_WARN("try_update_inplace_in_extend fail", K(ret), K(index)); + } + } else if (OB_FAIL(get_value(index, child))) { + LOG_WARN("get child fail", K(ret), K(index)); + } else if (child.is_inline_vertype()) { // can not inplace, skip + } else if (OB_FAIL(child.get_area_size(child_area_size))) { + LOG_WARN("get area size fail", K(ret)); + } else if (OB_FAIL(new_value->get_area_size(new_area_size))) { + LOG_WARN("get area size fail", K(ret)); + } else if (child_area_size < new_area_size) { // skip + } else if (OB_FAIL(new_value->get_value_binary(new_data))) { + LOG_WARN("get area size fail", K(ret)); + } else if (new_data.length() != new_area_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("new data length not equal to new_area_size", K(new_data.length()), K(new_area_size)); + } else if (OB_FAIL(child.replace_value(new_data))) { + LOG_WARN("replace child value fail", K(ret), K(index), K(new_data)); + } else if (src_value_type != new_value_type && OB_FAIL(set_value_entry(index, src_value_offset, new_value_type))) { + LOG_WARN("set_value_entry fail", K(ret), K(index), K(src_value_offset)); + } else if (OB_FAIL(record_inplace_update_offset(index, new_value, src_value_type != new_value_type))) { + LOG_WARN("record_inplace_update_offset fail", K(ret), K(index)); + } else { + is_update_inplace = true; + } + return ret; +} + +int ObJsonBin::rebuild(ObJsonBuffer &result) const +{ + INIT_SUCC(ret); + result.reuse(); + if (is_empty_data()) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("binary is empty ptr.", K(ret), KPC(this)); + } 
else if (OB_FAIL(add_doc_header_v0(result))) { + LOG_WARN("add_doc_header_v0 fail", K(ret)); + } else if (OB_FAIL(rebuild_json_value(result))) { + LOG_WARN("do rebuild recursion failed.", K(ret), K(get_type())); + } else if (OB_FAIL(set_doc_header_v0(result, result.length()))) { + LOG_WARN("set_doc_header_v0 fail", K(ret)); + } + return ret; } void ObJsonBin::destroy() { - result_.reset(); - stack_buf_.reset(); + if (OB_NOT_NULL(ctx_) && is_alloc_ctx_) { + ctx_->~ObJsonBinCtx(); + allocator_->free(ctx_); + ctx_ = nullptr; + } + node_stack_.reset(); + local_cursor_.reset(); + cursor_ = &local_cursor_; } -int ObJsonBin::stack_copy(ObJsonBuffer& src, ObJsonBuffer& dst) +int ObJsonBin::reset() { INIT_SUCC(ret); - dst.reset(); - uint64_t len = src.length(); - if (OB_FAIL(dst.append(src.ptr(), src.length()))) { - LOG_WARN("copy path stack failed", K(ret), K(src.length())); + destroy(); + return ret; +} + +int ObJsonBin::init_meta() +{ + INIT_SUCC(ret); + ObJsonBinMetaParser meta_parser(cursor_, pos_, meta_); + if (OB_FAIL(meta_parser.parse())) { + LOG_WARN("meta parse fail", K(ret)); } return ret; } -int ObJsonBin::stack_pop(ObJsonBuffer& stack) +int ObJsonBin::ObJBNodeMetaStack::copy(const ObJBNodeMetaStack& src) { INIT_SUCC(ret); - uint64_t len = stack.length(); + buf_.reset(); + uint64_t len = src.buf_.length(); + if (0 == src.size()) { + } else if (OB_FAIL(buf_.append(src.buf_.ptr(), len))) { + LOG_WARN("copy path stack failed", K(ret), K(len)); + } + return ret; +} + +int ObJsonBin::ObJBNodeMetaStack::pop() +{ + INIT_SUCC(ret); + uint64_t len = buf_.length(); if (len > 0) { - stack.set_length(len - JB_PATH_NODE_LEN); + buf_.set_length(len - JB_PATH_NODE_LEN); } else { ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed pop stack.", K(ret)); + LOG_WARN("failed pop stack", K(ret)); } return ret; } -int ObJsonBin::stack_back(ObJsonBuffer& stack, ObJBNodeMeta& node, bool is_pop) +int ObJsonBin::ObJBNodeMetaStack::back(ObJBNodeMeta& node, bool is_pop) { INIT_SUCC(ret); - 
uint64_t len = stack.length(); + uint64_t len = buf_.length(); if (len > 0) { - char* data = (stack.ptr() + len) - JB_PATH_NODE_LEN; + char* data = (buf_.ptr() + len) - JB_PATH_NODE_LEN; node = *(reinterpret_cast(data)); if (is_pop) { - stack.set_length(len - JB_PATH_NODE_LEN); + buf_.set_length(len - JB_PATH_NODE_LEN); } } else { ret = OB_ERR_UNEXPECTED; @@ -4080,18 +3831,33 @@ int ObJsonBin::stack_back(ObJsonBuffer& stack, ObJBNodeMeta& node, bool is_pop) return ret; } -int ObJsonBin::stack_push(ObJsonBuffer& stack, const ObJBNodeMeta& node) -{ - return stack.append(reinterpret_cast(&node), JB_PATH_NODE_LEN); -} - -int ObJsonBin::stack_at(ObJsonBuffer& stack, uint32_t idx, ObJBNodeMeta& node) +int ObJsonBin::ObJBNodeMetaStack::back(ObJBNodeMeta& node) const { INIT_SUCC(ret); - uint32_t size = stack.length() / JB_PATH_NODE_LEN; + uint64_t len = buf_.length(); + if (len > 0) { + const char* data = (buf_.ptr() + len) - JB_PATH_NODE_LEN; + node = *(reinterpret_cast(data)); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed get stack node.", K(ret)); + } + return ret; +} + + +int ObJsonBin::ObJBNodeMetaStack::push(const ObJBNodeMeta& node) +{ + return buf_.append(reinterpret_cast(&node), JB_PATH_NODE_LEN); +} + +int ObJsonBin::ObJBNodeMetaStack::at(uint32_t idx, ObJBNodeMeta& node) const +{ + INIT_SUCC(ret); + uint32_t size = buf_.length() / JB_PATH_NODE_LEN; if (size > idx) { - char* data = (stack.ptr() + idx * JB_PATH_NODE_LEN); - node = *(reinterpret_cast(data)); + const char* data = (buf_.ptr() + idx * JB_PATH_NODE_LEN); + node = *(reinterpret_cast(data)); } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed invalid id.", K(idx), K(size)); @@ -4099,20 +3865,20 @@ int ObJsonBin::stack_at(ObJsonBuffer& stack, uint32_t idx, ObJBNodeMeta& node) return ret; } -void ObJsonBin::stack_update(ObJsonBuffer& stack, uint32_t idx, const ObJBNodeMeta& new_value) +void ObJsonBin::ObJBNodeMetaStack::update(uint32_t idx, const ObJBNodeMeta& new_value) { - ObJBNodeMeta* node = 
reinterpret_cast(stack.ptr() + idx * JB_PATH_NODE_LEN); + ObJBNodeMeta* node = reinterpret_cast(buf_.ptr() + idx * JB_PATH_NODE_LEN); *node = new_value; } -int32_t ObJsonBin::stack_size(const ObJsonBuffer& stack) const +int32_t ObJsonBin::ObJBNodeMetaStack::size() const { - return stack.length() / JB_PATH_NODE_LEN; + return buf_.length() / JB_PATH_NODE_LEN; } -void ObJsonBin::stack_reset(ObJsonBuffer& stack) +void ObJsonBin::ObJBNodeMetaStack::reset() { - stack.reset(); + buf_.reset(); } ObJBVerType ObJsonVerType::get_json_vertype(ObJsonNodeType in_type) @@ -4454,6 +4220,19 @@ bool ObJsonVerType::is_scalar(ObJBVerType type) return ret_bool; } +bool ObJsonVerType::is_signed_online_integer(uint8_t type) +{ + bool res = false; + ObJBVerType vertype = static_cast(OB_JSON_TYPE_GET_INLINE(type)); + ObJsonNodeType node_type = ObJsonVerType::get_json_type(vertype); + if (! OB_JSON_TYPE_IS_INLINE(type)) { + } else if (ObJsonNodeType::J_INT == node_type + || ObJsonNodeType::J_OINT == node_type) { + res = true; + } + return res; +} + /* var size */ int ObJsonVar::read_var(const char *data, uint8_t type, uint64_t *var) { @@ -4490,6 +4269,61 @@ int ObJsonVar::read_var(const char *data, uint8_t type, uint64_t *var) return ret; } +int ObJsonVar::read_var(const ObILobCursor *cursor, int64_t offset, uint8_t type, uint64_t *var) +{ + INIT_SUCC(ret); + if (OB_ISNULL(cursor)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("input data is null.", K(ret)); + } else { + ObJsonBinLenSize size = static_cast(type); + switch (size) { + case JBLS_UINT8: { + uint8_t data = 0; + if (OB_FAIL(cursor->read_i8(offset, reinterpret_cast(&data)))) { + LOG_WARN("read_i8 fail", K(ret), K(offset), K(type)); + } else { + *var = static_cast(data); + } + break; + } + case JBLS_UINT16: { + uint16_t data = 0; + if (OB_FAIL(cursor->read_i16(offset, reinterpret_cast(&data)))) { + LOG_WARN("read_i16 fail", K(ret), K(offset), K(type)); + } else { + *var = static_cast(data); + } + break; + } + case JBLS_UINT32: { + 
uint32_t data = 0; + if (OB_FAIL(cursor->read_i32(offset, reinterpret_cast(&data)))) { + LOG_WARN("read_i32 fail", K(ret), K(offset), K(type)); + } else { + *var = static_cast(data); + } + break; + } + case JBLS_UINT64: { + uint64_t data = 0; + if (OB_FAIL(cursor->read_i64(offset, reinterpret_cast(&data)))) { + LOG_WARN("read_i64 fail", K(ret), K(offset), K(type)); + } else { + *var = static_cast(data); + } + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid var type.", K(ret), K(type)); + break; + } + } + } + return ret; +} + int ObJsonVar::append_var(uint64_t var, uint8_t type, ObJsonBuffer &result) { INIT_SUCC(ret); @@ -4601,6 +4435,41 @@ int ObJsonVar::set_var(uint64_t var, uint8_t type, char *pos) return ret; } +int ObJsonVar::set_var(ObILobCursor *cursor, int64_t offset, uint64_t var, uint8_t type) +{ + INIT_SUCC(ret); + if (OB_ISNULL(cursor)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("output pos is null.", K(ret)); + } else { + ObJsonBinLenSize size = static_cast(type); + switch (size) { + case JBLS_UINT8: { + ret = cursor->write_i8(offset, var); + break; + } + case JBLS_UINT16: { + ret = cursor->write_i16(offset, var); + break; + } + case JBLS_UINT32: { + ret = cursor->write_i32(offset, var); + break; + } + case JBLS_UINT64: { + ret = cursor->write_i64(offset, var); + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid var type.", K(ret), K(size)); + break; + } + } + } + return ret; +} + uint64_t ObJsonVar::get_var_size(uint8_t type) { uint64_t var_size = JBLS_MAX; @@ -4678,6 +4547,61 @@ int ObJsonVar::read_var(const char *data, uint8_t type, int64_t *var) return ret; } +int ObJsonVar::read_var(const ObILobCursor *cursor, int64_t offset, uint8_t type, int64_t *var) +{ + INIT_SUCC(ret); + if (OB_ISNULL(cursor)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("input data is null.", K(ret)); + } else { + ObJsonBinLenSize size = static_cast(type); + switch (size) { + case JBLS_UINT8: { + int8_t data = 0; + if 
(OB_FAIL(cursor->read_i8(offset, &data))) { + LOG_WARN("read_i8 fail", K(ret), K(offset), K(type)); + } else { + *var = static_cast(data); + } + break; + } + case JBLS_UINT16: { + int16_t data = 0; + if (OB_FAIL(cursor->read_i16(offset, &data))) { + LOG_WARN("read_i16 fail", K(ret), K(offset), K(type)); + } else { + *var = static_cast(data); + } + break; + } + case JBLS_UINT32: { + int32_t data = 0; + if (OB_FAIL(cursor->read_i32(offset, &data))) { + LOG_WARN("read_i32 fail", K(ret), K(offset), K(type)); + } else { + *var = static_cast(data); + } + break; + } + case JBLS_UINT64: { + int64_t data = 0; + if (OB_FAIL(cursor->read_i64(offset, &data))) { + LOG_WARN("read_i64 fail", K(ret), K(offset), K(type)); + } else { + *var = static_cast(data); + } + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid var type.", K(ret), K(type)); + break; + } + } + } + return ret; +} + uint64_t ObJsonVar::var_int2uint(int64_t var) { ObJsonBinLenSize size = static_cast(ObJsonVar::get_var_type(var)); @@ -4756,5 +4680,659 @@ uint8_t ObJsonVar::get_var_type(int64_t var) } return static_cast(lsize); } + +bool ObJsonVar::is_fit_var_type(uint64_t var, uint8_t type) +{ + return ObJsonVar::get_var_type(var) <= type; +} + +int ObJsonBin::get_key_entry(int index, uint64_t &key_offset, uint64_t &key_len) const +{ + INIT_SUCC(ret); + uint8_t var_type = entry_var_type(); + uint64_t offset = get_key_entry_offset(index); + if (OB_FAIL(ObJsonVar::read_var(cursor_, pos_ + offset, var_type, &key_offset))) { + LOG_WARN("read key_offset fail", K(ret)); + } else if (OB_FAIL(ObJsonVar::read_var(cursor_, pos_ + offset + ObJsonVar::get_var_size(var_type), var_type, &key_len))) { + LOG_WARN("read key_len fail", K(ret)); + } + return ret; +} + +int64_t ObJsonBin::get_value_entry_size() const +{ + uint8_t var_type = entry_var_type(); + return 1/*type size*/ + ObJsonVar::get_var_size(var_type) /*offset size*/; +} + +int ObJsonBin::get_value_entry(int index, uint64_t &value_offset, uint8_t 
&value_type) const +{ + INIT_SUCC(ret); + uint8_t var_type = entry_var_type(); + uint64_t offset = get_value_entry_offset(index); + if (OB_FAIL(ObJsonVar::read_var(cursor_, pos_ + offset, var_type, &value_offset))) { + LOG_WARN("read obj_size_ fail", K(ret), K(pos_), K(offset), K(var_type)); + } else if (OB_FAIL(cursor_->read_i8(pos_ + offset + ObJsonVar::get_var_size(var_type), reinterpret_cast(&value_type)))) { + LOG_WARN("read_i8 fail", K(ret), K(pos_), K(offset), K(var_type)); + } + return ret; +} + +int ObJsonBin::get_value(int index, ObJsonBin &value) const +{ + INIT_SUCC(ret); + uint8_t var_type = entry_var_type(); + uint64_t offset = get_value_entry_offset(index); + uint64_t value_offset = 0; + uint8_t value_type = 0; + if (OB_FAIL(get_value_entry(index, value_offset, value_type))) { + LOG_WARN("get_value_entry fail", K(ret), K(index));; + } else if (OB_JSON_TYPE_IS_INLINE(value_type)) { + offset += pos_; + } else if (is_forward_v0(value_type)) { + offset = get_extend_value_offset(value_offset); + if (OB_FAIL(get_extend_value_type(offset, value_type))) { + LOG_WARN("get_extend_value_type fail", K(ret), K(index), K(value_offset)); + } else if (! 
need_type_prefix(value_type)) { + offset += sizeof(uint8_t); + } + } else { + offset = pos_ + value_offset; + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(reset_child(value, value_type, offset, var_type))) { + LOG_WARN("reset child value fail", K(ret), K(index), K(value_type), K(pos_), K(value_offset), K(offset)); + } + return ret; +} + +int ObJsonBin::set_key_entry(int index, uint64_t key_offset, uint64_t key_len, bool check) +{ + INIT_SUCC(ret); + uint8_t var_type = entry_var_type(); + uint64_t offset = get_key_entry_offset(index); + if (check && ObJsonVar::get_var_type(key_offset) > var_type) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("key_offset var type overflow", K(ret), K(key_offset), K(var_type)); + } else if (check && ObJsonVar::get_var_type(key_len) > var_type) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("key_len var type overflow", K(ret), K(key_len), K(var_type)); + } else if (OB_FAIL(ObJsonVar::set_var(cursor_, pos_ + offset, key_offset, var_type))) { + LOG_WARN("read key_offset fail", K(ret)); + } else if (OB_FAIL(ObJsonVar::set_var(cursor_, pos_ + offset + ObJsonVar::get_var_size(var_type), key_len, var_type))) { + LOG_WARN("read key_len fail", K(ret)); + } + return ret; +} + +int ObJsonBin::set_value_entry(int index, uint64_t value_offset, uint8_t value_type, bool check) +{ + INIT_SUCC(ret); + uint8_t var_type = entry_var_type(); + uint64_t offset = get_value_entry_offset(index); + if (check && (ObJsonVerType::is_signed_online_integer(value_type) ? 
+ (ObJsonVar::get_var_type(ObJsonVar::var_uint2int(value_offset, var_type)) > var_type) : + (ObJsonVar::get_var_type(value_offset) > var_type))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("var type overflow", K(ret), K(value_offset), K(var_type), K(value_type)); + } else if (OB_FAIL(ObJsonVar::set_var(cursor_, pos_ + offset, value_offset, var_type))) { + LOG_WARN("set_var fail", K(ret)); + } else if (OB_FAIL(cursor_->write_i8(pos_ + offset + ObJsonVar::get_var_size(var_type), value_type))) { + LOG_WARN("write_i8 fail", K(ret)); + } + return ret; +} + +int ObJsonBin::set_obj_size(uint64_t obj_size) +{ + INIT_SUCC(ret); + uint64_t offset = get_obj_size_offset(); + if (OB_FAIL(ObJsonVar::set_var(cursor_, pos_ + offset, obj_size, obj_size_var_type()))) { + LOG_WARN("set_var fail", K(ret)); + } else { + meta_.set_obj_size(obj_size); + } + return ret; +} + +int ObJsonBin::set_element_count(uint64_t count) +{ + INIT_SUCC(ret); + uint64_t offset = get_element_count_offset(); + if (OB_FAIL(ObJsonVar::set_var(cursor_, pos_ + offset, count, element_count_var_type()))) { + LOG_WARN("set_var fail", K(ret)); + } else { + meta_.set_element_count(count); + } + return ret; +} + +int ObJsonBin::parse_doc_header_v0() +{ + INIT_SUCC(ret); + ObString header_data; + uint8_t type = 0; + const ObJsonBinDocHeader *header = nullptr; + if (OB_FAIL(cursor_->get(pos_, sizeof(ObJsonBinDocHeader), header_data))) { + LOG_WARN("get header data fail", K(ret), K(pos_)); + } else if (OB_ISNULL(ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx is null", K(ret)); + } else if (OB_FALSE_IT(type = *reinterpret_cast(header_data.ptr()))) { + } else if (J_DOC_HEADER_V0 != type) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("type not J_DOC_HEADER_V0", K(ret), K(type)); + } else if (OB_ISNULL(header = reinterpret_cast(header_data.ptr()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get header fail", K(ret), K(header_data)); + } else { + pos_ = sizeof(ObJsonBinDocHeader); + ctx_->extend_seg_offset_ = 
header->extend_seg_offset_; + } + return ret; +} + + +int ObJsonBin::add_doc_header_v0(ObJsonBuffer &buffer) +{ + ObJsonBinDocHeader header; + return buffer.append(reinterpret_cast(&header), sizeof(header)); +} + +int ObJsonBin::set_doc_header_v0( + ObJsonBuffer &buffer, + int64_t extend_seg_offset) +{ + INIT_SUCC(ret); + ObJsonBinDocHeader *header = nullptr; + if (buffer.length() < sizeof(ObJsonBinDocHeader)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("length not enough", K(ret), K(buffer.length())); + } else if (OB_ISNULL(header = reinterpret_cast(buffer.ptr()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("header is null", K(ret)); + } else { + header->extend_seg_offset_ = extend_seg_offset; + } + return ret; +} + +int ObJsonBin::set_doc_header_v0( + ObString &buffer, + int64_t extend_seg_offset) +{ + INIT_SUCC(ret); + ObJsonBinDocHeader *header = nullptr; + if (buffer.length() < sizeof(ObJsonBinDocHeader)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("length not enough", K(ret), K(buffer.length())); + } else if (OB_ISNULL(header = reinterpret_cast(buffer.ptr()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("header is null", K(ret)); + } else { + header->extend_seg_offset_ = extend_seg_offset; + } + return ret; +} + +static int append_key_to_json_path(const ObString key, ObJsonBuffer &buf) +{ + INIT_SUCC(ret); + if (key.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("key is empty", K(ret)); + } else if (OB_FAIL(buf.append("\"", 1))) { + LOG_WARN("append quote fail", K(ret)); + } + for (int i = 0; OB_SUCC(ret) && i < key.length(); ++i) { + char c = *(key.ptr() + i); + if (c == '"' && OB_FAIL(buf.append("\\", 1))) { + LOG_WARN("append slash fail", K(ret), K(i)); + } else if (OB_FAIL(buf.append(&c, 1))) { + LOG_WARN("append fail", K(ret), K(i), K(c)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(buf.append("\"", 1))) { + LOG_WARN("append quote fail", K(ret)); + } + return ret; +} + +int ObJsonBin::get_json_path_at_iter(int index, ObString &path) const +{ + INIT_SUCC(ret); 
+ ObJsonBuffer buf(allocator_); + + int size = node_stack_.size(); + ObJBNodeMeta node; + ObString key; + ObJsonBin bin; + + if (OB_FAIL(buf.append("$", 1))) { + LOG_WARN("append $ fail", K(ret)); + } + + for (int i = 0; OB_SUCC(ret) && i < size; ++i) { + if (OB_FAIL(node_stack_.at(i, node))) { + LOG_WARN("get node fail", K(ret), K(i), K(size)); + } else if (OB_FAIL(reset_child(bin, node.offset_))) { + LOG_WARN("reset fail", K(ret)); + } else if (ObJBVerType::J_ARRAY_V0 == OB_JSON_TYPE_GET_INLINE(node.ver_type_)) { + ObFastFormatInt ffi(node.idx_); + if (OB_FAIL(buf.append("[", 1))) { + LOG_WARN("append [ fail", K(ret)); + } else if (OB_FAIL(buf.append(ffi.ptr(), ffi.length()))) { + LOG_WARN("append idx fail", K(ret), K(node.idx_)); + } else if (OB_FAIL(buf.append("]", 1))) { + LOG_WARN("append ] fail", K(ret)); + } + } else if (OB_FAIL(bin.get_key(node.idx_, key))) { + LOG_WARN("get key fail", K(ret), K(node)); + } else if (OB_FAIL(buf.append(".", 1))) { + LOG_WARN("append . fail", K(ret), K(key)); + } else if (OB_FAIL(append_key_to_json_path(key, buf))) { + LOG_WARN("append key fail", K(ret), K(key)); + } + } + + if (index < 0 ) { + } else if (ObJBVerType::J_ARRAY_V0 == get_vertype()) { + ObFastFormatInt ffi(index); + if (OB_FAIL(buf.append("[", 1))) { + LOG_WARN("append [ fail", K(ret)); + } else if (OB_FAIL(buf.append(ffi.ptr(), ffi.length()))) { + LOG_WARN("append idx fail", K(ret), K(index)); + } else if (OB_FAIL(buf.append("]", 1))) { + LOG_WARN("append ] fail", K(ret)); + } + } else if (ObJBVerType::J_OBJECT_V0 != get_vertype()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("must call at object or array", K(ret), K(get_type())); + } else if (OB_FAIL(get_key(index, key))) { + LOG_WARN("get key fail", K(ret), K(index)); + } else if (OB_FAIL(buf.append(".", 1))) { + LOG_WARN("append . 
fail", K(ret), K(key)); + } else if (OB_FAIL(append_key_to_json_path(key, buf))) { + LOG_WARN("append key fail", K(ret), K(key)); + } + + if (OB_SUCC(ret)) { + buf.get_result_string(path); + } + return ret; +} + +int ObJsonBin::record_extend_inplace_update_offset(int index, int64_t value_offset, int64_t value_len, uint8_t value_type) +{ + INIT_SUCC(ret); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + ObString path; + ObString new_data; + ObString value; + if (OB_ISNULL(update_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update ctx is null", K(ret), K(index), K(pos_)); + } else if (OB_FAIL(get_json_path_at_iter(index, path))) { + LOG_WARN("get_json_path_at_iter fail", K(ret)); + } else if (OB_FAIL(cursor_->get(value_offset + (need_type_prefix(value_type) ? 0 : 1), value_len - (need_type_prefix(value_type) ? 0 : 1), new_data))) { + LOG_WARN("get value data fail", K(ret), K(index), K(value_type)); + } else if (OB_FAIL(ob_write_string(*allocator_, new_data, value))) { + LOG_WARN("copy value data fail", K(ret), K(value), K(value_offset), K(pos_)); + } else if (OB_FAIL(update_ctx->record_diff(ObJsonDiffOp::REPLACE, value_type, path, value))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(value_type), K(value_offset), K(path)); + } else if (OB_FAIL(update_ctx->record_binary_diff(value_offset, value_len))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(value_type), K(value_offset), K(path)); + } + return ret; +} + +int ObJsonBin::record_inline_update_offset(int index) +{ + INIT_SUCC(ret); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + uint64_t inline_value_offset = get_value_entry_offset(index); + uint64_t inline_value = 0; + uint8_t value_type = 0; + ObString path; + ObString new_data; + ObString value; + if (OB_FAIL(get_value_entry(index, inline_value, value_type))) { + LOG_WARN("get value entry fail", K(ret), K(index)); + } else if (OB_ISNULL(update_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update ctx is null", 
K(ret), K(index), K(pos_)); + } else if (OB_FAIL(get_json_path_at_iter(index, path))) { + LOG_WARN("get_json_path_at_iter fail", K(ret)); + } else if (OB_FAIL(cursor_->get(pos_ + inline_value_offset, entry_var_size(), new_data))) { + LOG_WARN("get value data fail", K(ret), K(inline_value), K(value_type)); + } else if (OB_FAIL(ob_write_string(*allocator_, new_data, value))) { + LOG_WARN("copy value data fail", K(ret), K(value), K(inline_value_offset), K(pos_)); + } else if (OB_FAIL(update_ctx->record_inline_diff(ObJsonDiffOp::REPLACE, value_type, path, entry_var_type(), value))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(value_type), K(inline_value), K(path)); + } else if (OB_FAIL(update_ctx->record_binary_diff(pos_ + inline_value_offset, entry_var_size() + OB_JSON_BIN_VALUE_TYPE_LEN))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(value_type), K(inline_value), K(path)); + } + return ret; +} + +int ObJsonBin::record_inplace_update_offset(int index, ObJsonBin *new_value, bool is_record_header_binary) +{ + INIT_SUCC(ret); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + uint64_t value_offset = 0; + uint64_t value_len = 0; + uint8_t value_type = 0; + ObString path; + ObString new_data; + ObString value; + if (OB_FAIL(get_value_entry(index, value_offset, value_type))) { + LOG_WARN("get value entry fail", K(ret), K(index)); + } else if (OB_FAIL(new_value->get_area_size(value_len))) { + LOG_WARN("get_area_size fail", K(ret), K(value_type), K(value_offset), K(index)); + } else if (OB_ISNULL(update_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update ctx is null", K(ret), K(index), K(pos_)); + } else if (OB_FAIL(get_json_path_at_iter(index, path))) { + LOG_WARN("get_json_path_at_iter fail", K(ret)); + } else if (OB_FAIL(new_value->get_value_binary(new_data))) { + LOG_WARN("get_value_binary fail", K(ret)); + } else if (OB_FAIL(ob_write_string(*allocator_, new_data, value))) { + LOG_WARN("copy value data fail", K(ret), 
K(value_offset), K(value_len)); + } else if (OB_FAIL(update_ctx->record_diff(ObJsonDiffOp::REPLACE, value_type, path, value))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(value_type), K(value_offset), K(value_len), K(path)); + } else if (OB_FAIL(update_ctx->record_binary_diff(pos_ + value_offset, value_len))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(value_type), K(value_offset), K(value_len), K(path)); + } else if (is_record_header_binary && OB_FAIL(update_ctx->record_binary_diff( + pos_ + get_value_entry_offset(index), + entry_var_size() + OB_JSON_BIN_VALUE_TYPE_LEN))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(value_type), K(value_offset), K(value_len), K(path)); + } + return ret; +} + +int ObJsonBin::record_append_update_offset(int index, int64_t value_offset, int64_t value_len, uint8_t value_type) +{ + INIT_SUCC(ret); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + ObString path; + ObString new_data; + ObString value; + if (OB_ISNULL(update_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update ctx is null", K(ret), K(index), K(pos_)); + } else if (OB_FAIL(get_json_path_at_iter(index, path))) { + LOG_WARN("get_json_path_at_iter fail", K(ret)); + } else if (OB_FAIL(cursor_->get(value_offset + (need_type_prefix(value_type) ? 0 : 1), value_len - (need_type_prefix(value_type) ? 
0 : 1), new_data))) { + LOG_WARN("get value data fail", K(ret), K(value_offset), K(value_len)); + } else if (OB_FAIL(ob_write_string(*allocator_, new_data, value))) { + LOG_WARN("copy value data fail", K(ret), K(value_offset), K(value_len)); + } else if (OB_FAIL(update_ctx->record_diff(ObJsonDiffOp::REPLACE, value_type, path, value))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(value_type), K(value_offset), K(value_len), K(path)); + } else if (OB_FAIL(update_ctx->record_binary_diff(value_offset, value_len))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(value_type), K(value_offset), K(value_len), K(path)); + } else if (OB_FAIL(update_ctx->record_binary_diff( + pos_ + get_value_entry_offset(index), + entry_var_size() + OB_JSON_BIN_VALUE_TYPE_LEN))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(value_type), K(value_offset), K(value_len), K(path)); + } + return ret; +} + +int ObJsonBin::record_remove_offset(int index) +{ + INIT_SUCC(ret); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + uint64_t value_entry_end_offset = get_value_entry_offset(this->element_count()); + ObString path; + if (OB_ISNULL(update_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update ctx is null", K(ret), K(index), K(pos_)); + } else if (OB_FAIL(get_json_path_at_iter(index, path))) { + LOG_WARN("get_json_path_at_iter fail", K(ret)); + } else if (OB_FAIL(update_ctx->record_remove_diff(path))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(path)); + } else if (OB_FAIL(update_ctx->record_binary_diff(pos_, value_entry_end_offset))) { + LOG_WARN("record diff fail", K(ret), K(index), K(pos_), K(path)); + } + return ret; +} + +int ObJsonBin::should_pack_diff(bool &is_should_pack) const +{ + INIT_SUCC(ret); + ObJsonBinUpdateCtx *update_ctx = get_update_ctx(); + uint64_t total_len = cursor_->get_length(); + uint64_t ext_len = total_len - get_extend_seg_offset(); + uint64_t ori_len = total_len - ext_len; + if (0 == total_len || 0 == 
ori_len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("binary is empty", K(ret), K(ori_len), K(ori_len), K(ext_len)); + } else if (nullptr == update_ctx || update_ctx->is_rebuild_all_) { + is_should_pack = false; + } else if (0 == pos_) { + // if pos is zero, means no json bin doc header, and means that old json data + // old json data is clob, can not use chunk size, so can not do partial update + is_should_pack = false; + } else if (0 == ext_len || update_ctx->is_no_update()) { // no append update + is_should_pack = true; + } else { + // if ext_len does not exceed 30% of ori_len, should pack diff + is_should_pack = (((double)(ext_len)) / ori_len) < 0.3; + } + return ret; +} + +int ObJsonBinMeta::to_header(ObJsonBinHeader &header) +{ + INIT_SUCC(ret); + header.type_ = get_type(); + header.entry_size_ = entry_var_type(); + header.count_size_ = element_count_var_type(); + header.obj_size_size_ = obj_size_var_type(); + header.is_continuous_ = is_continuous(); + return ret; +} + +int ObJsonBinMeta::to_header(ObJsonBuffer &buffer) +{ + INIT_SUCC(ret); + ObJsonBinHeader header; + uint64_t key_entry_size = 0; + uint64_t value_entry_size = 0; + if (OB_UNLIKELY(ObJsonNodeType::J_OBJECT != json_type() && ObJsonNodeType::J_ARRAY != json_type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("type not object or array", K(ret), "json type", json_type()); + } else if (OB_FAIL(to_header(header))) { + LOG_WARN("to_header fail", K(ret)); + } else if (OB_FAIL(buffer.append(reinterpret_cast(&header), sizeof(header)))) { + LOG_WARN("append header to buffer fail", K(ret)); + } else if (OB_FAIL(ObJsonVar::append_var(element_count(), element_count_var_type(), buffer))) { + LOG_WARN("failed to append array header member count", K(ret)); + } else if (OB_FAIL(ObJsonVar::append_var(obj_size(), obj_size_var_type(), buffer))) { + LOG_WARN("failed to append array header array size", K(ret)); + } else { + if (ObJsonNodeType::J_OBJECT == json_type()) { + key_entry_size = element_count() * (entry_var_size() 
* 2); + } + value_entry_size = element_count() * (entry_var_size() + OB_JSON_BIN_VALUE_TYPE_LEN); + if (OB_FAIL(buffer.reserve(key_entry_size + value_entry_size))) { + LOG_WARN("reserve buffer fail", K(ret), K(key_entry_size), K(value_entry_size)); + } else if (OB_FAIL(buffer.set_length(buffer.length() + key_entry_size + value_entry_size))) { + LOG_WARN("reserve buffer fail", K(ret), K(key_entry_size), K(value_entry_size)); + } + } + return ret; +} + +uint64_t ObJsonBinMeta::get_obj_size_offset() const +{ + return sizeof(ObJsonBinObjHeader) + + ObJsonVar::get_var_size(element_count_var_type()); +} + +uint64_t ObJsonBinMeta::get_element_count_offset() const +{ + return sizeof(ObJsonBinObjHeader); +} + +int ObJsonBinMeta::calc_entry_array() +{ + INIT_SUCC(ret); + key_offset_start_ = sizeof(ObJsonBinObjHeader) + + ObJsonVar::get_var_size(element_count_var_type()) + + ObJsonVar::get_var_size(obj_size_var_type()); + value_offset_start_ = key_offset_start_; + if (ObJsonNodeType::J_OBJECT == json_type()) { + value_offset_start_ += element_count() * (entry_var_size() * 2); + } + return ret; +} + +uint64_t ObJsonBinMeta::get_value_entry_offset(int index) const +{ + return value_offset_start_ + index * (entry_var_size() + OB_JSON_BIN_VALUE_TYPE_LEN); +} + +uint64_t ObJsonBinMeta::get_key_entry_offset(int index) const +{ + return key_offset_start_ + index * (entry_var_size() * 2); +} + +uint64_t ObJsonBinMeta::entry_var_size() const +{ + return ObJsonVar::get_var_size(entry_var_type()); +} + +uint64_t ObJsonBinMeta::obj_size_var_size() const +{ + return ObJsonVar::get_var_size(obj_size_var_type()); +} + +uint64_t ObJsonBinMeta::element_count_var_size() const +{ + return ObJsonVar::get_var_size(element_count_var_type()); +} + +int ObJsonBinMetaParser::parse() +{ + INIT_SUCC(ret); + if (OB_ISNULL(cursor_)) { // skip + } else if (OB_FAIL(parse_type_())) { + LOG_WARN("parse type fail", K(ret), K(offset_)); + } else if (OB_FAIL(parse_header_())) { + LOG_WARN("parse header fail", 
K(ret), K(offset_)); + } + return ret; +} + +int ObJsonBinMetaParser::parse_type_() +{ + INIT_SUCC(ret); + // private method, null and length check at caller + uint8_t header_type = 0; + if (OB_FAIL(cursor_->read_i8(offset_, reinterpret_cast(&header_type)))) { + LOG_WARN("read_i8 fail", K(ret), K(offset_)); + } else { + meta_.set_type(header_type); + } + return ret; +} + +int ObJsonBinMetaParser::parse_header_() +{ + INIT_SUCC(ret); + if (ObJsonNodeType::J_OBJECT != meta_.json_type() && ObJsonNodeType::J_ARRAY != meta_.json_type()) { + } else if (ObJBVerType::J_OBJECT_V0 == meta_.vertype() || ObJBVerType::J_ARRAY_V0 == meta_.vertype()) { + ret = parse_header_v0_(); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not support vertype", K(ret), K(meta_.vertype())); + } + return ret; +} + +int ObJsonBinMetaParser::parse_header_v0_() +{ + INIT_SUCC(ret); + ObString header_data; + const ObJsonBinHeader *header = nullptr; + uint64_t obj_size = 0; + uint64_t element_count = 0; + int offset = offset_; + if (OB_FAIL(cursor_->get(offset, sizeof(ObJsonBinHeader), header_data))) { + LOG_WARN("get data fail", K(ret), K(offset)); + } else if (OB_ISNULL(header = reinterpret_cast(header_data.ptr()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cast header is null", K(ret), K(offset)); + } else if (OB_FALSE_IT(offset += sizeof(ObJsonBinHeader))) { + } else if (OB_FAIL(ObJsonVar::read_var(cursor_, offset, header->count_size_, &element_count))) { + LOG_WARN("read element_count_ fail", K(ret), KPC(header)); + } else if (OB_FALSE_IT(offset += ObJsonVar::get_var_size(header->count_size_))) { + } else if (OB_FAIL(ObJsonVar::read_var(cursor_, offset, header->obj_size_size_, &obj_size))) { + LOG_WARN("read obj_size_ fail", K(ret), KPC(header)); + } else { + offset += ObJsonVar::get_var_size(header->obj_size_size_); + meta_.set_type(header->type_); + meta_.set_entry_var_type(header->entry_size_); + meta_.set_element_count_var_type(header->count_size_); + 
meta_.set_obj_size_var_type(header->obj_size_size_); + meta_.set_is_continuous(header->is_continuous_); + meta_.set_obj_size(obj_size); + meta_.set_element_count(element_count); + if (OB_FAIL(meta_.calc_entry_array())) { + LOG_WARN("calc_entry_array fail", K(ret)); + } + } + return ret; +} + +const char *ObJsonBin::get_data() const +{ + INIT_SUCC(ret); + ObString data; + if (OB_FAIL(cursor_->get(pos_ + meta_.str_data_offset_, element_count(), data))) { + LOG_WARN("get data fail", K(ret), K(pos_), K(meta_)); + } + return data.ptr(); +} + +int ObJsonBin::get_data(ObString &data) +{ + INIT_SUCC(ret); + if (OB_FAIL(cursor_->get(pos_ + meta_.str_data_offset_, element_count(), data))) { + LOG_WARN("get data fail", K(ret), K(pos_), K(meta_)); + } + return ret; +} + +int ObJsonBin::get_extend_value_type(uint64_t offset, uint8_t &value_type) const +{ + INIT_SUCC(ret); + if (OB_FAIL(cursor_->read_i8(offset, reinterpret_cast(&value_type)))) { + LOG_WARN("read_i8 fail", K(ret), K(offset)); + } + return ret; +} + +int ObJsonBin::set_current(const ObString &data, int64_t offset) +{ + INIT_SUCC(ret); + if (OB_FAIL(cursor_->reset_data(data))) { + LOG_WARN("reset_data fail", K(ret), K(offset), K(pos_), KPC(cursor_)); + } else { + pos_ = offset; + } + return ret; +} + +ObJsonBinCtx::~ObJsonBinCtx() +{ + if (OB_NOT_NULL(update_ctx_) && is_update_ctx_alloc_) { + update_ctx_->~ObJsonBinUpdateCtx(); + } +} + } // namespace common } // namespace oceanbase diff --git a/deps/oblib/src/lib/json_type/ob_json_bin.h b/deps/oblib/src/lib/json_type/ob_json_bin.h index 296def9eb2..e332c48d0f 100644 --- a/deps/oblib/src/lib/json_type/ob_json_bin.h +++ b/deps/oblib/src/lib/json_type/ob_json_bin.h @@ -20,6 +20,7 @@ #include "lib/container/ob_array.h" #include "lib/container/ob_vector.h" #include "lib/number/ob_number_v2.h" // for number::ObNumber +#include "lib/utility/ob_fast_convert.h" namespace oceanbase { namespace common { @@ -95,7 +96,9 @@ typedef struct ObJsonBinHeader { count_size_(0), 
obj_size_size_(0), is_continuous_(0), - reserved_(0) {} + reserved_(0) + { + } uint8_t type_; // node type for current node uint8_t entry_size_ : 2; // the size describe var size of key_entry,val_entry uint8_t count_size_ : 2; // the size describe var size of element count @@ -103,9 +106,37 @@ typedef struct ObJsonBinHeader { uint8_t is_continuous_ : 1; // memory of current node and subtree is continous uint8_t reserved_ : 1; // reserved bit char used_size_[]; // var size + +public: + TO_STRING_KV( + K(type_), + K(entry_size_), + K(count_size_), + K(obj_size_size_), + K(is_continuous_), + K(reserved_)); + } ObJsonBinHeader; +struct ObJsonBinDocHeader +{ + uint64_t type_ : 8; + uint64_t reserved_ : 14; + uint64_t extend_seg_offset_ : 42; + + TO_STRING_KV( + K(type_), + K(reserved_), + K(extend_seg_offset_)); + + ObJsonBinDocHeader() : + type_(J_DOC_HEADER_V0), + reserved_(0), + extend_seg_offset_(0) + {} +}; + typedef ObJsonBinHeader ObJsonBinObjHeader; typedef ObJsonBinHeader ObJsonBinArrHeader; @@ -113,6 +144,8 @@ static const int OB_JSON_BIN_HEADER_LEN = 2; // actual size of ObJsonBinHeader static const int OB_JSON_BIN_OBJ_HEADER_LEN = 2; // actual size of ObJsonBinObjHeader static const int OB_JSON_BIN_ARR_HEADER_LEN = 2; // actual size of ObJsonBinArrHeader +static const int OB_JSON_BIN_VALUE_TYPE_LEN = sizeof(uint8_t); + class ObJsonVerType { public: static ObJsonNodeType get_json_type(ObJBVerType type); @@ -123,49 +156,352 @@ public: static bool is_custom(ObJBVerType type); static bool is_scalar(ObJBVerType type); static bool is_opaque_or_string(ObJBVerType type); + static bool is_signed_online_integer(uint8_t type); private: DISALLOW_COPY_AND_ASSIGN(ObJsonVerType); }; +class ObJsonBinUpdateCtx; +struct ObJsonBinCtx +{ +public: + ObJsonBinCtx(): + extend_seg_offset_(0), + update_ctx_(nullptr), + is_update_ctx_alloc_(false) + {} + + ~ObJsonBinCtx(); + + TO_STRING_KV( + K(extend_seg_offset_), K(is_update_ctx_alloc_)); + + int64_t extend_seg_offset_; + 
ObJsonBinUpdateCtx *update_ctx_; + bool is_update_ctx_alloc_; +}; + +struct ObJsonBinMeta +{ +public: + ObJsonBinMeta() + : + type_(0), + entry_size_(0), + count_size_(0), + obj_size_size_(0), + is_continuous_(0), + element_count_(0), + bytes_(0), + obj_size_(0), + key_offset_start_(0), + value_offset_start_(0), + field_type_(ObObjType::ObNullType), + str_data_offset_(0) + {} + + void reset() + { + type_ = 0; + entry_size_ = 0; + count_size_ = 0; + obj_size_size_ = 0; + is_continuous_ = 0; + element_count_ = 0; + bytes_ = 0; + obj_size_ = 0; + key_offset_start_ = 0; + value_offset_start_ = 0; + field_type_ = ObObjType::ObNullType; + str_data_offset_ = 0; + } + + void set_type(ObJBVerType vertype, bool is_inline) + { + uint8_t type = static_cast(vertype); + if (is_inline) { + type |= OB_JSON_TYPE_INLINE_MASK; + } + type_ = type; + } + + void set_type(uint8_t type) + { + type_ = type; + } + + uint8_t get_type() const + { + return type_; + } + + ObJsonNodeType json_type() const + { + return ObJsonVerType::get_json_type(vertype()); + } + + ObJBVerType vertype() const + { + return static_cast(OB_JSON_TYPE_GET_INLINE(type_)); + } + + bool is_inline_vertype() const + { + return OB_JSON_TYPE_IS_INLINE(type_); + } + + void set_is_continuous(bool is_continuous) + { + is_continuous_ = is_continuous; + } + bool is_continuous() const + { + return is_continuous_; + } + + + void set_entry_var_type(uint8_t var_type) + { + entry_size_ = var_type; + } + + uint8_t entry_var_type() const + { + return entry_size_; + } + + uint64_t entry_var_size() const; + + + void set_obj_size_var_type(uint8_t var_type) + { + obj_size_size_ = var_type; + } + + uint8_t obj_size_var_type() const + { + return obj_size_size_; + } + + uint64_t obj_size_var_size() const; + void set_obj_size(uint64_t size) + { + obj_size_ = size; + } + + uint64_t obj_size() const + { + return obj_size_; + } + + uint64_t get_obj_size_offset() const; + + uint64_t element_count() const { return element_count_; } + void 
set_element_count(uint64_t count) + { + element_count_ = count; + } + + void set_element_count_var_type(uint8_t var_type) + { + count_size_ = var_type; + } + + uint8_t element_count_var_type() const + { + return count_size_; + } + + uint64_t element_count_var_size() const; + uint64_t get_element_count_offset() const; + + int to_header(ObJsonBinHeader &buffer); + int to_header(ObJsonBuffer &buffer); + + int calc_entry_array(); + + uint64_t get_value_entry_offset(int index) const; + uint64_t get_key_entry_offset(int index) const; + +public: + TO_STRING_KV( + K(type_), + K(entry_size_), + K(count_size_), + K(obj_size_size_), + K(is_continuous_), + K(element_count_), + K(bytes_), + K(obj_size_), + K(key_offset_start_), + K(value_offset_start_), + K(field_type_), + K(str_data_offset_)); + +public: + // ObJsonBinHeader header_; + // using ObJsonBinHeader as a field reports the error: + // 'field 'header_' with variable sized type 'oceanbase::common::ObJsonBinHeader' + // not at the end of a struct or class is a GNU extension', + // so it is not used as a field. + // the fields below are copied from ObJsonBinHeader; do not use them directly, use the accessor methods instead. 
+ uint8_t type_; + uint8_t entry_size_ : 2; + uint8_t count_size_ : 2; + uint8_t obj_size_size_ : 2; + uint8_t is_continuous_ : 1; + + // elem count for obj or array, length for string or opaque + uint64_t element_count_; + uint64_t bytes_; // actual used bytes for curr iter node, inlined node will set 0 + + uint64_t obj_size_; + uint64_t key_offset_start_; + uint64_t value_offset_start_; + ObObjType field_type_; // field type for opaque + + uint64_t str_data_offset_; +}; + +class ObJsonBinMetaParser { + +public: + ObJsonBinMetaParser(const ObILobCursor *cursor, int64_t offset, ObJsonBinMeta &meta) + : cursor_(cursor), + offset_(offset), + meta_(meta) {} + int parse(); + +private: + int parse_type_(); + + int parse_header_(); + + int parse_header_v0_(); + +public: + TO_STRING_KV( + KP(cursor_), + K(offset_)); + +private: + const ObILobCursor *cursor_; + int64_t offset_; + ObJsonBinMeta &meta_; +}; + +class ObJsonBinSerializer; + class ObJsonBin : public ObIJsonBase { public: + friend class ObJsonBinSerializer; ObJsonBin() - : ObIJsonBase(NULL), - allocator_(NULL), - result_(NULL), - stack_buf_(NULL) + : ObIJsonBase(nullptr), + allocator_(nullptr), + meta_(), + cursor_(nullptr), + local_cursor_(), + pos_(0), + node_stack_(nullptr), + data_(nullptr), + int_val_(0), + ctx_(nullptr), + is_alloc_ctx_(false), + is_seek_only_(true), + is_schema_(false) { + cursor_ = &local_cursor_; } - explicit ObJsonBin(common::ObIAllocator *allocator) + explicit ObJsonBin(ObIAllocator *allocator) : ObIJsonBase(allocator), allocator_(allocator), - result_(allocator_), - stack_buf_(allocator_) + meta_(), + cursor_(nullptr), + local_cursor_(), + pos_(0), + node_stack_(allocator), + data_(nullptr), + int_val_(0), + ctx_(nullptr), + is_alloc_ctx_(false), + is_seek_only_(true), + is_schema_(false) { + cursor_ = &local_cursor_; } - explicit ObJsonBin(const char *data, const int64_t length, common::ObIAllocator *allocator = NULL) + + explicit ObJsonBin(ObIAllocator *allocator, ObJsonBinCtx 
*ctx, bool is_alloc_ctx = false) : ObIJsonBase(allocator), allocator_(allocator), - result_(allocator), - curr_(length, data), - is_alloc_(false), - stack_buf_(allocator) + meta_(), + cursor_(nullptr), + local_cursor_(), + pos_(0), + node_stack_(allocator), + data_(nullptr), + int_val_(0), + ctx_(ctx), + is_alloc_ctx_(is_alloc_ctx), + is_seek_only_(true), + is_schema_(false) { + cursor_ = &local_cursor_; + } + + explicit ObJsonBin(const char *data, const int64_t length, ObIAllocator *allocator) + : ObIJsonBase(allocator), + allocator_(allocator), + meta_(), + cursor_(nullptr), + local_cursor_(data, length), + pos_(0), + node_stack_(allocator), + data_(nullptr), + int_val_(0), + ctx_(0), + is_alloc_ctx_(false), + is_seek_only_(true), + is_schema_(false) + { + cursor_ = &local_cursor_; + } + + explicit ObJsonBin(const char *data, const int64_t length, ObJsonBinCtx *ctx) + : ObIJsonBase(nullptr), + allocator_(nullptr), + meta_(), + cursor_(nullptr), + local_cursor_(data, length), + pos_(0), + node_stack_(nullptr), + data_(nullptr), + int_val_(0), + ctx_(ctx), + is_alloc_ctx_(false), + is_seek_only_(true), + is_schema_(false) + { + cursor_ = &local_cursor_; + } + + virtual ~ObJsonBin() { + destroy(); } - virtual ~ObJsonBin() { result_.reset(); stack_buf_.reset(); } OB_INLINE bool get_boolean() const override { return static_cast(uint_val_); } OB_INLINE double get_double() const override { return double_val_; } OB_INLINE float get_float() const override { return float_val_; }; OB_INLINE int64_t get_int() const override { return int_val_; } OB_INLINE uint64_t get_uint() const override { return uint_val_; } - OB_INLINE const char *get_data() const override { return data_; } + OB_INLINE uint64_t get_inline_value() const { return inline_value_; } + //OB_INLINE const char *get_data() const override { return data_; } + OB_INLINE uint64_t get_data_length() const override { - uint64_t data_length = element_count_; + uint64_t data_length = get_element_count(); ObJsonNodeType 
type = json_type(); if (type == ObJsonNodeType::J_ARRAY || type == ObJsonNodeType::J_OBJECT) { - data_length = bytes_; + data_length = meta_.bytes_; } return data_length; } @@ -180,28 +516,47 @@ public: int get_obtime(ObTime &t) const override; OB_INLINE ObJBVerType get_vertype() const { - return static_cast(OB_JSON_TYPE_GET_INLINE(type_)); + return static_cast(OB_JSON_TYPE_GET_INLINE(get_type())); } + + OB_INLINE uint8_t get_type() const { return meta_.get_type(); } + + OB_INLINE bool is_inline_vertype() const + { + return OB_JSON_TYPE_IS_INLINE(get_type()); + } + OB_INLINE ObJsonInType get_internal_type() const override { return ObJsonInType::JSON_BIN; } - OB_INLINE uint64_t element_count() const override { return element_count_; } - OB_INLINE uint64_t get_used_bytes() const { return bytes_; } // return acutal used bytes for curr iter - uint64_t get_serialize_size() const; + OB_INLINE uint64_t element_count() const override { return meta_.element_count(); } + OB_INLINE uint64_t get_element_count() const { return meta_.element_count(); } + int get_used_bytes(uint64_t &size) const; OB_INLINE ObJsonNodeType json_type() const override { return static_cast(ObJsonVerType::get_json_type(get_vertype())); } OB_INLINE ObObjType field_type() const override { - return field_type_; + return meta_.field_type_; + } + int get_total_value(ObStringBuffer &res) const; + + virtual uint64_t member_count() const override + { + return (meta_.get_type() == static_cast(ObJsonNodeType::J_ARRAY) || meta_.get_type() == static_cast(ObJsonNodeType::J_OBJECT)) ? 
+ element_count() : 1; } int get_array_element(uint64_t index, ObIJsonBase *&value) const override; int get_object_value(uint64_t index, ObIJsonBase *&value) const override; int get_object_value(const ObString &key, ObIJsonBase *&value) const override; + int get_object_value(uint64_t index, ObString &key, ObIJsonBase *&value) const override; int get_key(uint64_t index, common::ObString &key_out) const override; - int get_raw_binary(common::ObString &out, ObIAllocator *allocator = NULL) const; - int get_use_size(uint64_t& used_size) const; - int get_max_offset(const char* data, ObJsonNodeType cur_node, uint64_t& max_offset) const ; + int raw_binary(common::ObString &out, ObIAllocator *allocator) const; + int get_raw_binary(common::ObString &out, ObIAllocator *allocator) const; + int get_raw_binary_v0(common::ObString &out, ObIAllocator *allocator) const; + int get_value_binary(ObString &out) const; + int get_area_size(uint64_t& size) const; + int get_serialize_size(uint64_t &size) const; int array_remove(uint64_t index) override; int object_remove(const common::ObString &key) override; int replace(const ObIJsonBase *old_node, ObIJsonBase *new_node) override; @@ -239,8 +594,23 @@ public: static OB_INLINE ObJBVerType get_otimestamptz_vertype() { return J_OTIMESTAMPTZ_V0; } static OB_INLINE ObJBVerType get_ointervalDS_vertype() { return J_ODAYSECOND_V0; } static OB_INLINE ObJBVerType get_ointervalYM_vertype() { return J_OYEARMONTH_V0; } + static OB_INLINE bool is_doc_header(uint8_t type) { return is_doc_header_v0(type); } +private: + static OB_INLINE bool is_forward_v0(uint8_t type) { return J_FORWARD_V0 == type; } + static OB_INLINE bool is_doc_header_v0(uint8_t type) { return J_DOC_HEADER_V0 == type; } + static int add_doc_header_v0(ObJsonBuffer &buffer); + static int set_doc_header_v0(ObJsonBuffer &buffer,int64_t extend_seg_offset); + static int set_doc_header_v0(ObString &buffer, int64_t extend_seg_offset); public: - int64_t to_string(char *buf, int64_t len) const; 
+ TO_STRING_KV( + K(meta_), + K(pos_), + K(int_val_), + K(uint_val_), + K(double_val_), + K(ctx_) + ); + /* parse json tree to json bin @param[in] Json_tree @@ -255,22 +625,6 @@ public: */ int to_tree(ObJsonNode *&json_tree); - /* - get json binary raw data with copy - always return raw data with common header - @param[out] buf Json binary raw data - @param[in] alloctor memory alloctor - @return Returns OB_SUCCESS on success, error code otherwise. - */ - int raw_binary(ObString &buf, ObIAllocator *allocator) const; - - /* - get json binary raw data without copy - @param[out] buf Json binary raw data - @return Returns OB_SUCCESS on success, error code otherwise. - */ - int raw_binary(ObString &buf) const; - /* get json binary raw data base on iter without copy @param[out] buf Json binary raw data @@ -285,12 +639,58 @@ public: */ int reset_iter(); + /** + * init current bin with type and data + * type is vertype, but may inline + * buffer is the data area + * pos is start parsing point for current node + * value_entry_var_type is used for inline value, indicate the data length need be read + */ + int reset( + const uint8_t type, + const ObString &buffer, + const int64_t pos, + const uint8_t value_entry_var_type, + ObJsonBinCtx *ctx); + + /** + * reset current bin to specify type and pos + * type and value_entry_var_type is used for inline value + */ + int reset(const uint8_t type, const int64_t pos, const uint8_t value_entry_var_type); + + /** + * reset current bin to specify pos + */ + int reset(const int64_t pos); + + /** + * init current bin with type and data + * buf and len is the data area + * pos is start parsing point for current node + * must ensure the first byte is vertype + */ + int reset(const ObString &buffer, int64_t pos, ObJsonBinCtx *ctx); + int reset(ObJsonBinCtx *ctx, int64_t pos); + + /** + * equal to reset , but buf will set curr_ + */ + int reset_child( + ObJsonBin &child, + const uint8_t child_type, + const int64_t child_pos, + const uint8_t 
value_entry_var_type) const; + int reset_child(ObJsonBin &child, const int64_t child_pos) const; + /* move iter to parent @return Returns OB_SUCCESS on success, error code otherwise. */ int move_parent_iter(); // move iter to parent + int get_parent(ObIJsonBase *& parent) const override; + /* move iter to child node by index @param[in] index The index. @@ -381,10 +781,11 @@ public: */ int rebuild(); - int get_parent(ObIJsonBase *& parent) const override - { - return OB_NOT_SUPPORTED; - } + /** + * Rebuild the whole json binary to buffer + */ + int rebuild(ObJsonBuffer &buffer) const; + /* Rebuild the json binary at iter position, and copy to string This function won't change the data itself. @@ -402,13 +803,20 @@ public: // release resource void destroy(); + void set_is_schema(bool is_schema) {is_schema_ = is_schema;} + virtual int reset(); + OB_INLINE const ObILobCursor* get_cursor() const { return cursor_; } + OB_INLINE ObILobCursor* get_cursor() { return cursor_; } + OB_INLINE void set_cursor(ObILobCursor *cursor) { cursor_ = cursor; } + OB_INLINE ObJsonBinCtx* get_ctx() const { return ctx_; } + OB_INLINE ObJsonBinUpdateCtx* get_update_ctx() const { return nullptr == ctx_ ? nullptr : ctx_->update_ctx_; } - virtual uint64_t member_count() const override - { - return (json_type() == ObJsonNodeType::J_ARRAY || json_type() == ObJsonNodeType::J_OBJECT) ? 
- element_count() : 1; - } + // get flag for json doc used only for search + OB_INLINE bool get_seek_flag() const { return is_seek_only_; } + // set flag for json doc + OB_INLINE void set_seek_flag(bool is_seek_only) { is_seek_only_ = is_seek_only; } + int clone_new_node(ObJsonBin*& res, common::ObIAllocator *allocator) const; private: // used as stack struct ObJBNodeMeta { @@ -424,153 +832,272 @@ private: ObJBNodeMeta() : ver_type_(0), size_type_(0), entry_type_(0), reserve(0), idx_(0), offset_(0), obj_size_(0) {} ObJBNodeMeta(const ObJBNodeMeta& src): ver_type_(src.ver_type_), size_type_(src.size_type_), entry_type_(src.entry_type_), reserve(0), idx_(src.idx_), offset_(src.offset_), obj_size_(src.obj_size_) {} + + TO_STRING_KV( + K(ver_type_), + K(size_type_), + K(entry_type_), + K(reserve), + K(idx_), + K(offset_), + K(obj_size_)); }; typedef struct ObJBNodeMeta ObJBNodeMeta; static const int64_t JB_PATH_NODE_LEN = sizeof(ObJBNodeMeta); static const int64_t OB_JSON_INSERT_LAST = -1; + class ObJBNodeMetaStack { + public: + ObJBNodeMetaStack(ObIAllocator *allocator) : buf_(allocator) {} + ~ObJBNodeMetaStack() {} + void update(uint32_t idx, const ObJBNodeMeta& new_value); + int copy(const ObJBNodeMetaStack& dst); + int pop(); + int push(const ObJBNodeMeta& node); + int at(uint32_t idx, ObJBNodeMeta& node) const; + int32_t size() const; + void reset(); + int back(ObJBNodeMeta& node, bool is_pop = false); + int back(ObJBNodeMeta& node) const; + private: + ObJsonBuffer buf_; + }; + +private: int add_v0(const ObString &key, ObJsonBin *new_value); int insert_internal_v0(ObJBNodeMeta& meta, int64_t pos, const ObString &key, ObJsonBin *new_value, ObJsonBuffer& result); int insert_v0(int64_t pos, const ObString& key, ObJsonBin *new_value); - int remove_v0(size_t index); + int object_remove_v0(size_t index); + int array_remove_v0(size_t index); + int update_v0(int index, ObJsonBin *new_value); - int estimate_need_rebuild(ObJsonBuffer& update_stack, int64_t size_change, - 
int32_t pos, uint32_t& top_pos, bool& need_rebuild); + int update_append_v0(int index, ObJsonBin *new_value, bool &is_update_append); + int update_recursion(int index, ObJsonBin *new_value); + int insert_recursion(int index, const ObString &new_key, ObJsonBin *new_value); + int lookup_insert_postion(const ObString &key, size_t &idx) const; - int move_iter(ObJsonBuffer& stack, uint32_t start = 0); - // build at tail, the offset_size type grow largger, need rebuild - int estimate_need_rebuild_kv_entry(ObJsonBuffer &result, ObJsonBuffer& origin_stack, - ObJsonBuffer& update_stack, uint32_t& top_pos, bool& rebuild); - int serialize_json_object(ObJsonObject* object, ObJsonBuffer &result, uint32_t depth = 0); - int serialize_json_array(ObJsonArray *array, ObJsonBuffer &result, uint32_t depth = 0); - int serialize_json_value(ObJsonNode *json_tree, ObJsonBuffer &result); - int serialize_json_integer(int64_t value, ObJsonBuffer &result) const; - int serialize_json_decimal(ObJsonDecimal *json_dec, ObJsonBuffer &result) const; - bool try_update_inline(const ObJsonNode *value, - uint8_t var_type, - int64_t *value_entry_offset, - ObJsonBuffer &result); - bool try_update_inline(const ObJsonBin *value, - uint8_t var_type, - int64_t *value_entry_offset, - ObJsonBuffer &result); + int deserialize_json_value(ObJsonNode *&json_tree); + int deserialize_json_object_v0(ObJsonObject *object); + int deserialize_json_object(ObJsonObject *object); + int deserialize_json_array_v0(ObJsonArray *array); + int deserialize_json_array(ObJsonArray *array); - int deserialize_json_value(const char *data, - uint64_t length, - uint8_t type, - uint64_t value_offset, - ObJsonNode *&json_tree, - uint64_t type_size); + int serialize_number_to_json_decimal(number::ObNumber number, ObJsonBuffer &result); - int deserialize_json_object_v0(const char *data, uint64_t length, ObJsonObject *object); - inline int deserialize_json_object(const char *data, uint64_t length, ObJsonObject *object, ObJBVerType vertype); 
+ int calc_size_with_insert_new_value(const ObString &new_key, const ObJsonBin *new_value, ObJsonBinMeta &new_meta) const; + int calc_size_with_new_value(const ObJsonBin *old_value, const ObJsonBin *new_value, ObJsonBinMeta &new_meta) const; - int deserialize_json_array_v0(const char *data, uint64_t length, ObJsonArray *array); - inline int deserialize_json_array(const char *data, uint64_t length, ObJsonArray *array, ObJBVerType vertype); + int reset_root(const ObString &data); + int rebuild_json_value(ObJsonBuffer &result) const; + int rebuild_json_array_v0(ObJsonBuffer &result) const; + int rebuild_json_array(ObJsonBuffer &result) const; + int rebuild_json_object_v0(ObJsonBuffer &result) const; + int rebuild_json_object(ObJsonBuffer &result) const; + int rebuild_with_new_insert_value(int64_t index, const ObString &new_key, ObJsonBin *new_value, ObStringBuffer &result) const; + int rebuild_with_new_value(int64_t index, ObJsonBin *new_value, ObStringBuffer &result) const; + int extend_entry_var_type( + const bool is_obj_type, + const uint64_t element_count, + const uint64_t old_size, + uint8_t old_entry_var_type, + uint8_t &new_entry_var_type, + uint64_t &new_size) const; - int set_curr_by_type(int64_t new_pos, uint64_t val_offset, uint8_t type, uint8_t entry_size = 0); - void parse_obj_header(const char *data, uint64_t &offset, uint8_t &node_type, - uint8_t &type, uint8_t& obj_size_type, uint64_t &count, uint64_t &obj_size) const; - - int get_element_in_array_v0(size_t index, char **get_addr_only); - inline int get_element_in_array(size_t index, char **get_addr_only = NULL); + int try_update_inline( + const int index, + const ObJsonNode *value, + bool &is_update_inline); + int try_update_inline( + const int index, + const ObJsonBin *value, + bool &is_update_inline); + int try_update_inplace( + int index, + ObJsonBin *new_value, + bool &is_update_inplace); + int try_update_inplace_in_extend( + int index, + ObJsonBin *new_value, + bool &is_update_inplace); + int 
replace_value(const ObString &new_data); - int get_element_in_object_v0(size_t index, char **get_addr_only = NULL); - inline int get_element_in_object(size_t index, char **get_addr_only = NULL); + // use the reset functions instead; these are inner logic. + int init_bin_data(); + int init_meta(); + int init_string_node(); + int init_string_node_v0(); + int init_opaque_node(); + int init_opaque_node_v0(); + + int parse_doc_header_v0(); + int init_ctx(); + + int get_element_v0(size_t index, uint64_t *get_addr_only); + int get_element_in_array(size_t index, uint64_t *get_addr_only = NULL); + int get_element_in_object(size_t index, uint64_t *get_addr_only = NULL); int get_key_in_object_v0(size_t i, ObString &key) const; - inline int get_key_in_object(size_t i, ObString &key) const; - - int update_parents(int64_t size_change, bool is_continous); + int get_key_in_object(size_t i, ObString &key) const; - int update_offset(uint64_t parent_offset, uint64_t idx, uint64_t value_offset); - /* - Get current iter node whether continuous or not. 
- */ - bool is_discontinuous() const; - int get_update_val_ptr(ObJsonBin *new_value_bin, char *&val, uint64_t &len, ObJsonBuffer &str); - - int rebuild(ObJsonBuffer &result); - - int rebuild_with_meta(const char *data, uint64_t length, ObJsonBuffer& old_stack, ObJsonBuffer& new_meta, - uint32_t min, uint32_t max, ObJsonBuffer &result, uint32_t depth = 0); - - int rebuild_json_value_v0(const char *data, uint64_t length, uint8_t type, - uint8_t dst_type, uint64_t inline_data, ObJsonBuffer &result) const; - inline int rebuild_json_value(const char *data, uint64_t length, uint8_t type, uint8_t dst_type, - uint64_t inline_data, ObJsonBuffer &result) const; - - - int rebuild_json_array_v0(const char *data, uint64_t length, ObJsonBuffer &result) const; - inline int rebuild_json_array(const char *data, uint64_t length, ObJsonBuffer &result, - ObJBVerType cur_vertype, ObJBVerType dest_vertype) const; - - int rebuild_json_obj_v0(const char *data, uint64_t length, ObJsonBuffer &result) const; - inline int rebuild_json_obj(const char *data, uint64_t length, ObJsonBuffer &result, - ObJBVerType cur_vertype, ObJBVerType dest_vertype) const; - - int rebuild_json_process_value_v0(const char *data, uint64_t length, const char *old_val_entry, uint64_t new_val_entry_offset, - uint64_t count, uint8_t var_type, int64_t st_pos, ObJsonBuffer &result) const; - - inline int rebuild_json_process_value(const char *data, uint64_t length, const char *old_val_entry, - uint64_t new_val_entry_offset, uint64_t count, uint8_t var_type, int64_t st_pos, - ObJsonBuffer &result, ObJBVerType cur_vertype, ObJBVerType dest_vertype) const; - - void stack_update(ObJsonBuffer& stack, uint32_t idx, const ObJBNodeMeta& new_value); - int stack_copy(ObJsonBuffer& src, ObJsonBuffer& dst); - int stack_pop(ObJsonBuffer& stack); - int stack_push(ObJsonBuffer& stack, const ObJBNodeMeta& node); - int stack_at(ObJsonBuffer& stack, uint32_t idx, ObJBNodeMeta& node); - int32_t stack_size(const ObJsonBuffer& stack) const; 
- void stack_reset(ObJsonBuffer& stack); - int stack_back(ObJsonBuffer& stack, ObJBNodeMeta& node, bool is_pop = false); int check_valid_object_op(ObIJsonBase *value) const; int check_valid_array_op(ObIJsonBase *value) const; int check_valid_object_op(uint64_t index) const; int check_valid_array_op(uint64_t index) const; - int create_new_binary(ObIJsonBase *&value, ObJsonBin *&new_bin) const; + int create_new_binary(ObIJsonBase *value, ObJsonBin *&new_bin) const; + + // data access layer function + // used for key_entry and value_entry + int get_key_entry(int index, uint64_t &key_offset, uint64_t &key_len) const; + int get_value_entry(int index, uint64_t &value_offset, uint8_t &value_type) const; + int64_t get_value_entry_size() const; + int get_value(int index, ObJsonBin &value) const; + int set_key_entry(int index, uint64_t key_offset, uint64_t key_len, bool check=true); + int set_value_entry(int index, uint64_t value_offset, uint8_t value_type, bool check=true); + OB_INLINE uint64_t get_value_entry_offset(int index) const { return meta_.get_value_entry_offset(index); } + OB_INLINE uint64_t get_key_entry_offset(int index) const { return meta_.get_key_entry_offset(index); } + OB_INLINE uint8_t entry_var_type() const { return meta_.entry_var_type(); } + OB_INLINE uint64_t entry_var_size() const { return meta_.entry_var_size(); } + + OB_INLINE uint64_t obj_size() const { return meta_.obj_size(); } + int set_obj_size(uint64_t obj_size); + OB_INLINE uint64_t obj_size_var_size() const { return meta_.obj_size_var_size(); } + OB_INLINE uint64_t get_obj_size_offset() const { return meta_.get_obj_size_offset(); } + OB_INLINE uint8_t obj_size_var_type() const { return meta_.obj_size_var_type(); } + OB_INLINE uint8_t element_count_var_type() const { return meta_.element_count_var_type(); } + OB_INLINE uint64_t element_count_var_size() const { return meta_.element_count_var_size(); } + OB_INLINE uint64_t get_element_count_offset() const { return 
meta_.get_element_count_offset(); } + int set_element_count(uint64_t count); + OB_INLINE uint64_t get_extend_seg_offset() const { return nullptr == ctx_ ? 0 : ctx_->extend_seg_offset_; } + OB_INLINE void set_extend_seg_offset(uint64_t offset) { if (nullptr != ctx_) ctx_->extend_seg_offset_ = offset; } + OB_INLINE uint64_t get_extend_value_offset(uint64_t offset) const { return get_extend_seg_offset() + offset; } + static OB_INLINE bool need_type_prefix(const uint8_t value_type) + { + return value_type == ObJBVerType::J_ARRAY_V0 || + value_type == ObJBVerType::J_OBJECT_V0 || + ObJsonVerType::is_opaque_or_string(static_cast(value_type)); + } + + int get_extend_value_type(uint64_t offset, uint8_t &value_type) const; + + // for json diff record + int record_inline_update_offset(int index); + int record_inplace_update_offset(int index, ObJsonBin *new_value, bool is_record_header_binary); + int record_extend_inplace_update_offset(int index, int64_t value_offset, int64_t value_len, uint8_t value_type); + int record_append_update_offset(int index, int64_t value_offset, int64_t value_len, uint8_t value_type); + int record_remove_offset(int index); + int record_insert_offset(int index, int64_t value_offset, int64_t value_len, uint8_t value_type); + int get_json_path_at_iter(int index, ObString &path) const; + + int set_current(const ObString &data, int64_t offset); + + int parse_type_(); + int skip_type_byte_(); + int parse_doc_header_(); + bool is_empty_data() const; + + int rebuild_child_key( + const int64_t index, + const ObString& child_key, + const int64_t key_offset, + ObJsonBuffer& result); + int rebuild_child( + const int64_t index, + const ObJsonBin& child_value, + const int64_t value_offset, + ObJsonBuffer& result); + bool is_at_root() const + { + bool res = false; + if (OB_ISNULL(ctx_) || ctx_->extend_seg_offset_ == 0) { + res = (pos_ == 0 || pos_ == OB_JSON_BIN_VALUE_TYPE_LEN) && node_stack_.size() == 0; + } else { + res = pos_ == sizeof(ObJsonBinDocHeader) && 
node_stack_.size() == 0; + } + return res; + } + int init_cursor(const ObString &data); + +public: + int should_pack_diff(bool &is_should_pack) const; + //TODO replace this with ObString + const char *get_data() const override; + int get_data(ObString &data); + /* data */ private: common::ObIAllocator *allocator_; - ObJsonBuffer result_; - ObString curr_; - bool is_alloc_; - // path node stack used - ObJsonBuffer stack_buf_; - - // curr iter info - uint8_t type_; + + ObJsonBinMeta meta_; + ObILobCursor *cursor_; + ObLobInRowCursor local_cursor_; int64_t pos_; - uint64_t element_count_; // elem count for obj or array, length for string or opaque - uint64_t bytes_; // acutal used bytes for curr iter node, inlined node will set 0 - ObObjType field_type_; // field type for opaque + // path node stack used + ObJBNodeMetaStack node_stack_; + + // aux data field char *data_; union { int64_t int_val_; uint64_t uint_val_; double double_val_; float float_val_; + uint64_t inline_value_; }; number::ObNumber number_; ObPrecision prec_; ObScale scale_; + ObJsonBinCtx* ctx_; + bool is_alloc_ctx_; + // json doc used only for search + bool is_seek_only_; + + bool is_schema_; DISALLOW_COPY_AND_ASSIGN(ObJsonBin); }; class ObJsonVar { public: + static int read_var(const ObILobCursor *cursor, int64_t offset, uint8_t type, uint64_t *var); + static int read_var(const ObILobCursor *cursor, int64_t offset, uint8_t type, int64_t *var); static int read_var(const char *data, uint8_t type, uint64_t *var); + static int read_var(const ObString& buffer, uint8_t type, uint64_t *var); static int append_var(uint64_t var, uint8_t type, ObJsonBuffer &result); static int reserve_var(uint8_t type, ObJsonBuffer &result); static int set_var(uint64_t var, uint8_t type, char *pos); // fill var at pos + static int set_var(ObILobCursor *cursor, int64_t offset, uint64_t var, uint8_t type); static uint64_t get_var_size(uint8_t type); static uint8_t get_var_type(uint64_t var); static int read_var(const char 
*data, uint8_t type, int64_t *var); static uint64_t var_int2uint(int64_t var); static int64_t var_uint2int(uint64_t var, uint8_t entry_size); static uint8_t get_var_type(int64_t var); + + static bool is_fit_var_type(uint64_t var, uint8_t type); +}; + + +class ObJsonBinSerializer +{ +public: + ObJsonBinSerializer(ObIAllocator *allocator): + allocator_(allocator), + bin_ctx_() + {} + int serialize(ObJsonNode *json_tree, ObString &result); + int serialize_json_object(ObJsonObject* object, ObJsonBuffer &result, uint32_t depth = 0); + int serialize_json_array(ObJsonArray *array, ObJsonBuffer &result, uint32_t depth = 0); + int serialize_json_value(ObJsonNode *json_tree, ObJsonBuffer &result); + +public: + static int serialize_json_integer(int64_t value, ObJsonBuffer &result); + static int serialize_json_decimal(ObJsonDecimal *json_dec, ObJsonBuffer &result); + +private: + ObIAllocator *allocator_; + ObJsonBinCtx bin_ctx_; + }; } // namespace common diff --git a/deps/oblib/src/lib/json_type/ob_json_common.h b/deps/oblib/src/lib/json_type/ob_json_common.h index ed34dc3de8..d59f0d1088 100644 --- a/deps/oblib/src/lib/json_type/ob_json_common.h +++ b/deps/oblib/src/lib/json_type/ob_json_common.h @@ -13,6 +13,7 @@ #ifndef OCEANBASE_LIB_JSON_TYPE_OB_JSON_COMMON_ #define OCEANBASE_LIB_JSON_TYPE_OB_JSON_COMMON_ #include "lib/string/ob_string_buffer.h" +#include "lib/lob/ob_lob_base.h" namespace oceanbase { namespace common { diff --git a/deps/oblib/src/lib/json_type/ob_json_diff.cpp b/deps/oblib/src/lib/json_type/ob_json_diff.cpp new file mode 100644 index 0000000000..52ddbe4de9 --- /dev/null +++ b/deps/oblib/src/lib/json_type/ob_json_diff.cpp @@ -0,0 +1,246 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+#define USING_LOG_PREFIX LIB
+#include "ob_json_diff.h"
+#include "ob_json_bin.h"
+#include "lib/ob_define.h"
+#include "lib/utility/ob_unify_serialize.h"
+
+namespace oceanbase {
+namespace common {
+
+
+// Serialized size of one ObJsonDiff. entry_var_type_ is not listed on its own:
+// it shares a union with flag_, so encoding flag_ carries it.
+OB_DEF_SERIALIZE_SIZE(ObJsonDiff)
+{
+  int64_t len = 0;
+  LST_DO_CODE(OB_UNIS_ADD_LEN,
+      op_,
+      value_type_,
+      flag_,
+      path_,
+      value_);
+  return len;
+}
+
+// Encodes the fields in the same order that the size and decode routines use.
+OB_DEF_SERIALIZE(ObJsonDiff)
+{
+  int ret = OB_SUCCESS;
+  LST_DO_CODE(OB_UNIS_ENCODE,
+      op_,
+      value_type_,
+      flag_,
+      path_,
+      value_);
+  return ret;
+}
+
+// Decodes the fields in the order written by OB_DEF_SERIALIZE above.
+OB_DEF_DESERIALIZE(ObJsonDiff)
+{
+  int ret = OB_SUCCESS;
+  LST_DO_CODE(OB_UNIS_DECODE,
+      op_,
+      value_type_,
+      flag_,
+      path_,
+      value_);
+  return ret;
+}
+
+// Returns the lower-case name of a diff operation as it is written into the
+// "op" member by ObJsonDiff::print(). Values without a name (INVALID or an
+// out-of-range value) map to "unknow"; that spelling is emitted at runtime and
+// is therefore kept unchanged.
+const char* get_json_diff_op_str(ObJsonDiffOp op)
+{
+  const char *str = nullptr;
+  switch (op) {
+  case ObJsonDiffOp::REPLACE:
+    str = "replace";
+    break;
+  case ObJsonDiffOp::INSERT:
+    str = "insert";
+    break;
+  case ObJsonDiffOp::REMOVE:
+    str = "remove";
+    break;
+  case ObJsonDiffOp::RENAME:
+    // RENAME is a declared operation; give it its own name instead of
+    // letting it fall through to the "unknow" bucket.
+    str = "rename";
+    break;
+  default:
+    str = "unknow";
+    break;
+  };
+  return str;
+}
+
+// Appends `path` to `buf`, putting a backslash in front of every double quote
+// and backslash so the path can sit inside a double-quoted string.
+// Stops at the first append failure and returns that error.
+static int print_json_path(const ObString &path, ObStringBuffer &buf)
+{
+  INIT_SUCC(ret);
+  for (int i = 0; OB_SUCC(ret) && i < path.length(); ++i) {
+    char c = *(path.ptr() + i);
+    if ((c == '"' || c == '\\') && OB_FAIL(buf.append("\\", 1))) {
+      LOG_WARN("append slash fail", K(ret), K(i), K(c));
+    } else if (OB_FAIL(buf.append(&c, 1))) {
+      LOG_WARN("append fail", K(ret), K(i), K(c));
+    }
+  }
+  return ret;
+}
+
+// Appends a textual form of this diff to `buffer`:
+//   {"op": "<op>", "path": "<escaped path>", "value": <printed value>}
+// The "value" member is left out for REMOVE. For the other operations value_
+// is wrapped in a temporary ObJsonBin (using value_type_ and entry_var_type_)
+// and printed through the ObIJsonBase interface.
+// Returns OB_SUCCESS, or the first error reported by an append/reset/print call.
+int ObJsonDiff::print(ObStringBuffer &buffer)
+{
+  INIT_SUCC(ret);
+  if (OB_FAIL(buffer.append("{"))) {
+    LOG_WARN("buffer append fail", K(ret));
+  } else if (OB_FAIL(buffer.append("\"op\": \""))) {
+    LOG_WARN("buffer append fail", K(ret));
+  } else if (OB_FAIL(buffer.append(get_json_diff_op_str(op_)))) {
+    LOG_WARN("buffer append fail", K(ret));
+  } else if (OB_FAIL(buffer.append("\", \"path\": \""))) {
+    LOG_WARN("buffer append fail", K(ret));
+  } else if (OB_FAIL(print_json_path(path_, buffer))) {
+    LOG_WARN("buffer append fail", K(ret), K(path_));
+  } else if (OB_FAIL(buffer.append("\""))) {
+    LOG_WARN("buffer append fail", K(ret));
+  }
+
+  if (OB_FAIL(ret)) {
+  } else if (ObJsonDiffOp::REMOVE == op_) {
+    // a remove carries no value
+  } else if (OB_FAIL(buffer.append(", \"value\": "))) {
+    LOG_WARN("buffer append fail", K(ret));
+  } else {
+    ObJsonBinCtx bin_ctx;
+    ObJsonBin j_bin;
+    ObIJsonBase *j_base = &j_bin;
+    if (OB_FAIL(j_bin.reset(
+        value_type_,
+        value_,
+        0,
+        entry_var_type_,
+        &bin_ctx))) {
+      LOG_WARN("reset json bin fail", K(ret), K(value_type_), K(value_));
+    } else if (OB_FAIL(j_base->print(buffer, true))) {
+      LOG_WARN("json binary to string failed in mysql mode", K(ret), K(*j_base));
+    }
+  }
+
+  if (OB_FAIL(ret)) {
+  } else if (OB_FAIL(buffer.append("}"))) {
+    LOG_WARN("buffer append fail", K(ret));
+  }
+  return ret;
+}
+
+// Runs the cursor's destructor in place when a cursor has been attached.
+// No deallocation call is made here.
+ObJsonBinUpdateCtx::~ObJsonBinUpdateCtx()
+{
+  if (nullptr != cursor_) {
+    cursor_->~ObILobCursor();
+    cursor_ = nullptr;
+  }
+}
+
+// True when no binary diff has been recorded.
+bool ObJsonBinUpdateCtx::is_no_update()
+{
+  return 0 == binary_diffs_.count();
+}
+
+// Records a logical diff that also carries the entry var type of the value.
+// Returns the push_back error on failure.
+int ObJsonBinUpdateCtx::record_inline_diff(ObJsonDiffOp op, uint8_t value_type, const ObString &path, uint8_t entry_var_type, const ObString &value)
+{
+  INIT_SUCC(ret);
+  ObJsonDiff json_diff;
+  json_diff.op_ = op;
+  json_diff.value_type_ = value_type;
+  json_diff.path_ = path;
+  json_diff.entry_var_type_ = entry_var_type;
+  json_diff.value_ = value;
+  if (OB_FAIL(json_diffs_.push_back(json_diff))) {
+    LOG_WARN("push_back json diff fail", K(ret), K(json_diff));
+  }
+  return ret;
+}
+
+
+// Records a logical diff; entry_var_type_ keeps the zero set by the
+// ObJsonDiff constructor. Returns the push_back error on failure.
+int ObJsonBinUpdateCtx::record_diff(ObJsonDiffOp op, uint8_t value_type, const ObString &path, const ObString &value)
+{
+  INIT_SUCC(ret);
+  ObJsonDiff json_diff;
+  json_diff.op_ = op;
+  json_diff.value_type_ = value_type;
+  json_diff.path_ = path;
+  json_diff.value_ = value;
+  if (OB_FAIL(json_diffs_.push_back(json_diff))) {
+    LOG_WARN("push_back json diff fail", K(ret), K(json_diff));
+  }
+  return ret;
+}
+
+// Records a REMOVE diff for `path`; no value is attached.
+int ObJsonBinUpdateCtx::record_remove_diff(const ObString &path) {
+  INIT_SUCC(ret);
+  ObJsonDiff json_diff;
+  json_diff.op_ = ObJsonDiffOp::REMOVE;
+  json_diff.path_ = path;
+  if (OB_FAIL(json_diffs_.push_back(json_diff))) {
+    LOG_WARN("push_back json diff fail", K(ret), K(json_diff));
+  }
+  return ret;
+}
+
+// Records a changed byte range (offset, len) in binary_diffs_.
+int ObJsonBinUpdateCtx::record_binary_diff(int64_t offset, int64_t len)
+{
+  INIT_SUCC(ret);
+  ObJsonBinaryDiff binary_diff;
+  binary_diff.dst_offset_ = offset;
+  binary_diff.dst_len_ = len;
+  if (OB_FAIL(binary_diffs_.push_back(binary_diff))) {
+    // this array holds binary diffs, not json diffs
+    LOG_WARN("push_back binary diff fail", K(ret), K(binary_diff));
+  }
+  return ret;
+}
+
+
+// Encoded size of the header: two i8 fields (version_, cnt_).
+DEFINE_GET_SERIALIZE_SIZE(ObJsonDiffHeader)
+{
+  int64_t size = 0;
+  size += serialization::encoded_length_i8(version_);
+  size += serialization::encoded_length_i8(cnt_);
+  return size;
+}
+
+// Writes version_ then cnt_ into buf. `pos` is advanced only when both
+// fields were written, so a failed call leaves it untouched.
+DEFINE_SERIALIZE(ObJsonDiffHeader)
+{
+  int ret = OB_SUCCESS;
+  int64_t new_pos = pos;
+  if (nullptr == buf || 0 >= buf_len || 0 > pos) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_ERROR("serialize failed", K(ret), K(pos), K(buf_len));
+  } else if (OB_FAIL(serialization::encode_i8(buf, buf_len, new_pos, version_))) {
+    LOG_ERROR("serialize failed", K(ret), K(pos), K(buf_len));
+  } else if (OB_FAIL(serialization::encode_i8(buf, buf_len, new_pos, cnt_))) {
+    LOG_ERROR("serialize failed", K(ret), K(pos), K(buf_len));
+  } else {
+    pos = new_pos;
+  }
+  return ret;
+}
+
+// Reads version_ then cnt_ from buf. `pos` is advanced only when both
+// fields were read, so a failed call leaves it untouched.
+DEFINE_DESERIALIZE(ObJsonDiffHeader)
+{
+  int ret = OB_SUCCESS;
+  int64_t new_pos = pos;
+  if (nullptr == buf || 0 >= data_len || 0 > pos) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_ERROR("deserialize failed", K(ret), K(pos), K(data_len));
+  } else if (OB_FAIL(serialization::decode_i8(buf, data_len, new_pos, reinterpret_cast<int8_t *>(&version_)))) {
+    // the fields are uint8_t, so they are viewed as int8_t for the i8 decoder
+    LOG_ERROR("deserialize failed", K(ret), K(pos), K(data_len));
+  } else if (OB_FAIL(serialization::decode_i8(buf, data_len, new_pos, reinterpret_cast<int8_t *>(&cnt_)))) {
+    LOG_ERROR("deserialize failed", K(ret), K(pos), K(data_len));
+  } else {
+    pos = new_pos;
+  }
+  return ret;
+}
+
+} // namespace common
+} // namespace oceanbase
diff --git a/deps/oblib/src/lib/json_type/ob_json_diff.h b/deps/oblib/src/lib/json_type/ob_json_diff.h
new file mode 100644
index 0000000000..2bc0dcdf5b
--- /dev/null
+++ b/deps/oblib/src/lib/json_type/ob_json_diff.h
@@ -0,0 +1,149 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_SQL_OB_JSON_DIFF
+#define OCEANBASE_SQL_OB_JSON_DIFF
+
+#include "common/object/ob_object.h"
+#include "lib/json_type/ob_json_common.h"
+
+namespace oceanbase {
+namespace common {
+
+// Operation kind of one recorded json diff. Each diff covers one element.
+enum class ObJsonDiffOp : uint8_t
+{
+  INVALID = 0,
+  // same as json_replace(json, path, value)
+  REPLACE = 1,
+  // same as json_array_insert(json, path, value) if the target is an array element,
+  // otherwise same as json_insert(json, path, value)
+  INSERT,
+  // same as json_remove(json, path)
+  REMOVE,
+  RENAME
+};
+
+// Name of `op` as written into the "op" member by ObJsonDiff::print().
+const char* get_json_diff_op_str(ObJsonDiffOp op);
+
+
+// One logical change to a json document: an operation, the path it applies
+// to and, except for removals, the new value.
+struct ObJsonDiff {
+  OB_UNIS_VERSION(1);
+public:
+
+  ObJsonDiff() :
+    op_(ObJsonDiffOp::INVALID),
+    value_type_(0),
+    flag_(0),
+    path_(),
+    value_()
+  {}
+
+  // Appends a textual form of this diff to `buffer`.
+  int print(ObStringBuffer &buffer);
+
+public:
+  ObJsonDiffOp op_;
+  uint8_t value_type_;
+  // flag_ is the whole 16-bit word; entry_var_type_ is its low 2-bit field.
+  // Initialising flag_ to 0 therefore also zeroes entry_var_type_.
+  union {
+    struct {
+      uint16_t entry_var_type_ : 2;
+    };
+    uint16_t flag_;
+  };
+  ObString path_;
+  ObString value_;
+  TO_STRING_KV(
+      K(op_),
+      K(value_type_),
+      K(entry_var_type_),
+      K(path_),
+      K(value_));
+};
+
+// One changed byte range, stored as an offset and a length.
+struct ObJsonBinaryDiff
+{
+  ObJsonBinaryDiff():
+    dst_offset_(0),
+    dst_len_(0)
+  {}
+  int64_t dst_offset_;
+  int64_t dst_len_;
+
+  TO_STRING_KV(
+      K(dst_offset_),
+      K(dst_len_))
+};
+
+
+// NOTE(review): the template arguments of ObSEArray are missing from the two
+// typedefs below in this text. The element types are presumably ObJsonDiff and
+// ObJsonBinaryDiff (that is what gets pushed into json_diffs_ / binary_diffs_);
+// the inline capacity cannot be determined here -- restore both from upstream.
+typedef ObSEArray ObJsonDiffArray;
+typedef ObSEArray ObJsonBinaryDiffArray;
+
+// Collects the diffs produced while a json binary is updated: logical diffs
+// (json_diffs_) and changed byte ranges (binary_diffs_).
+class ObJsonBinUpdateCtx {
+public:
+  ObJsonBinUpdateCtx(ObIAllocator *allocator) :
+    allocator_(allocator),
+    is_rebuild_all_(false),
+    cursor_(nullptr),
+    tmp_buffer_(allocator)
+  {}
+  ~ObJsonBinUpdateCtx();
+
+  // Returns the scratch buffer after reuse() has been called on it.
+  ObStringBuffer& get_tmp_buffer() { tmp_buffer_.reuse(); return tmp_buffer_; }
+
+  // Fetches the current data through the attached cursor.
+  // Returns OB_NOT_INIT when no cursor has been attached yet: cursor_ starts
+  // as nullptr and is only assigned by set_lob_cursor().
+  int current_data(ObString &data) const
+  {
+    return nullptr == cursor_ ? OB_NOT_INIT : cursor_->get_data(data);
+  }
+
+  // True when no binary diff has been recorded.
+  bool is_no_update();
+  bool is_rebuild_all() const { return is_rebuild_all_; }
+
+  // Attaches the cursor; the destructor of this class runs the cursor's destructor.
+  void set_lob_cursor(ObILobCursor *cursor) { cursor_ = cursor; }
+
+  int record_inline_diff(ObJsonDiffOp op, uint8_t value_type, const ObString &path, uint8_t entry_var_type, const ObString &value);
+  int record_remove_diff(const ObString &path);
+  int record_diff(ObJsonDiffOp op, uint8_t value_type, const ObString &path, const ObString &value);
+  int record_binary_diff(int64_t offset, int64_t len);
+
+public:
+  ObIAllocator *allocator_;
+  bool is_rebuild_all_;
+  ObILobCursor *cursor_;
+  ObStringBuffer tmp_buffer_;
+  ObJsonDiffArray json_diffs_;
+  ObJsonBinaryDiffArray binary_diffs_;
+
+  TO_STRING_KV(
+      K(is_rebuild_all_),
+      K(json_diffs_),
+      K(binary_diffs_));
+
+};
+
+// Two-byte header (version, count) with hand-written serialization.
+struct ObJsonDiffHeader {
+
+  ObJsonDiffHeader() :
+    version_(0),
+    cnt_(0)
+  {}
+  uint8_t version_;
+  uint8_t cnt_;
+  TO_STRING_KV(
+      K(version_),
+      K(cnt_));
+  NEED_SERIALIZE_AND_DESERIALIZE;
+};
+
+
+} // namespace common
+} // namespace oceanbase
+#endif // OCEANBASE_SQL_OB_JSON_DIFF
\ No newline at end of file
diff --git a/deps/oblib/src/lib/json_type/ob_json_parse.cpp b/deps/oblib/src/lib/json_type/ob_json_parse.cpp index b774f1ae5a..27bf7c05ec 100644 --- a/deps/oblib/src/lib/json_type/ob_json_parse.cpp +++ b/deps/oblib/src/lib/json_type/ob_json_parse.cpp @@ -81,7 +81,9 @@ int ObJsonParser::parse_json_text(ObIAllocator *allocator, MEMCPY(buf, text, length); buf[length] = '\0'; bool with_unique_key = HAS_FLAG(parse_flag, JSN_UNIQUE_FLAG); - ObRapidJsonHandler handler(allocator, with_unique_key); + bool is_schema = HAS_FLAG(parse_flag, JSN_SCHEMA_FLAG); + bool preserve_dup = HAS_FLAG(parse_flag, JSN_PRESERVE_DUP_FLAG); + ObRapidJsonHandler handler(allocator, with_unique_key, is_schema, preserve_dup); ObRapidJsonAllocator parse_allocator(allocator); rapidjson::InsituStringStream ss(static_cast(buf)); ObRapidJsonReader reader(&parse_allocator); @@ -112,10 +114,11 @@ int ObJsonParser::parse_json_text(ObIAllocator *allocator, allocator->free(buf); if (handler.has_duplicate_key()) { ret = OB_ERR_DUPLICATE_KEY; + } else if (is_schema && OB_FAIL(handler.get_error_code())) { } else { ret = OB_ERR_INVALID_JSON_TEXT; } - if (offset != NULL){ + if (offset != NULL) { *offset = reader.GetErrorOffset(); } if 
(syntaxerr != NULL) { @@ -271,7 +274,7 @@ bool ObRapidJsonHandler::seeing_value(ObJsonNode *value) INIT_SUCC(ret); next_state_ = ObJsonExpectNextState::EXPECT_OBJECT_KEY; ObJsonObject *object = dynamic_cast(current_element_); - if (OB_FAIL(object->add(key_, value, with_unique_key_, true, false))) { + if (OB_FAIL(object->add(key_, value, with_unique_key_, true, false, is_schema_))) { LOG_WARN("fail to add element to json object", K(ret)); if (ret == OB_ERR_DUPLICATE_KEY) { with_duplicate_key_ = true; @@ -334,7 +337,9 @@ bool ObRapidJsonHandler::is_end_object_or_array() obj->update_serialize_size(); obj->stable_sort(); int64_t origin_num = obj->element_count(); - obj->unique(); + if (!preserve_dup_key_) { + obj->unique(); + } if (with_unique_key_ && obj->element_count() < origin_num) { is_continue = false; with_duplicate_key_ = true; @@ -472,26 +477,29 @@ bool ObRapidJsonHandler::String(const char *str, rapidjson::SizeType length, boo void *buf = alloc(sizeof(ObJsonString)); if (OB_ISNULL(buf)) { LOG_WARN_RET(OB_ALLOCATE_MEMORY_FAILED, "fail to alloc memory for string json node", K(OB_ALLOCATE_MEMORY_FAILED)); - } else { - if (copy) { - void *dst_buf = NULL; - ObString src_str(length, str); - if (length > 0) { - if (OB_ISNULL(dst_buf = allocator_->alloc(length))) { - LOG_WARN_RET(OB_ALLOCATE_MEMORY_FAILED, "allocate memory fail", K(length)); - } else { - MEMCPY(dst_buf, src_str.ptr(), src_str.length()); - ObJsonString *node = new (buf) ObJsonString(static_cast(dst_buf), length); - is_continue = seeing_value(node); - } + } else if (is_schema_ && OB_NOT_NULL(str) && length > 0 + && next_state_ == ObJsonExpectNextState::EXPECT_OBJECT_VALUE + && key_.compare("$ref") == 0 && *str != '#') { + err_code_ = OB_ERR_UNSUPPROTED_REF_IN_JSON_SCHEMA; + LOG_WARN_RET(OB_ERR_UNSUPPROTED_REF_IN_JSON_SCHEMA, "unsupported ref in json schema", K(OB_ERR_UNSUPPROTED_REF_IN_JSON_SCHEMA)); + } else if (copy) { + void *dst_buf = NULL; + ObString src_str(length, str); + if (length > 0) { + if 
(OB_ISNULL(dst_buf = allocator_->alloc(length))) { + LOG_WARN_RET(OB_ALLOCATE_MEMORY_FAILED, "allocate memory fail", K(length)); } else { - ObJsonString *node = new (buf) ObJsonString(str, length); + MEMCPY(dst_buf, src_str.ptr(), src_str.length()); + ObJsonString *node = new (buf) ObJsonString(static_cast(dst_buf), length); is_continue = seeing_value(node); } } else { ObJsonString *node = new (buf) ObJsonString(str, length); is_continue = seeing_value(node); } + } else { + ObJsonString *node = new (buf) ObJsonString(str, length); + is_continue = seeing_value(node); } return is_continue; diff --git a/deps/oblib/src/lib/json_type/ob_json_parse.h b/deps/oblib/src/lib/json_type/ob_json_parse.h index c9e8ad6c3e..efc172da08 100644 --- a/deps/oblib/src/lib/json_type/ob_json_parse.h +++ b/deps/oblib/src/lib/json_type/ob_json_parse.h @@ -36,6 +36,8 @@ public: static const uint32_t JSN_STRICT_FLAG = 1; static const uint32_t JSN_RELAXED_FLAG = 2; static const uint32_t JSN_UNIQUE_FLAG = 4; + static const uint32_t JSN_SCHEMA_FLAG = 8; + static const uint32_t JSN_PRESERVE_DUP_FLAG = 16; static const int PARSE_SYNTAXERR_MESSAGE_LENGTH = 256; static int get_tree(ObIAllocator *allocator, const ObString &text, @@ -127,15 +129,18 @@ public: EXPECT_OBJECT_VALUE, EXPECT_EOF }; - explicit ObRapidJsonHandler(ObIAllocator *allocator, bool with_unique_key = false) + explicit ObRapidJsonHandler(ObIAllocator *allocator, bool with_unique_key = false, bool is_schema = false, bool preserve_dup_key = false) : next_state_(ObJsonExpectNextState::EXPECT_ANYTHING), dom_as_built_(NULL), current_element_(NULL), depth_(0), key_(), allocator_(allocator), + err_code_(OB_SUCCESS), with_unique_key_(with_unique_key), - with_duplicate_key_(false) + with_duplicate_key_(false), + is_schema_(is_schema), + preserve_dup_key_(preserve_dup_key) { } virtual ~ObRapidJsonHandler() {} @@ -177,6 +182,7 @@ public: bool EndArray(rapidjson::SizeType length); bool Key(const char *str, rapidjson::SizeType length, bool 
copy); bool has_duplicate_key() { return with_duplicate_key_; } + int get_error_code() { return err_code_; } private: ObJsonExpectNextState next_state_; // The state that is expected to be resolved next. @@ -185,8 +191,11 @@ private: uint64_t depth_; // The depth of the tree currently parsed. common::ObString key_; // The current resolved key value ObIAllocator *allocator_; // A memory allocator that allocates node memory. + int err_code_; // error code bool with_unique_key_; // Whether check unique key for object bool with_duplicate_key_; // Whether contain duplicate key for object + bool is_schema_; // is json schema text + bool preserve_dup_key_; // preserve duplicate key DISALLOW_COPY_AND_ASSIGN(ObRapidJsonHandler); }; diff --git a/deps/oblib/src/lib/json_type/ob_json_path.cpp b/deps/oblib/src/lib/json_type/ob_json_path.cpp index ea48b2d61c..d681c02210 100644 --- a/deps/oblib/src/lib/json_type/ob_json_path.cpp +++ b/deps/oblib/src/lib/json_type/ob_json_path.cpp @@ -974,10 +974,10 @@ bool ObJsonPathCache::is_match(ObString& path_str, size_t idx) return result; } -int ObJsonPathCache::find_and_add_cache(ObJsonPath*& res_path, ObString& path_str, int arg_idx) +int ObJsonPathCache::find_and_add_cache(ObJsonPath*& res_path, ObString& path_str, int arg_idx, bool is_const) { INIT_SUCC(ret); - if (!is_match(path_str, arg_idx)) { + if (!((is_const && arg_idx < size()) || is_match(path_str, arg_idx))) { void* buf = allocator_->alloc(sizeof(ObJsonPath)); if (OB_NOT_NULL(buf)) { ObJsonPath* path = new (buf) ObJsonPath(path_str, allocator_); @@ -4004,15 +4004,16 @@ int ObJsonPath::parse_comp_exist(ObJsonPathFilterNode* filter_comp_node) uint64_t sub_len = end - start + 2; char* sub_path = static_cast (allocator_->alloc(sub_len)); if (OB_ISNULL(sub_path)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to allocate memory for sub_path.",K(ret), K(len),K(start_ptr)); - } else { - sub_path[0] = ObJsonPathItem::ROOT; - MEMCPY(sub_path + 1, start_ptr, sub_len - 1); - } + 
ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory for sub_path.",K(ret), K(len),K(start_ptr)); + } else { + sub_path[0] = ObJsonPathItem::ROOT; + MEMCPY(sub_path + 1, start_ptr, sub_len - 1); + } ObString exist_subpath(sub_len, sub_path); ObJsonPath* spath = static_cast (allocator_->alloc(sizeof(ObJsonPath))); - if (OB_ISNULL(spath)) { + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(spath)) { // error ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate row buffer failed at sub_path",K(ret), K(index_), K(expression_)); diff --git a/deps/oblib/src/lib/json_type/ob_json_path.h b/deps/oblib/src/lib/json_type/ob_json_path.h index 20d5e93671..efd86d587b 100644 --- a/deps/oblib/src/lib/json_type/ob_json_path.h +++ b/deps/oblib/src/lib/json_type/ob_json_path.h @@ -426,7 +426,7 @@ public: size_t size(); void reset(); - int find_and_add_cache(ObJsonPath*& parse_path, ObString& path_str, int arg_idx); + int find_and_add_cache(ObJsonPath*& parse_path, ObString& path_str, int arg_idx, bool is_const = false); void set_allocator(common::ObIAllocator *allocator); common::ObIAllocator* get_allocator(); private: diff --git a/deps/oblib/src/lib/json_type/ob_json_schema.cpp b/deps/oblib/src/lib/json_type/ob_json_schema.cpp new file mode 100644 index 0000000000..b9e7faf89f --- /dev/null +++ b/deps/oblib/src/lib/json_type/ob_json_schema.cpp @@ -0,0 +1,4680 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation support for the json base abstraction. 
+ */ + +#define USING_LOG_PREFIX SQL +#include "ob_json_schema.h" +#include +#include "ob_json_bin.h" +#include "ob_json_parse.h" +#include "lib/encode/ob_base64_encode.h" // for ObBase64Encoder +#include "lib/utility/ob_fast_convert.h" // ObFastFormatInt::format_unsigned +#include "lib/charset/ob_dtoa.h" // ob_gcvt_opt +#include "rpc/obmysql/ob_mysql_global.h" // DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE +#include "lib/charset/ob_charset.h" // for strntod +#include "common/ob_smart_var.h" // for SMART_VAR +#include + +namespace oceanbase { +namespace common { +class ObJsonSchemaItem +{ +public: + static constexpr char* ROOT = const_cast("#"); + static constexpr char* REF = const_cast("$ref"); + static constexpr char* SCHEMA = const_cast("schema"); + static constexpr char* COMPOSITION = const_cast("composition"); + static constexpr char* PROPERTIES = const_cast("properties");// 10 + static constexpr char* PATTERN_PRO = const_cast("patternProperties"); //17 + static constexpr char* ADDITIONAL_PRO = const_cast("additionalProperties"); // 20 + static constexpr char* ALLOF = const_cast("allOf"); + static constexpr char* ANYOF = const_cast("anyOf"); + static constexpr char* ONEOF = const_cast("oneOf"); + static constexpr char* NOT = const_cast("not"); + static constexpr char* DEPENDENCIES = const_cast("dependencies"); + // DEPENDENTREQUIRED and DEPENDENTSCHEMAS are values of "dependencies" keyword + // when value type is object, record as DEPENDENTSCHEMAS + static constexpr char* DEPENDENTREQUIRED = const_cast("dependentRequired");// 17 + static constexpr char* DEPENDENTSCHEMAS = const_cast("dependentSchemas"); + static constexpr char* ENUM = const_cast("enum");// 4 + static constexpr char* TYPE = const_cast("type");// 4 + static constexpr char* TYPE_STRING = const_cast("string"); + static constexpr char* TYPE_NUMBER = const_cast("number"); + static constexpr char* TYPE_INTEGER = const_cast("integer"); + static constexpr char* TYPE_BOOLEAN = const_cast("boolean"); + 
static constexpr char* TYPE_NULL = const_cast("null"); + static constexpr char* TYPE_OBJECT = const_cast("object"); + static constexpr char* TYPE_ARRAY = const_cast("array"); + static constexpr char* MIN_LEN = const_cast("minLength"); // 9 + static constexpr char* MAX_LEN = const_cast("maxLength"); // 9 + static constexpr char* PATTERN = const_cast("pattern"); + static constexpr char* MULTIPLE_OF = const_cast("multipleOf"); // 10 + static constexpr char* MINMUM = const_cast("minimum"); // 7 + static constexpr char* MAXMUM = const_cast("maximum"); // 7 + static constexpr char* EXCLUSIVE_MINMUM = const_cast("exclusiveMinimum"); // 16 + static constexpr char* EXCLUSIVE_MAXMUM = const_cast("exclusiveMaximum"); // 16 + static constexpr char* REQUIRED = const_cast("required"); // 8 + static constexpr char* MIN_PROPERTIES = const_cast("minProperties"); // 13 + static constexpr char* MAX_PROPERTIES = const_cast("maxProperties"); // 13 + static constexpr char* ITEMS = const_cast("items"); + static constexpr char* TUPLE_ITEMS = const_cast("tupleItems"); + static constexpr char* ADDITIONAL_ITEMS = const_cast("additionalItems"); + static constexpr char* UNIQUE_ITEMS = const_cast("uniqueItems"); // 11 + static constexpr char* MIN_ITEMS = const_cast("minItems"); // 8 + static constexpr char* MAX_ITEMS = const_cast("maxItems"); // 8 +}; + +static const int JS_TYPE_LEN = 4; // strlen(ObJsonSchemaItem::TYPE) +static const int JS_PATTERN_LEN = 7; // strlen(ObJsonSchemaItem::PATTERN), +static const int JS_REQUIRED_LEN = 8; //strlen(ObJsonSchemaItem::REQUIRED), +static const int JS_STRMAX_LEN = 9; // strlen(ObJsonSchemaItem::MAX_LEN), +static const int JS_MULTIPLE_LEN = 10; // strlen(ObJsonSchemaItem::MULTIPLE_OF), +static const int JS_UNIQUE_ITEMS_LEN = 11; //strlen(ObJsonSchemaItem::UNIQUE_ITEMS) +static const int JS_PROMAX_LEN = 13; // strlen(ObJsonSchemaItem::MAX_PROPERTIES), +static const int JS_ADD_ITEMS_LEN = 15; //strlen(ObJsonSchemaItem::ADDITIONAL_ITEMS), +static const int 
JS_EXCLUSIVE_LEN = 16; // strlen(ObJsonSchemaItem::EXCLUSIVE_MAXMUM), +static const int JS_DEP_REQUIRED_LEN = 17; //strlen(ObJsonSchemaItem::DEPENDENTREQUIRED), +static const int JS_ADD_PRO_LEN = 20; // strlen(ObJsonSchemaItem::ADDITIONAL_PRO), + +int ObJsonSchemaTree::build_schema_tree(ObIJsonBase *json_doc) +{ + INIT_SUCC(ret); + ObJsonObject* origin_json = nullptr; + ObJsonObject* cur_root = nullptr; + serial_num_ = 0; + + if (OB_ISNULL(json_doc) || OB_ISNULL(allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null", K(ret), KPC(json_doc)); + } else if (json_doc->json_type() != ObJsonNodeType::J_OBJECT) { + // json schema must be object + ret = OB_ERR_TYPE_OF_JSON_SCHEMA; + LOG_WARN("json schema must be object", K(ret), K(json_doc->json_type())); + } else if (json_doc->is_bin()) { + ObJsonBin *j_bin = static_cast(json_doc); + ObJsonNode *j_tree = nullptr; + if (OB_FAIL(j_bin->to_tree(j_tree))) { + LOG_WARN("fail to change bin to tree", K(ret)); + } else { + origin_json = static_cast(j_tree); + } + } else { + origin_json = static_cast(json_doc); + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(root_doc_) && OB_FALSE_IT(root_doc_ = origin_json)) { + } else if (OB_ISNULL(schema_map_ = OB_NEWx(ObJsonArray, allocator_, allocator_)) + || OB_ISNULL(cur_root = OB_NEWx(ObJsonObject, allocator_, allocator_)) + || OB_ISNULL(ref_ = OB_NEWx(ObJsonObject, allocator_, allocator_)) + || OB_ISNULL(typeless_ = OB_NEWx(ObJsonInt, allocator_, DEFAULT_PREVIOUS_NUMBER))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to init schema tree.", K(ret)); + } else if (OB_FAIL(schema_map_->append(cur_root))) { + LOG_WARN("fail to push schema root.", K(ret)); + } else if (OB_FALSE_IT(++serial_num_) || OB_FALSE_IT(cur_schema_stk_.reset())) { + } else if (OB_FAIL(cur_schema_stk_.push(cur_root))) { + LOG_WARN("fail to push cur schema.", K(ret)); + } else if (OB_FAIL(schema_map_->append(ref_))) { + LOG_WARN("fail to push schema root.", K(ret)); + } else if 
(OB_FALSE_IT(++serial_num_)) { + } else if (OB_FAIL(inner_build_schema_tree(origin_json, false))) { + LOG_WARN("fail to build schema.", K(ret)); + } + return ret; +} +bool ObJsonSchemaTree::if_have_ref(ObJsonObject* origin_schema) { + bool ret_bool = false; + ObJsonNode* node = origin_schema->get_value(ObJsonSchemaItem::REF); + if (OB_ISNULL(node)) { + // didn't define, its normal + } else if (node->json_type() == ObJsonNodeType::J_STRING) { + ObString ref_str = ObString(node->get_data_length(), node->get_data()); + bool end_while = false; + bool is_legal_name = true; + while (!ref_str.empty() && !end_while && is_legal_name) { + ObString key_str = ObJsonSchemaUtils::get_pointer_key(ref_str, end_while); + if (key_str == ObJsonSchemaItem::ROOT) { + } else if (!ObJsonSchemaUtils::is_legal_json_pointer_name(key_str)) { + is_legal_name = false; + } + } + ret_bool = end_while && is_legal_name; + } + return ret_bool; +} + +int ObJsonSchemaTree::get_ref_pointer_value(const ObString origin_ref_str, ObJsonObject*& ref_value) +{ + INIT_SUCC(ret); + ObString ref_str = origin_ref_str; + if (ref_str.length() > 1) { + ObIJsonBase *new_doc = nullptr; + bool end_while = false; + new_doc = root_doc_; + while (!ref_str.empty() && !end_while && OB_NOT_NULL(new_doc) && OB_SUCC(ret)) { + ObString key_str = ObJsonSchemaUtils::get_pointer_key(ref_str, end_while); + ObIJsonBase *tmp_doc = nullptr; + if (key_str.empty()) { + new_doc = nullptr; + } else if (key_str == ObJsonSchemaItem::ROOT) { + new_doc = root_doc_; + } else if (OB_FAIL(new_doc->get_object_value(key_str, tmp_doc)) || OB_ISNULL(tmp_doc)) { + // didn't find, its normal + ret = OB_SUCCESS; + end_while = true; + new_doc = nullptr; + } else { + new_doc = tmp_doc; + } + } + if (OB_NOT_NULL(new_doc) && new_doc->json_type() == ObJsonNodeType::J_OBJECT) { + ref_value = static_cast(new_doc); + } else { + ref_value = nullptr; + } + } else if (ref_str.length() == 1) { //ref_str = "#" + ref_value = root_doc_; + } + return ret; +} + +int 
ObJsonSchemaTree::handle_ref_keywords(ObJsonObject* origin_schema, ObIArray &schema_vec_stk, + const bool& is_composition, ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonNode* node = origin_schema->get_value(ObJsonSchemaItem::REF); + ObString origin_ref = ObString(node->get_data_length(), node->get_data()); + ObJsonObject* ref_val = nullptr; + if (OB_FAIL(get_ref_pointer_value(origin_ref, ref_val))) { + LOG_WARN("fail to get ref value.", K(ret)); + } else if (OB_ISNULL(ref_val)) { + // didn't find, its normal + } else if (OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::REF, node, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(ret)); + } else { + // record and parse ref value + ObJsonNode* ref_schema = ref_->get_value(origin_ref); + if (OB_ISNULL(ref_schema)) { + if (origin_ref.compare(json_ptr_) == 0) { + if (OB_FAIL(ref_->add(origin_ref, typeless_, true, false, false))) { + LOG_WARN("fail to add ref schema.", K(ret)); + } + } else { + ObJsonSchemaTree ref_schema_tree(allocator_, root_doc_, origin_ref); + if (OB_FAIL(ref_schema_tree.build_schema_tree(ref_val))) { + LOG_WARN("fail to build schema.", K(ret)); + } else if (OB_ISNULL(ref_schema_tree.schema_map_)) { + } else if (OB_FAIL(ref_->add(origin_ref, ref_schema_tree.schema_map_, true, false, false))) { + LOG_WARN("fail to add ref schema.", K(ret)); + } + } + } // not null, already parsed, do nothing + } + return ret; +} + +int ObJsonSchemaTree::inner_build_schema_tree(ObJsonObject* origin_schema, bool is_composition, ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObArray schema_vec_stk; + schema_vec_stk.set_block_size(SCHEMA_DEFAULT_PAGE_SIZE); + ObJsonNode* type = nullptr; + ObJsonSchemaType schema_type; + ObJsonSubSchemaKeywords key_words; + // record keywords that have subschema + key_words.flags_ = 0; + + if (cur_schema_stk_.size() < 1 || OB_ISNULL(schema_map_) || OB_ISNULL(allocator_) + || OB_ISNULL(origin_schema) || (is_composition && 
OB_ISNULL(comp_array))) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret), K(cur_schema_stk_.size()), KPC(schema_map_)); + } else if (OB_FAIL(get_schema_vec(schema_vec_stk, is_composition))) { + // all schema add to schema_vec: + // if in composition, add to composition_vec, when check, just record result + // if not in composition, add to schema_vec, when check and if illegal, raise error report and stop validation + LOG_WARN("fail to get schema vec.", K(ret)); + // check public schema key words: type + } else if (if_have_ref(origin_schema)) { + if (OB_FAIL(handle_ref_keywords(origin_schema, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN("fail to handle ref.", K(ret)); + } + } else if (OB_FAIL(get_difined_type(origin_schema, schema_vec_stk, schema_type, is_composition, comp_array))) { + LOG_WARN("fail to get schema type.", K(ret)); + // check public schema key words: enum + } else if (schema_type.error_type_ == 1) { + // wrong type, the schema must be false, do not need check other keywords + // but its legal, don't raise error + } else if (OB_FAIL(handle_keywords_with_specific_type(ObJsonSchemaItem::ENUM, ObJsonNodeType::J_ARRAY, + origin_schema, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN("fail to get schema enum.", K(ret)); + // check keywords by defined type, if not define, check each key + } else if (OB_FAIL(check_keywords_by_type(schema_type, origin_schema, schema_vec_stk, key_words, is_composition, comp_array))) { + LOG_WARN("fail to check schema by type.", K(ret)); + } else if (OB_FAIL(ObJsonSchemaUtils::check_if_composition_legal(origin_schema, key_words))) { + LOG_WARN("fail to check if_composition_legal.", K(ret)); + } else if (OB_FALSE_IT(schema_vec_stk.destroy())) { // useless now + } else if (key_words.flags_ != 0 && OB_FAIL(handle_keywords_with_subschemas(key_words, origin_schema, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN("fail to handle key words with subschema.", K(ret)); + } + return 
ret; +} + +int ObJsonSchemaTree::get_schema_vec(ObIArray &schema_vec_stk, bool is_composition) +{ + INIT_SUCC(ret); + int size = cur_schema_stk_.size(); + ObJsonNode* res_vec = nullptr; + for (int i = 0; OB_SUCC(ret) && i < size; ++i) { + ObJsonObject* cur_schema = cur_schema_stk_.at(i); + ObJsonNode* tmp_json = nullptr; + if (is_composition) { + ObJsonArray* schema_vec = nullptr; + if (OB_ISNULL(tmp_json = cur_schema->get_value(ObJsonSchemaItem::COMPOSITION))) { + if (OB_ISNULL(schema_vec = OB_NEWx(ObJsonArray, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc composition node.", K(ret)); + } else if (OB_FAIL(cur_schema->add(ObJsonSchemaItem::COMPOSITION, schema_vec, true, false, false))) { + LOG_WARN("fail to add composition node.", K(ret)); + } else { + res_vec = schema_vec; + } + } else if (tmp_json->json_type() == ObJsonNodeType::J_ARRAY) { + res_vec = tmp_json; + } else { + ret = OB_ERR_INVALID_JSON_TYPE; + LOG_WARN("must be array.", K(ret)); + } + } else if (OB_ISNULL(tmp_json = cur_schema->get_value(ObJsonSchemaItem::SCHEMA))) { + ObJsonObject* schema_vec = nullptr; + if (OB_ISNULL(schema_vec = OB_NEWx(ObJsonObject, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc schema node.", K(ret)); + } else if (OB_FAIL(cur_schema->add(ObJsonSchemaItem::SCHEMA, schema_vec, true, false, false))) { + LOG_WARN("fail to add schema node.", K(ret)); + } else { + res_vec = schema_vec; + } + } else if (tmp_json->json_type() == ObJsonNodeType::J_OBJECT) { + res_vec = tmp_json; + } else { + ret = OB_ERR_INVALID_JSON_TYPE; + LOG_WARN("must be array.", K(ret)); + } + + // the schema need record to all members in schma_vec_stk + if (OB_FAIL(ret)) { + } else if (OB_FAIL(schema_vec_stk.push_back(res_vec))) { + LOG_WARN("fail to push schema_vec node.", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::generate_schema_and_record(const ObString& key_word, + ObJsonNode* value, + ObIArray 
&schema_vec_stk, + const bool& is_composition, + ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + if (is_composition && OB_FAIL(generate_comp_and_record(key_word, value, schema_vec_stk, comp_array))) { + LOG_WARN("fail to add comp node.", K(ret)); + } else if (!is_composition && OB_FAIL(generate_schema_info(key_word, value, schema_vec_stk))) { + LOG_WARN("fail to add schema node.", K(ret)); + } + return ret; +} + +int ObJsonSchemaTree::generate_schema_info(const ObString& key_word, ObJsonNode* value, ObIArray &schema_vec_stk) +{ + INIT_SUCC(ret); + int size = schema_vec_stk.count(); + for (int i = 0; OB_SUCC(ret) && i < size; ++i) { + ObJsonNode* cur_schema_node = schema_vec_stk.at(i); + ObJsonObject* cur_schema_vec = nullptr; + ObJsonNode* old_key_value = nullptr; + if (cur_schema_node->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(i)); + } else if (OB_FALSE_IT(cur_schema_vec = static_cast(cur_schema_node))) { + } else if (OB_ISNULL(old_key_value = cur_schema_vec->get_value(key_word))) { + if (OB_FAIL(cur_schema_vec->add(key_word, value))) { + LOG_WARN("fail to add.", K(ret), K(i)); + } + } else { + int res = 0; + bool update_old_key = false; + if (OB_SUCC(old_key_value->compare(*value, res)) && res == 0) { + // same value, do nothing + } else if (OB_FAIL(union_schema_def(key_word, value, old_key_value, update_old_key))) { + LOG_WARN("fail to get union.", K(key_word), K(ret)); + } else if (update_old_key && OB_FAIL(cur_schema_vec->add(key_word, value, false, true, true))) { + LOG_WARN("fail to update value.", K(key_word), K(ret)); + } + } + } // end for + return ret; +} + +int ObJsonSchemaTree::union_type(ObJsonNode*& new_value, ObJsonNode* old_value, bool& update_old_key) +{ + INIT_SUCC(ret); + if (new_value->json_type() != ObJsonNodeType::J_UINT || old_value->json_type() != ObJsonNodeType::J_UINT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array type.", K(ret)); + } else { + 
ObJsonSchemaType old_val; + old_val.flags_ = old_value->get_uint(); + ObJsonSchemaType new_val; + new_val.flags_ = new_value->get_uint(); + ObJsonSchemaType final_val; + final_val.flags_ = 0; + if (old_val.error_type_ == 1 || new_val.error_type_ == 1) { + final_val.error_type_ = 1; + } else { + if ((old_val.integer_ == 1 && new_val.integer_) + || (old_val.integer_ == 1 && new_val.number_) + || (old_val.number_ == 1 && new_val.integer_)) { + final_val.integer_ = 1; + } + final_val.null_ = old_val.null_ & new_val.null_; + final_val.boolean_ = old_val.boolean_ & new_val.boolean_; + final_val.string_ = old_val.string_ & new_val.string_; + final_val.number_ = old_val.number_ & new_val.number_; + final_val.object_ = old_val.object_ & new_val.object_; + final_val.array_ = old_val.array_ & new_val.array_; + if (final_val.flags_ == 0) { + final_val.error_type_ = 1; + } + if (final_val.flags_ == 0) { + final_val.error_type_ = 1; + } + } + + // set new type + update_old_key = false; + ObJsonUint* final_value = static_cast(old_value); + final_value->set_value(final_val.flags_); + } + return ret; +} + + +int ObJsonSchemaTree::union_array_key_words_value(ObJsonNode*& new_value, ObJsonNode* old_value, bool& update_old_key, bool get_merge) +{ + INIT_SUCC(ret); + ObSortedVector res; + ObJsonContentCmp cmp; + ObJsonContentUnique unique; + if (new_value->json_type() != ObJsonNodeType::J_ARRAY || old_value->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array type.", K(ret)); + } else { + ObJsonArray* new_val = static_cast(new_value); + ObJsonArray* old_val = static_cast(old_value); + int old_size = old_val->element_count(); + int new_size = new_val->element_count(); + if (old_size == 0) { + update_old_key = true; + } else if (new_size == 0) { + } else { + for (int i = 0; i < old_size && OB_SUCC(ret); ++i) { + ObSortedVector::iterator pos = res.end(); + ObJsonNode* node = (*old_val)[i]; + if (OB_FAIL(res.insert_unique(node, pos, cmp, 
unique))) { + if (ret == OB_CONFLICT_VALUE) { + ret = OB_SUCCESS; // confilict means found duplicated nodes, it is not an error. + } + } + } + old_val->clear(); + for (int i = 0; i < new_size && OB_SUCC(ret); ++i) { + ObSortedVector::iterator pos = res.end(); + ObJsonNode* node = (*new_val)[i]; + if (node->json_type() != ObJsonNodeType::J_STRING) { + // ignore + } else if (OB_FAIL(res.insert_unique(node, pos, cmp, unique))) { + if (ret == OB_CONFLICT_VALUE) { + ret = OB_SUCCESS; + if (!get_merge && OB_FAIL(old_val->append(node))) { // get union, only need confict value + LOG_WARN("fail to append.", K(ret)); + } + } + } else if (get_merge && OB_FAIL(old_val->append(node))) { + // get merge, add values that not in old_val + LOG_WARN("fail to append.", K(ret)); + } + } + } + } + return ret; +} + +int ObJsonSchemaTree::union_scalar_key_words_value(ObJsonNode*& new_value, ObJsonNode* old_value, bool& update_old_key) +{ + INIT_SUCC(ret); + if (old_value->json_type() == ObJsonNodeType::J_ARRAY) { + if (OB_FAIL(old_value->array_append(new_value))) { + LOG_WARN("fail to append.", K(ret)); + } + } else { + update_old_key = true; + ObJsonArray* array_val = nullptr; + if (OB_ISNULL(array_val = OB_NEWx(ObJsonArray, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc array node.", K(ret)); + } else if (OB_FAIL(array_val->array_append(old_value))) { + LOG_WARN("fail to append.", K(ret)); + } else if (OB_FAIL(array_val->array_append(new_value))) { + LOG_WARN("fail to append.", K(ret)); + } else { + new_value = array_val; + } + } + return ret; +} + +int ObJsonSchemaTree::union_add_pro_value(ObJsonNode*& new_value, ObJsonNode* old_value) +{ + INIT_SUCC(ret); + if (new_value->json_type() != ObJsonNodeType::J_ARRAY || old_value->json_type() != ObJsonNodeType::J_ARRAY + || new_value->element_count() != ADDITIONAL_PRO_ARRAY_COUNT || (old_value->element_count()) % ADDITIONAL_PRO_ARRAY_COUNT != 0) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be 
object type.", K(ret)); + } else { + int size = new_value->element_count(); + ObJsonArray* new_val = static_cast(new_value); + ObJsonArray* old_val = static_cast(old_value); + for (int i = 0; i < ADDITIONAL_PRO_ARRAY_COUNT && OB_SUCC(ret); ++i) { + ObJsonNode* new_node = (*new_val)[i]; + if (OB_FAIL(old_val->append(new_node))) { + LOG_WARN("fail to get union.", K(ret)); + } + } + } + return ret; +} + +int ObJsonSchemaTree::union_schema_def(const ObString& key_word, ObJsonNode*& value, ObJsonNode* old_key_value, bool& update_old_key) +{ + INIT_SUCC(ret); + int len = key_word.length(); + update_old_key = false; + int res = 0; + switch (len) { + case JS_TYPE_LEN : { // type or enum + if (key_word.compare(ObJsonSchemaItem::TYPE) == 0 && OB_SUCC(union_type(value, old_key_value, update_old_key))) { + } else if (key_word.compare(ObJsonSchemaItem::ENUM) == 0 && OB_SUCC(union_array_key_words_value(value, old_key_value, update_old_key))) { + } else if (key_word.compare(ObJsonSchemaItem::REF) == 0 && OB_SUCC(union_scalar_key_words_value(value, old_key_value, update_old_key))) { + } else if (OB_SUCC(ret)) { + ret = OB_ERR_WRONG_VALUE; + } + break; + } + case JS_STRMAX_LEN : { + if (key_word.compare(ObJsonSchemaItem::MAX_LEN) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res > 0 ? true : false; + } else if (key_word.compare(ObJsonSchemaItem::MIN_LEN) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res < 0 ? true : false; + } else if (OB_SUCC(ret)) { + ret = OB_ERR_WRONG_VALUE; + } + break; + } + case JS_PATTERN_LEN : { // pattern, maximum, minimun + if (key_word.compare(ObJsonSchemaItem::MAXMUM) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res > 0 ? true : false; + } else if (key_word.compare(ObJsonSchemaItem::MINMUM) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res < 0 ? 
true : false; + } else if (key_word.compare(ObJsonSchemaItem::PATTERN) == 0 + && OB_SUCC(union_scalar_key_words_value(value, old_key_value, update_old_key))) { + } else if (OB_SUCC(ret)) { + ret = OB_ERR_WRONG_VALUE; + } + break; + } + case JS_EXCLUSIVE_LEN : { + if (key_word.compare(ObJsonSchemaItem::EXCLUSIVE_MAXMUM) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res > 0 ? true : false; + } else if (key_word.compare(ObJsonSchemaItem::EXCLUSIVE_MINMUM) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res < 0 ? true : false; + } else if (OB_SUCC(ret)) { + ret = OB_ERR_WRONG_VALUE; + } + break; + } + case JS_MULTIPLE_LEN : { + if (key_word.compare(ObJsonSchemaItem::MULTIPLE_OF) == 0 + && OB_FAIL(union_scalar_key_words_value(value, old_key_value, update_old_key))) { + LOG_WARN("fail to union matiple.", K(ret)); + } + break; + } + case JS_PROMAX_LEN : { + if (key_word.compare(ObJsonSchemaItem::MAX_PROPERTIES) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res > 0 ? true : false; + } else if (key_word.compare(ObJsonSchemaItem::MIN_PROPERTIES) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res < 0 ? true : false; + } else if (OB_SUCC(ret)) { + ret = OB_ERR_WRONG_VALUE; + } + break; + } + case JS_REQUIRED_LEN : { // required, maxitems, minitems + if (key_word.compare(ObJsonSchemaItem::MAX_ITEMS) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res > 0 ? true : false; + } else if (key_word.compare(ObJsonSchemaItem::MIN_ITEMS) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res < 0 ? 
true : false; + } else if (key_word.compare(ObJsonSchemaItem::REQUIRED) == 0 && OB_SUCC(union_array_key_words_value(value, old_key_value, update_old_key, true))) { + } else if (OB_SUCC(ret)) { + ret = OB_ERR_WRONG_VALUE; + } + break; + } + case JS_DEP_REQUIRED_LEN : { + if (key_word.compare(ObJsonSchemaItem::DEPENDENTREQUIRED) == 0 + && OB_FAIL(union_scalar_key_words_value(value, old_key_value, update_old_key))) { + LOG_WARN("fail to union matiple.", K(ret)); + } + break; + } + case JS_ADD_PRO_LEN : { + if (key_word.compare(ObJsonSchemaItem::ADDITIONAL_PRO) == 0 && OB_FAIL(union_add_pro_value(value, old_key_value))) { + LOG_WARN("fail to union additional pro.", K(ret)); + } + break; + } + case JS_ADD_ITEMS_LEN : { + if (key_word.compare(ObJsonSchemaItem::ADDITIONAL_ITEMS) == 0 && OB_SUCC(old_key_value->compare(*value, res))) { + update_old_key = res > 0 ? true : false; + } else if (OB_SUCC(ret)) { + ret = OB_ERR_WRONG_VALUE; + } + break; + } + default: { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("wrong type.", K(ret)); + } + } + return ret; +} + + +int ObJsonSchemaTree::generate_comp_and_record(const ObString& key_word, + ObJsonNode* value, + ObIArray &schema_vec_stk, + ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonObject* key_word_schema = nullptr; + int size = schema_vec_stk.count(); + if (OB_ISNULL(key_word_schema = OB_NEWx(ObJsonObject, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc key_word_schema node.", K(ret)); + } else if (OB_FAIL(key_word_schema->add(key_word, value, true, false, false))) { + LOG_WARN("fail to add schema node.", K(key_word), K(ret)); + } + for (int i = 0; OB_SUCC(ret) && i < size; ++i) { + ObJsonNode* cur_schema_node = schema_vec_stk.at(i); + ObJsonArray* cur_schema_vec = nullptr; + ObJsonNode* record_schema = nullptr; + ObJsonInt* record_schema_idx = nullptr; + if (i == 0) { + record_schema = key_word_schema; + } else { + record_schema = typeless_; + } + if (cur_schema_node->json_type() != 
ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array type.", K(ret), K(i)); + } else if (OB_FALSE_IT(cur_schema_vec = static_cast(cur_schema_node))) { + } else if (OB_FAIL(schema_map_->append(record_schema))) { // append type to schema_map_ + LOG_WARN("fail to push schema map.", K(ret)); + } else if (OB_ISNULL(record_schema_idx = OB_NEWx(ObJsonInt, allocator_, serial_num_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN( "fail to alloc memory for array json node", K(ret)); + } else if (OB_FAIL(cur_schema_vec->append(record_schema_idx))) { // append to cur_schema_vec + LOG_WARN("fail to push into schema_vec.", K(ret)); + } else if (OB_ISNULL(record_schema_idx = OB_NEWx(ObJsonInt, allocator_, serial_num_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN( "fail to alloc memory for array json node", K(ret)); + } else if (OB_FAIL(comp_array->append(record_schema_idx))) { + LOG_WARN("fail to push composition array.", K(ret)); + } else { + // num of schema_map++ + ++serial_num_; + } + } + return ret; +} + +int ObJsonSchemaTree::handle_keywords_with_specific_type(const ObString& key_word, + const ObJsonNodeType& expect_type, + ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + const bool& is_composition, + ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonNode* node = origin_schema->get_value(key_word); + if (OB_ISNULL(node)) { + // didn't define, its normal + } else if (node->element_count() == 0) { + } else if (node->json_type() == expect_type) { + if (OB_FAIL(generate_schema_and_record(key_word, node, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(key_word), K(ret)); + } + } /*else if (lib::is_oracle_mode()) { + if implement oracle json schema, check each keyword and its expect type, because: + a.in mysql mode: + 1. if the keywords and its expect_type didn't match, mysql would ignore the keyword; + 2. 
but if a composition/item keywords is array type, but its children aren't object, mysql would coredump; + (in this situation, raise error anyway) + b.in oracle mode: + 1. if the keywords and its expect_type didn't match, oracle would return error "invalid JSON schema document"; + c.but if an objects which key doesn't match key_words, both oracle and mysql would ignore. + }*/ + return ret; +} + +int ObJsonSchemaTree::handle_positive_int_keywords(const ObString& key_word, ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + const bool& is_composition, ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonNode* node = origin_schema->get_value(key_word); + if (OB_ISNULL(node)) { + // didn't define, its normal + } else if (node->element_count() == 0) { + } else if (node->json_type() == ObJsonNodeType::J_INT) { + int num = node->get_int(); + if (num < 0) { // illegal value, ignore and not take effect + } else if (OB_FAIL(generate_schema_and_record(key_word, node, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(key_word), K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::handle_keywords_with_number_value(const ObString& key_word, + ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + const bool& is_composition, + ObJsonArray* comp_array, + bool must_be_positive /*= false*/) +{ + INIT_SUCC(ret); + ObJsonNode* node = origin_schema->get_value(key_word); + if (OB_ISNULL(node)) { + // didn't define, its normal + } else if (node->is_number()) { + bool is_valid = true; + if (must_be_positive) { + double val = 0.0; + if (OB_FAIL(ObJsonSchemaUtils::get_json_number(node, val))) { + LOG_WARN( "fail to get num", K(node->json_type()), K(val)); + } else if (val < 0) { // illegal value, ignore and not take effect + is_valid = false; + } + } + if (OB_FAIL(ret)) { + } else if (is_valid && OB_FAIL(generate_schema_and_record(key_word, node, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", 
K(key_word), K(ret)); + } + } + return ret; +} + +int ObJsonSchemaUtils::set_type_by_string(const ObString& str, ObJsonSchemaType& s_type) +{ + INIT_SUCC(ret); + if (str.length() < strlen(ObJsonSchemaItem::TYPE_NULL)) { + s_type.error_type_ = 1; + } else { + switch (str[0]) { + case 's': { + if (str.compare(ObJsonSchemaItem::TYPE_STRING) == 0) { + s_type.string_ = 1; + } else { + s_type.error_type_ = 1; + } + break; + } + case 'n': { + if (str.compare(ObJsonSchemaItem::TYPE_NULL) == 0) { + s_type.null_ = 1; + } else if (str.compare(ObJsonSchemaItem::TYPE_NUMBER) == 0) { + s_type.number_ = 1; + } else { + s_type.error_type_ = 1; + } + break; + } + case 'i': { + if (str.compare(ObJsonSchemaItem::TYPE_INTEGER) == 0) { + s_type.integer_ = 1; + } else { + s_type.error_type_ = 1; + } + break; + } + case 'b': { + if (str.compare(ObJsonSchemaItem::TYPE_BOOLEAN) == 0) { + s_type.boolean_ = 1; + } else { + s_type.error_type_ = 1; + } + break; + } + case 'o': { + if (str.compare(ObJsonSchemaItem::TYPE_OBJECT) == 0) { + s_type.object_ = 1; + } else { + s_type.error_type_ = 1; + } + break; + } + case 'a': { + if (str.compare(ObJsonSchemaItem::TYPE_ARRAY) == 0) { + s_type.array_ = 1; + } else { + s_type.error_type_ = 1; + } + break; + } + default : { + s_type.error_type_ = 1; + break; + } + } + } + return ret; +} + +int ObJsonSchemaTree::get_difined_type(ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + ObJsonSchemaType& s_type, + const bool& is_composition, + ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + s_type.flags_ = 0; + ObJsonNode* node = origin_schema->get_value(ObJsonSchemaItem::TYPE); + if (OB_ISNULL(node)) { + } else if (node->json_type() == ObJsonNodeType::J_STRING) { + ObJsonString* j_str = static_cast(node); + ObString str = j_str->get_str(); + if (OB_FAIL(ObJsonSchemaUtils::set_type_by_string(str, s_type))) { + LOG_WARN( "fail to get type", K(str), K(ret)); + } + } else if (node->json_type() == ObJsonNodeType::J_ARRAY) { + int array_size = 
node->element_count(); + for (int i = 0; i < array_size && OB_SUCC(ret); ++i) { + ObIJsonBase* tmp_node = nullptr; + if (OB_FAIL(node->get_array_element(i, tmp_node))) { + LOG_WARN( "fail to get node", K(i), K(array_size), K(ret)); + } else if (tmp_node->json_type() == ObJsonNodeType::J_STRING) { + ObString str(tmp_node->get_data_length(), tmp_node->get_data()); + if (OB_FAIL(ObJsonSchemaUtils::set_type_by_string(str, s_type))) { + LOG_WARN( "fail to get type", K(i), K(array_size), K(ret)); + } else if (s_type.error_type_ == 1) { + s_type.error_type_ = 0; + } + } // not string, ignore + } + if (OB_SUCC(ret) && s_type.flags_ == 0) { + s_type.error_type_ = 1; + } + } else { // key word "type" is special, if defined type, it must be checked, even is meaningless definition(such as wrong type or error string); + s_type.error_type_ = 1; + } + + if (OB_SUCC(ret) && s_type.flags_ != 0) { + ObJsonUint* type_value = nullptr; + if (OB_ISNULL(type_value = OB_NEWx(ObJsonUint, allocator_, s_type.flags_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN( "fail to alloc memory for array json node", K(ret)); + } else if (OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::TYPE, type_value, + schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(ret)); + } + } + return ret; +} + +// Previously to Draft 2019-09, dependentRequired and dependentSchemas were one keyword called dependencies. +// If the dependency value was an array, it would behave like dependentRequired. +// If the dependency value was a object, it would behave like dependentSchemas. 
+int ObJsonSchemaTree::get_dep_schema_if_defined(ObJsonObject* json_schema, + ObIArray &schema_vec_stk, + ObJsonSubSchemaKeywords& key_words, + const bool& is_composition, + ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonNode* node = json_schema->get_value(ObJsonSchemaItem::DEPENDENCIES); + if (OB_ISNULL(node)) { + // didn't define, its normal + } else if (node->json_type() == ObJsonNodeType::J_OBJECT && node->element_count() > 0) { + json_schema = static_cast(node); + ObJsonObject* deps_require_node = nullptr; + int count_schema_required = 0; + for (int i = 0; OB_SUCC(ret) && i < json_schema->element_count() && i >= 0; ++i) { + ObString key; + ObJsonNode* value = nullptr; + ObJsonObject* origin_schema = nullptr; + if (OB_FAIL(json_schema->get_value_by_idx(i, key, value))) { + LOG_WARN("fail to get key-value.", K(i), K(json_schema->element_count()), K(ret)); + } else if (key_words.dep_schema_ == 0 && value->json_type() == ObJsonNodeType::J_OBJECT && value->element_count() > 0) { + // value is not subschema, ignore in mysql, raise error in oracle + key_words.dep_schema_ = 1; + } else if (value->json_type() == ObJsonNodeType::J_ARRAY && value->element_count() > 0) { + if (OB_ISNULL(deps_require_node) + && OB_ISNULL(deps_require_node = OB_NEWx(ObJsonObject, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FAIL(deps_require_node->add(key, value, true, true))) { + LOG_WARN("fail to add key-value.", K(key), K(ret)); + } else if (OB_FAIL(json_schema->remove(key))) { + LOG_WARN("fail to remove origin key-value.", K(key), K(ret)); + } else { + ++count_schema_required; + --i; + } + } + } + + if (OB_FAIL(ret)) { + } else if (count_schema_required > 0 + && OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::DEPENDENTREQUIRED, deps_require_node, + schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::check_keywords_by_type(const ObJsonSchemaType& 
schema_type, + ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + ObJsonSubSchemaKeywords& key_words, + const bool& is_composition, + ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + if (schema_type.flags_ == 0) { + if (OB_FAIL(check_keywords_of_string(origin_schema, schema_vec_stk, is_composition, comp_array)) + || OB_FAIL(check_keywords_of_number(origin_schema, schema_vec_stk, is_composition, comp_array)) + || OB_FAIL(check_keywords_of_object(origin_schema, schema_vec_stk, key_words, is_composition, comp_array)) + || OB_FAIL(check_keywords_of_array(origin_schema, schema_vec_stk, key_words, is_composition, comp_array))) { + LOG_WARN("fail to get check keywords", K(ret)); + } + } else { + if (OB_SUCC(ret) && schema_type.string_ == 1) { + ret = check_keywords_of_string(origin_schema, schema_vec_stk, is_composition, comp_array); + } + if (OB_SUCC(ret) && (schema_type.number_ == 1 || schema_type.integer_ == 1)) { + ret = check_keywords_of_number(origin_schema, schema_vec_stk, is_composition, comp_array); + } + if (OB_SUCC(ret) && schema_type.object_ == 1) { + ret = check_keywords_of_object(origin_schema, schema_vec_stk, key_words, is_composition, comp_array); + } + if (OB_SUCC(ret) && schema_type.array_ == 1) { + ret = check_keywords_of_array(origin_schema, schema_vec_stk, key_words, is_composition, comp_array); + } + } + return ret; +} + +int ObJsonSchemaTree::check_keywords_of_string(ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + const bool& is_composition, + ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + if (OB_FAIL(handle_positive_int_keywords(ObJsonSchemaItem::MAX_LEN, origin_schema, schema_vec_stk, is_composition, comp_array)) + || OB_FAIL(handle_positive_int_keywords(ObJsonSchemaItem::MIN_LEN, origin_schema, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type max/min length", K(ret)); + } else { + ObJsonNode* node = origin_schema->get_value(ObJsonSchemaItem::PATTERN); + if (OB_ISNULL(node)) { + // didn't define, its 
normal + } else if (node->element_count() == 0) { + } else if (node->json_type() == ObJsonNodeType::J_STRING) { + ObString pattern = ObString(node->get_data_length(), node->get_data()); + bool valid_pattern = false; + if (OB_FAIL(ObJsonSchemaUtils::is_valid_pattern(pattern, str_buf_, valid_pattern))) { + } else if (!valid_pattern) { + // invalid, do not record + } else if (OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::PATTERN, node, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(ret)); + } + } + } + return ret; +} + +int ObJsonSchemaTree::check_keywords_of_number(ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + const bool& is_composition, + ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonNode* max = origin_schema->get_value(ObJsonSchemaItem::MAXMUM); + ObJsonNode* min = origin_schema->get_value(ObJsonSchemaItem::MINMUM); + // for json_schema draft 4, exclusive_maxmum is boolean + // so, if didn't define maxmun, even there is exclusive_maxmum definition, the definition is meaningless + if (OB_NOT_NULL(max) && max->is_number()) { + ObJsonNode* exclusive_max = nullptr; + if (OB_NOT_NULL(exclusive_max = origin_schema->get_value(ObJsonSchemaItem::EXCLUSIVE_MAXMUM)) + && exclusive_max->json_type() == ObJsonNodeType::J_BOOLEAN + && exclusive_max->get_boolean() == true) { + if (OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::EXCLUSIVE_MAXMUM, max, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(ret)); + } + } else if (OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::MAXMUM, max, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(ret)); + } + } + + if (OB_SUCC(ret) && OB_NOT_NULL(min) && min->is_number()) { + ObJsonNode* exclusive_min = nullptr; + if (OB_NOT_NULL(exclusive_min = origin_schema->get_value(ObJsonSchemaItem::EXCLUSIVE_MINMUM)) + && exclusive_min->json_type() == ObJsonNodeType::J_BOOLEAN + && 
exclusive_min->get_boolean() == true) { + if (OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::EXCLUSIVE_MINMUM, min, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(ret)); + } + } else if (OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::MINMUM, min, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(ret)); + } + } + + if (OB_SUCC(ret) && OB_FAIL(handle_keywords_with_number_value(ObJsonSchemaItem::MULTIPLE_OF, + origin_schema, schema_vec_stk, is_composition, comp_array, true))) { + LOG_WARN( "fail to add type maxmum/minmum/mutiple", K(ret)); + } + return ret; +} + +int ObJsonSchemaTree::check_keywords_of_object(ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + ObJsonSubSchemaKeywords& key_words, + const bool& is_composition, + ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + if (OB_FAIL(handle_positive_int_keywords(ObJsonSchemaItem::MAX_PROPERTIES, origin_schema, + schema_vec_stk, is_composition, comp_array)) + || OB_FAIL(handle_positive_int_keywords(ObJsonSchemaItem::MIN_PROPERTIES, origin_schema, + schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type max/min properties", K(ret)); + } else if (OB_FAIL(get_dep_schema_if_defined(origin_schema, schema_vec_stk, key_words, + is_composition, comp_array))) { + LOG_WARN("fail to get schema dependencies.", K(ret)); + // in mysql mode, required could be anytype, but ignore the values if not string + // but in oracle mode, it must be string, or else is illegal, should raise error + // todo: oracle mode adaptation + } else if (OB_FAIL(handle_keywords_with_specific_type(ObJsonSchemaItem::REQUIRED, + ObJsonNodeType::J_ARRAY, + origin_schema, schema_vec_stk, + is_composition, comp_array))) { + LOG_WARN( "fail to add type required", K(ret)); + } + // property with sub_schema, just record, deal with it later + if (OB_SUCC(ret)) { + ObJsonNode* node = nullptr; + if (OB_NOT_NULL(node = 
origin_schema->get_value(ObJsonSchemaItem::PROPERTIES)) + && node->json_type() == ObJsonNodeType::J_OBJECT && node->element_count() > 0) { + key_words.properties_ = 1; + } + if (OB_NOT_NULL(node = origin_schema->get_value(ObJsonSchemaItem::PATTERN_PRO)) + && node->json_type() == ObJsonNodeType::J_OBJECT && node->element_count() > 0) { + key_words.pattern_pro_ = 1; + } + if (OB_NOT_NULL(node = origin_schema->get_value(ObJsonSchemaItem::ADDITIONAL_PRO))) { + if (node->json_type() == ObJsonNodeType::J_BOOLEAN) { + ObJsonArray* add_array = nullptr; + if (node->get_boolean()) { + } else if (OB_FAIL(get_addition_pro_value(key_words, origin_schema, add_array))) { + LOG_WARN( "fail to add type add_pro", K(key_words), K(ret)); + } else if (OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::ADDITIONAL_PRO, add_array, + schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add additonal schema", K(key_words), K(ret)); + } + } else if (node->json_type() == ObJsonNodeType::J_OBJECT && node->element_count() > 0) { + // property with sub_schema, just record, deal with it later + key_words.additional_pro_ = 1; + } + } + } + return ret; +} + + +int ObJsonSchemaTree::add_required_key(ObJsonNode* pro, ObJsonNode* required, ObJsonArray* pro_key_array) +{ + INIT_SUCC(ret); + if (OB_ISNULL(pro_key_array) || OB_ISNULL(required) || required->json_type() != ObJsonNodeType::J_ARRAY) { + // didn't define, its normal, do not need to add, ignore + } else if (OB_ISNULL(pro)) { + ObJsonArray* array_node = static_cast(required); + int size = array_node->element_count(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonNode* tmp_node = (*array_node)[i]; + bool valid_pattern = false; + if (OB_ISNULL(tmp_node) || tmp_node->json_type() != ObJsonNodeType::J_STRING) { + // not string, ignore + } else { + ObJsonString* tmp_str = static_cast(tmp_node); + ObJsonString* str_node = nullptr; + if (OB_ISNULL(str_node = OB_NEWx(ObJsonString, allocator_, tmp_str->get_str()))) { + ret 
= OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed.", K(i), K(size), K(ret)); + } else if (OB_FAIL(pro_key_array->append(str_node))) { + LOG_WARN("append failed.", K(i), K(size), K(ret)); + } + } + } + } else if (pro->json_type() == ObJsonNodeType::J_OBJECT) { + ObJsonArray* array_node = static_cast(required); + ObJsonObject* object_node = static_cast(pro); + int size = array_node->element_count(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonNode* tmp_node = (*array_node)[i]; + bool valid_pattern = false; + if (OB_ISNULL(tmp_node) || tmp_node->json_type() != ObJsonNodeType::J_STRING) { + // not string, ignore + } else { + ObJsonString* str_node = static_cast(tmp_node); + if (OB_FAIL(object_node->add(str_node->get_str(), typeless_, true, false, false, true))) { + if (ret == OB_ERR_DUPLICATE_KEY) { + ret = OB_SUCCESS; + } // ignore dup key + } + } + } // add required key definition into properties + } + return ret; +} + +/* + mysql adaptation, bugfix: 53161405 + in oracle mode and standard json schema, the keyword additionalProperties is relative properties and patternProperties. + other properties are both additionalProperties, when additionalProperties is false, these definition are illegal. + but in mysql mode, properties (string type) defined in the required keyword are also considered legal definition. +*/ +int ObJsonSchemaTree::get_addition_pro_value(const ObJsonSubSchemaKeywords& key_words, ObJsonObject* origin_schema, ObJsonArray*& add_array, bool check_pattern) +{ + INIT_SUCC(ret); + add_array = nullptr; + ObJsonArray* pro_key_array = nullptr; + ObJsonArray* pattern_key_array = nullptr; + ObJsonNode* dep_node = origin_schema->get_value(ObJsonSchemaItem::DEPENDENCIES); + ObJsonNode* required_node = lib::is_mysql_mode() ? 
origin_schema->get_value(ObJsonSchemaItem::REQUIRED) : nullptr; + if (OB_ISNULL(add_array = OB_NEWx(ObJsonArray, allocator_, allocator_)) + || OB_ISNULL(pro_key_array = OB_NEWx(ObJsonArray, allocator_, allocator_)) + || OB_ISNULL(pattern_key_array = OB_NEWx(ObJsonArray, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc comp_array failed.", K(ret)); + } else if (OB_NOT_NULL(required_node) && required_node->json_type() != ObJsonNodeType::J_ARRAY + && OB_FALSE_IT(required_node = nullptr)) { + } else if (OB_NOT_NULL(dep_node) && dep_node->json_type() != ObJsonNodeType::J_OBJECT + && OB_FALSE_IT(dep_node = nullptr)) { + } else if (key_words.properties_ == 0 && OB_ISNULL(dep_node) && OB_NOT_NULL(required_node)) { + if (OB_FAIL(add_required_key(nullptr, required_node, pro_key_array))) { + LOG_WARN("add required key failed.", K(ret)); + } + } else if ((key_words.properties_ == 1 && OB_ISNULL(dep_node)) + || (OB_NOT_NULL(dep_node) && key_words.properties_ == 0)) { + // if properties and dependencies only defined one + ObJsonNode* node = (dep_node == nullptr) ? 
origin_schema->get_value(ObJsonSchemaItem::PROPERTIES) : dep_node; + if (OB_FAIL(add_required_key(node, required_node, pro_key_array))) { + LOG_WARN("add required key failed.", K(ret)); + } else if (OB_FAIL(ObJsonSchemaUtils::collect_key(node, allocator_, pro_key_array, str_buf_))) { + LOG_WARN("add key failed.", K(ret)); + } + } else if (key_words.properties_ == 1 && OB_NOT_NULL(dep_node)) { + // if defined properties and dependencies at the same time + ObJsonNode* pro_node = origin_schema->get_value(ObJsonSchemaItem::PROPERTIES); + if (OB_FAIL(add_required_key(pro_node, required_node, pro_key_array))) { + LOG_WARN("add required key failed.", K(ret)); + } else if (OB_FAIL(ObJsonSchemaUtils::collect_key(pro_node, allocator_, pro_key_array, str_buf_))) { + LOG_WARN("add key failed.", K(ret)); + } else { + ObJsonObject* dep_obj = static_cast(dep_node); + ObJsonObject* pro_obj = static_cast(pro_node); + int size = dep_obj->element_count(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonNode* tmp_node = nullptr; + ObString key; + if (OB_FAIL(dep_obj->get_key_by_idx(i, key))) { + LOG_WARN("get key failed.", K(size), K(i), K(key), K(ret)); + } else if (OB_NOT_NULL(tmp_node = pro_obj->get_value(key))) { // duplicate key, do nothing + } else { + ObJsonString* str_node = nullptr; + if (OB_ISNULL(str_node = OB_NEWx(ObJsonString, allocator_, key))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed.", K(size), K(i), K(key), K(ret)); + } else if (OB_FAIL(pro_key_array->append(str_node))) { + LOG_WARN("append failed.", K(size), K(i), K(key), K(ret)); + } + } + } // add dep key + } // collect pro key first + } + + // add pattern node + if (OB_FAIL(ret)) { + } else if (key_words.pattern_pro_ == 1) { + ObJsonNode* patter_node = origin_schema->get_value(ObJsonSchemaItem::PATTERN_PRO); + if (OB_FAIL(ObJsonSchemaUtils::collect_key(patter_node, allocator_, pattern_key_array, str_buf_, check_pattern))) { + LOG_WARN("add key failed.", K(ret)); + } + } + + if 
(OB_FAIL(ret)) { + } else if (OB_FAIL(add_array->append(pro_key_array))) { + LOG_WARN("append failed.", K(ret)); + } else if (OB_FAIL(add_array->append(pattern_key_array))) { + LOG_WARN("append failed.", K(ret)); + } + return ret; +} + +int ObJsonSchemaTree::check_keywords_of_array(ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + ObJsonSubSchemaKeywords& key_words, + const bool& is_composition, + ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonNode* node = nullptr; + int tuple_items_size = 0; + if (OB_FAIL(handle_positive_int_keywords(ObJsonSchemaItem::MAX_ITEMS, origin_schema, + schema_vec_stk, is_composition, comp_array)) + || OB_FAIL(handle_positive_int_keywords(ObJsonSchemaItem::MIN_ITEMS, origin_schema, + schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type max/min items", K(ret)); + } else if (OB_NOT_NULL(node = origin_schema->get_value(ObJsonSchemaItem::ITEMS))) { + // property with sub_schema, just record, deal with it later + if (node->json_type() == ObJsonNodeType::J_OBJECT && node->element_count() > 0) { + // list item, valid for all element in array + key_words.items_ = 1; + } else if (node->json_type() == ObJsonNodeType::J_ARRAY && node->element_count() > 0) { + if (OB_FAIL(ObJsonSchemaUtils::is_all_children_subschema(node))) { + // tuple items, each element has its own schema, check if all schema element + // if not, raise error + LOG_WARN("illegal subschema in item array", K(ret)); + } else { + key_words.tuple_items_ = 1; + tuple_items_size = node->element_count(); + } + } // not object or array, ignore in mysql, raise error in oracle + } // check keyword: items + + if (OB_FAIL(ret)) { + } else if (key_words.tuple_items_ != 1) { + // if not tuple items, all items in array shoud obey item schema, there is no additonal items + // so, the key_word: additional_items is meaningless, ignore it + key_words.additional_items_ = 0; + } else if (OB_NOT_NULL(node = 
origin_schema->get_value(ObJsonSchemaItem::ADDITIONAL_ITEMS))) { + if (node->json_type() == ObJsonNodeType::J_BOOLEAN) { + // if ADDITIONAL_ITEMS == true, do not need check + ObJsonInt* tuple_size = nullptr; + if (node->get_boolean()) { + } else if (OB_ISNULL(tuple_size = OB_NEWx(ObJsonInt, allocator_, tuple_items_size))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to init schema tree.", K(ret)); + } else if (OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::ADDITIONAL_ITEMS, tuple_size, + schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add additonal schema", K(ret)); + } + } else if (node->json_type() == ObJsonNodeType::J_OBJECT && node->element_count() > 0) { + // property with sub_schema, just record, deal with it later + key_words.additional_items_ = 1; + } + } + + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(node = origin_schema->get_value(ObJsonSchemaItem::UNIQUE_ITEMS))) { + if (node->json_type() == ObJsonNodeType::J_BOOLEAN && node->get_boolean()) { + if (OB_FAIL(generate_schema_and_record(ObJsonSchemaItem::UNIQUE_ITEMS, node, schema_vec_stk, is_composition, comp_array))) { + LOG_WARN( "fail to add type schema", K(ret)); + } + } + } + return ret; +} + +int ObJsonSchemaTree::handle_keywords_with_subschemas(ObJsonSubSchemaKeywords& key_words, ObJsonObject* json_schema, + ObIArray &schema_vec_stk, + bool is_composition, ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObArray pro_array; + pro_array.set_block_size(SCHEMA_DEFAULT_PAGE_SIZE); + + // deal with property + ObJsonObject* pro_schema = nullptr; + if (key_words.properties_ == 1) { + pro_schema = json_schema; + if (OB_FAIL(handle_properties(pro_schema, is_composition, comp_array, pro_array))) { + LOG_WARN("fail to handle properties", K(ret)); + } + } + if (OB_SUCC(ret) && key_words.pattern_pro_ == 1) { + if (OB_FAIL(handle_pattern_properties(json_schema, pro_schema, is_composition, comp_array, pro_array))) { + LOG_WARN("fail to handle properties", K(ret)); + } + } + if 
(OB_SUCC(ret) && key_words.additional_pro_ == 1) { + if (OB_FAIL(handle_additional_properties(key_words, json_schema, is_composition, comp_array))) { + LOG_WARN("fail to handle properties", K(ret)); + } + } + pro_array.destroy(); + // the key_words.items_ and key_words.tuple_items_ wouldn't be true at the same time + if (OB_FAIL(ret)) { + } else if (key_words.items_ == 1) { + if (OB_FAIL(handle_array_schema(json_schema, is_composition, comp_array, false))) { + LOG_WARN("fail to handle items", K(ret)); + } + } else if (key_words.tuple_items_ == 1) { + if (OB_FAIL(handle_array_tuple_schema(json_schema, is_composition, comp_array))) { + LOG_WARN("fail to handle tuple items", K(ret)); + } else if (key_words.additional_items_ == 1 + && OB_FAIL(handle_array_schema(json_schema, is_composition, comp_array, true))) { + LOG_WARN("fail to handle additional items", K(ret)); + } + } + if (OB_SUCC(ret) && key_words.dep_schema_ == 1) { + if (!is_composition + && OB_FAIL(handle_unnested_dependencies(json_schema))) { + LOG_WARN("fail to handle unnested_dependent_schemas.", K(ret)); + } else if (is_composition + && OB_FAIL(handle_nested_dependencies(json_schema, comp_array))) { + LOG_WARN("fail to handle nested_dependent_schemas.", K(ret)); + } + } + if (OB_SUCC(ret) && key_words.all_of_ == 1) { + if (!is_composition + && OB_FAIL(handle_unnested_composition(ObJsonSchemaItem::ALLOF, json_schema))) { + LOG_WARN("fail to handle unnested_composition_allOf.", K(ret)); + } else if (is_composition + && OB_FAIL(handle_nested_composition(ObJsonSchemaItem::ALLOF, json_schema, comp_array))) { + LOG_WARN("fail to handle nested_composition_allOf.", K(ret)); + } + } + if (OB_SUCC(ret) && key_words.any_of_ == 1) { + if (!is_composition + && OB_FAIL(handle_unnested_composition(ObJsonSchemaItem::ANYOF, json_schema))) { + LOG_WARN("fail to handle unnested_composition_anyOf.", K(ret)); + } else if (is_composition + && OB_FAIL(handle_nested_composition(ObJsonSchemaItem::ANYOF, json_schema, 
comp_array))) { + LOG_WARN("fail to handle nested_composition_anyOf.", K(ret)); + } + } + if (OB_SUCC(ret) && key_words.one_of_ == 1) { + if (!is_composition + && OB_FAIL(handle_unnested_composition(ObJsonSchemaItem::ONEOF, json_schema))) { + LOG_WARN("fail to handle unnested_composition_oneOf.", K(ret)); + } else if (is_composition + && OB_FAIL(handle_nested_composition(ObJsonSchemaItem::ONEOF, json_schema, comp_array))) { + LOG_WARN("fail to handle nested_composition_oneOf.", K(ret)); + } + } + if (OB_SUCC(ret) && key_words.not_ == 1) { + if (!is_composition + && OB_FAIL(handle_unnested_not(json_schema))) { + LOG_WARN("fail to handle unnested_composition_not.", K(ret)); + } else if (is_composition + && OB_FAIL(handle_nested_not(json_schema, comp_array))) { + LOG_WARN("fail to handle nested_composition_not.", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::handle_unnested_dependencies(ObJsonObject* json_schema) +{ + INIT_SUCC(ret); + ObJsonObject* dep_schema_value = nullptr; + if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_key(ObJsonSchemaItem::DEPENDENCIES, json_schema))) { + LOG_WARN("json doc move to key failed", K(ret)); + } else if (OB_ISNULL(dep_schema_value = OB_NEWx(ObJsonObject, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc nested_value failed.", K(ret)); + } else { + int key_size = json_schema->element_count(); + ObJsonNode* value = nullptr; + ObJsonArray* comp_array = nullptr; + for (int i = 0; OB_SUCC(ret) && i < key_size; ++i) { + ObString key; + ObJsonObject* origin_schema = nullptr; + if (OB_FAIL(json_schema->get_value_by_idx(i, key, value))) { + LOG_WARN("fail to get key-value.", K(i), K(ret)); + } else if (value->json_type() != ObJsonNodeType::J_OBJECT) { + // value is not subschema, ignore in mysql, raise error in oracle + } else if (OB_FALSE_IT(origin_schema = static_cast(value))) { + } else if (OB_ISNULL(comp_array = OB_NEWx(ObJsonArray, allocator_, allocator_))) { + ret = 
OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc comp_array failed.", K(i), K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(origin_schema, true, comp_array))) { + LOG_WARN("recursive failed. ", K(i), K(ret)); + } else if (OB_FAIL(dep_schema_value->add(key, comp_array, true, true, false))) { + LOG_WARN("fail to add dep_schema_value.", K(i), K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(json_schema_add_dep_value(dep_schema_value))) { + LOG_WARN("fail to add dependentSchemas.", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::handle_nested_dependencies(ObJsonObject* json_schema, ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonObject* dep_key = nullptr; + ObJsonObject* dep_value = nullptr; + if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_key(ObJsonSchemaItem::DEPENDENCIES, json_schema))) { + LOG_WARN("json doc move to key failed", K(ret)); + } else if (OB_ISNULL(dep_key = OB_NEWx(ObJsonObject, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc nested_key failed.", K(ret)); + } else if (OB_ISNULL(dep_value = OB_NEWx(ObJsonObject, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc nested_value failed.", K(ret)); + } else if (OB_FAIL(dep_key->add(ObJsonSchemaItem::DEPENDENTSCHEMAS, dep_value, true, false, false))) { + LOG_WARN("fail to add nested node.", K(ret)); + } else { + int key_size = json_schema->element_count(); + ObJsonNode* value = nullptr; + for (int i = 0; OB_SUCC(ret) && i < key_size; ++i) { + ObString key; + ObJsonObject* origin_schema = nullptr; + ObJsonArray* sub_dep_array = nullptr; + if (OB_FAIL(json_schema->get_value_by_idx(i, key, value))) { + LOG_WARN("fail to get key-value.", K(i), K(key), K(ret)); + } else if (value->json_type() != ObJsonNodeType::J_OBJECT) { + // value is not subschema, ignore in mysql, raise error in oracle + } else if (OB_FALSE_IT(origin_schema = static_cast(value))) { + } else if (OB_ISNULL(sub_dep_array = OB_NEWx(ObJsonArray, 
allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc comp_array failed.", K(i), K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(origin_schema, true, sub_dep_array))) { + LOG_WARN("recursion failed. ", K(i), K(ret)); + } else if (OB_FAIL(dep_value->add(key, sub_dep_array, true, false, false))) { + LOG_WARN("json schema stk move to key failed", K(i), K(key), K(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(comp_array->append(dep_key))) { + LOG_WARN("fail to add nested dependencies key.", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::handle_properties(ObJsonObject*& json_schema, bool is_composition, + ObJsonArray* comp_array, ObIArray& pro_array) +{ + INIT_SUCC(ret); + if (OB_FAIL(all_move_to_key(ObJsonSchemaItem::PROPERTIES, json_schema))) { + LOG_WARN("fail to move to properties.", K(ret)); + } else if (OB_FAIL(ObJsonSchemaUtils::record_schema_array(cur_schema_stk_, pro_array))) { + LOG_WARN("fail to record properties.", K(ret)); + } else { + int key_size = json_schema->element_count(); + ObString key; + ObJsonNode* value = nullptr; + for (int i = 0; OB_SUCC(ret) && i < key_size; ++i) { + ObJsonObject* origin_schema = nullptr; + if (OB_FAIL(json_schema->get_value_by_idx(i, key, value))) { + LOG_WARN("fail to get key-value.", K(i), K(ret)); + } else if (value->json_type() != ObJsonNodeType::J_OBJECT) { + // value is not subschema, ignore in mysql, raise error in oracle + } else if (OB_FALSE_IT(origin_schema = static_cast(value))) { + } else if (OB_FAIL(json_schema_move_to_key(key))) { + LOG_WARN("json schema stk move to key failed.", K(i), K(key), K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(origin_schema, is_composition, comp_array))) { + LOG_WARN("recursion failed. ", K(i), K(ret)); + } else if (OB_FAIL(json_schema_back_to_parent())) { + LOG_WARN("fail to back. 
", K(i), K(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(json_schema_back_to_parent())) { + LOG_WARN("fail to back.", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::handle_pattern_properties(ObJsonObject* json_schema, ObJsonObject* pro_schema, bool is_composition, + ObJsonArray* comp_array, const ObIArray& pro_array) +{ + INIT_SUCC(ret); + if (OB_FAIL(all_move_to_key(ObJsonSchemaItem::PATTERN_PRO, json_schema))) { + LOG_WARN("fail to move to properties.", K(ret)); + } else { + int key_size = json_schema->element_count(); + ObString key; + ObJsonNode* value = nullptr; + for (int i = 0; OB_SUCC(ret) && i < key_size && i >= 0; ++i) { + ObJsonObject* origin_schema = nullptr; + int origin_schema_stk_size = cur_schema_stk_.size(); + bool valid_pattern = false; + if (OB_FAIL(json_schema->get_value_by_idx(i, key, value))) { + LOG_WARN("fail to get key-value.", K(i), K(ret)); + } else if (value->json_type() != ObJsonNodeType::J_OBJECT) { + // value is not subschema, ignore in mysql, raise error in oracle + } else if (OB_FAIL(ObJsonSchemaUtils::is_valid_pattern(key, str_buf_, valid_pattern))) { + } else if (!valid_pattern) { + if (OB_FAIL(json_schema->remove(key))) { + LOG_WARN("fail to remove illegal pattern.", K(i), K(key), K(ret)); + } else { + --key_size; + --i; + } + } else if (OB_FALSE_IT(origin_schema = static_cast(value))) { + } else if (OB_FAIL(json_schema_move_to_key(key))) { + LOG_WARN("json schema stk move to key failed.", K(i), K(key), K(ret)); + } else if (OB_NOT_NULL(pro_schema) && pro_array.count() > 0 + && OB_FAIL(add_pattern_pro_to_schema(pro_schema, pro_array, key))) { + LOG_WARN("fail to add patter properties.", K(i), K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(origin_schema, is_composition, comp_array))) { + LOG_WARN("recursion failed.", K(i), K(ret)); + } else { + while (origin_schema_stk_size < cur_schema_stk_.size()) { + cur_schema_stk_.pop(); + } + if (OB_FAIL(json_schema_back_to_parent())) { + LOG_WARN("fail to 
back. ", K(ret)); + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(json_schema_back_to_parent())) { + LOG_WARN("fail to back.", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::handle_additional_properties(ObJsonSubSchemaKeywords& key_words, ObJsonObject* json_schema, + bool is_composition, ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonArray* pro_array = nullptr; + if (OB_FAIL(get_addition_pro_value(key_words, json_schema, pro_array, false))) { + LOG_WARN("fail to get add_pro_value.", K(ret)); + } else if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_key(ObJsonSchemaItem::ADDITIONAL_PRO, json_schema))) { + LOG_WARN("json doc move to key failed", K(ret)); + } else if (OB_FAIL(json_schema_move_to_array(ObJsonSchemaItem::ADDITIONAL_PRO, pro_array))) { + LOG_WARN("json schema stk move to key failed", K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(json_schema, is_composition, comp_array))) { + LOG_WARN("recursion failed.", K(ret)); + } else if (OB_FAIL(json_schema_back_to_grandpa())) { + LOG_WARN("fail to back.", K(ret)); + } + return ret; +} + +int ObJsonSchemaTree::handle_array_schema(ObJsonObject* json_schema, bool is_composition, + ObJsonArray*comp_array, bool is_additonal) +{ + INIT_SUCC(ret); + ObString key_word = is_additonal ? 
ObJsonSchemaItem::ADDITIONAL_ITEMS : ObJsonSchemaItem::ITEMS; + if (!is_additonal) { + if (OB_FAIL(all_move_to_key(key_word, json_schema))) { + LOG_WARN("fail to move to properties.", K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(json_schema, is_composition, comp_array))) { + LOG_WARN("recursion failed.", K(ret)); + } else if (OB_FAIL(json_schema_back_to_parent())) { + LOG_WARN("fail to back.", K(ret)); + } + } else { + ObJsonArray* array_schema = nullptr; + ObJsonBuffer buf(allocator_); + if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_array(ObJsonSchemaItem::ITEMS, json_schema, array_schema))) { + LOG_WARN("json doc move to key failed", K(ret)); + } else if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_key(key_word, json_schema))) { + LOG_WARN("json doc move to key failed", K(key_word), K(ret)); + } else if (OB_FAIL(json_schema_move_to_key(key_word))) { + LOG_WARN("json schema stk move to key failed", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(array_schema)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret)); + } else { + int size = array_schema->element_count(); + buf.reset(); + if (OB_FAIL(ObJsonSchemaUtils::get_index_str(size, buf))) { + LOG_WARN("fail to get key", K(buf), K(ret)); + } else if (OB_FAIL(json_schema_move_to_key(ObString(buf.length(), buf.ptr())))) { + LOG_WARN("json schema stk move to key failed.", K(buf), K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(json_schema, is_composition, comp_array))) { + LOG_WARN("recursion failed. ", K(ret)); + } else if (OB_FAIL(json_schema_back_to_parent())) { + LOG_WARN("fail to back. 
", K(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(json_schema_back_to_parent())) { + LOG_WARN("fail to back.", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::handle_array_tuple_schema(ObJsonObject* json_schema, bool is_composition, ObJsonArray*comp_array) +{ + INIT_SUCC(ret); + ObJsonArray* array_schema = nullptr; + ObJsonBuffer buf(allocator_); + if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_array(ObJsonSchemaItem::ITEMS, json_schema, array_schema))) { + LOG_WARN("json doc move to key failed", K(ret)); + } else if (OB_FAIL(json_schema_move_to_key(ObJsonSchemaItem::TUPLE_ITEMS))) { + LOG_WARN("json schema stk move to key failed", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(array_schema)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret)); + } else { + int size = array_schema->element_count(); + for (int i = 0; OB_SUCC(ret) && i < size; ++i) { + ObJsonObject* origin_schema = nullptr; + ObJsonNode* value = (*array_schema)[i]; + buf.reset(); + if (value->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_TYPE_OF_JSON_SCHEMA; + LOG_WARN("json schema must be object", K(ret), K(i), K(value->json_type())); + } else if (OB_FALSE_IT(origin_schema = static_cast(value))) { + } else if (OB_FAIL(ObJsonSchemaUtils::get_index_str(i, buf))) { + LOG_WARN("fail to get key", K(i), K(buf), K(ret)); + } else if (OB_FAIL(json_schema_move_to_key(ObString(buf.length(), buf.ptr())))) { + LOG_WARN("json schema stk move to key failed.", K(i), K(buf), K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(origin_schema, is_composition, comp_array))) { + LOG_WARN("recursion failed. ", K(i), K(ret)); + } else if (OB_FAIL(json_schema_back_to_parent())) { + LOG_WARN("fail to back. 
", K(i), K(ret)); + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(json_schema_back_to_parent())) { + LOG_WARN("fail to back.", K(ret)); + } + return ret; +} + +// if is unnested compisiton, add Composition key word and its value +int ObJsonSchemaTree::handle_unnested_composition(const ObString& key_word, ObJsonObject* json_schema) +{ + INIT_SUCC(ret); + ObJsonArray* array_schema = nullptr; + if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_array(key_word, json_schema, array_schema))) { + LOG_WARN("json doc move to key failed", K(key_word), K(ret)); + } else if (OB_ISNULL(array_schema)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret)); + } else { + int size = array_schema->element_count(); + for (int i = 0; OB_SUCC(ret) && i < size; ++i) { + ObJsonObject* origin_schema = nullptr; + ObJsonArray* comp_array = nullptr; + ObJsonNode* value = (*array_schema)[i]; + ObString idx_str; + if (value->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_TYPE_OF_JSON_SCHEMA; + LOG_WARN("json schema must be object", K(ret), K(i), K(value->json_type())); + } else if (OB_FALSE_IT(origin_schema = static_cast(value))) { + } else if (OB_ISNULL(comp_array = OB_NEWx(ObJsonArray, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc comp_array failed.", K(i), K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(origin_schema, true, comp_array))) { + LOG_WARN("recursion failed.", K(i), K(ret)); + } else if (OB_FAIL(json_schema_add_comp_value(key_word, comp_array))) { + LOG_WARN("json schema stk move to key failed", K(i), K(key_word), K(ret)); + } + } + } + return ret; +} + +int ObJsonSchemaTree::handle_unnested_not(ObJsonObject* json_schema) +{ + INIT_SUCC(ret); + if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_key(ObJsonSchemaItem::NOT, json_schema))) { + LOG_WARN("fail to move to properties.", K(ret)); + } else { + ObJsonArray* comp_array = nullptr; + if (OB_ISNULL(comp_array = OB_NEWx(ObJsonArray, allocator_, allocator_))) { 
+ ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc comp_array failed.", K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(json_schema, true, comp_array))) { + LOG_WARN("recursion failed.", K(ret)); + } else { + int size = cur_schema_stk_.size(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonObject* cur_schema = cur_schema_stk_.at(i); + ObJsonNode* tmp_node = nullptr; + bool update_old_key = false; + if (OB_ISNULL(tmp_node = cur_schema->get_value(ObJsonSchemaItem::NOT))) { + if (OB_FAIL(cur_schema->add(ObJsonSchemaItem::NOT, comp_array, true, false, false))) { + LOG_WARN("fail to add composition node.", K(i), K(ret)); + } + } else if (tmp_node->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(i)); + } else { + int arr_size = comp_array->element_count(); + ObJsonArray* old_arr = static_cast(tmp_node); + for (int i = 0; i < arr_size; ++i) { + if (OB_FAIL(old_arr->append((*comp_array)[i]))) { + LOG_WARN("fail to append.", K(ret), K(i)); + } + } + } + } + } + } + return ret; +} + +int ObJsonSchemaTree::handle_nested_composition(const ObString& key_word, ObJsonObject* json_schema, ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonArray* array_schema = nullptr; + ObJsonObject* nested_key = nullptr; + ObJsonArray* nested_value = nullptr; + if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_array(key_word, json_schema, array_schema))) { + LOG_WARN("json doc move to key failed", K(ret)); + } else if (OB_ISNULL(array_schema)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret)); + } else if (OB_ISNULL(nested_key = OB_NEWx(ObJsonObject, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc nested_key failed.", K(ret)); + } else if (OB_ISNULL(nested_value = OB_NEWx(ObJsonArray, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc nested_value failed.", K(ret)); + } else if (OB_FAIL(nested_key->add(key_word, nested_value, 
true, false, false))) { + LOG_WARN("fail to add nested node.", K(key_word), K(ret)); + } else { + int size = array_schema->element_count(); + for (int i = 0; OB_SUCC(ret) && i < size; ++i) { + ObJsonObject* origin_schema = nullptr; + ObJsonArray* sub_comp_array = nullptr; + ObJsonNode* value = (*array_schema)[i]; + ObString idx_str; + if (value->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_TYPE_OF_JSON_SCHEMA; + LOG_WARN("json schema must be object", K(ret), K(i), K(value->json_type())); + } else if (OB_FALSE_IT(origin_schema = static_cast(value))) { + } else if (OB_ISNULL(sub_comp_array = OB_NEWx(ObJsonArray, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc comp_array failed.", K(i), K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(origin_schema, true, sub_comp_array))) { + LOG_WARN("recursion failed. ", K(i), K(ret)); + } else if (OB_FAIL(nested_value->append(sub_comp_array))) { + LOG_WARN("json schema stk move to key failed", K(i), K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(comp_array->append(nested_key))) { + LOG_WARN("fail to add nested key.", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::handle_nested_not(ObJsonObject* json_schema, ObJsonArray* comp_array) +{ + INIT_SUCC(ret); + ObJsonArray* array_schema = nullptr; + ObJsonObject* nested_key = nullptr; + ObJsonArray* nested_value = nullptr; + if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_key(ObJsonSchemaItem::NOT, json_schema))) { + LOG_WARN("fail to move to properties.", K(ret)); + } else if (OB_ISNULL(nested_key = OB_NEWx(ObJsonObject, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc nested_key failed.", K(ret)); + } else if (OB_ISNULL(nested_value = OB_NEWx(ObJsonArray, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc nested_value failed.", K(ret)); + } else if (OB_FAIL(nested_key->add(ObJsonSchemaItem::NOT, nested_value, true, false, false))) { + LOG_WARN("fail 
to add nested node.", K(ret)); + } else if (OB_FAIL(inner_build_schema_tree(json_schema, true, nested_value))) { + LOG_WARN("recursion failed. ", K(ret)); + } else if (OB_FAIL(comp_array->append(nested_key))) { + LOG_WARN("fail to add nested key.", K(ret)); + } + return ret; +} + +int ObJsonSchemaTree::all_move_to_key(const ObString& key, ObJsonObject*& json_schema) +{ + INIT_SUCC(ret); + if (OB_FAIL(ObJsonSchemaUtils::json_doc_move_to_key(key, json_schema))) { + LOG_WARN("json doc move to key failed", K(key), K(ret)); + } else if (OB_FAIL(json_schema_move_to_key(key))) { + LOG_WARN("json schema stk move to key failed", K(key), K(ret)); + } + return ret; +} + +int ObJsonSchemaTree::json_schema_move_to_key(const ObString& key) +{ + INIT_SUCC(ret); + int size = cur_schema_stk_.size(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonObject* cur_schema = cur_schema_stk_.at(i); + ObJsonNode* tmp_node = nullptr; + if (OB_ISNULL(tmp_node = cur_schema->get_value(key))) { + ObJsonObject* key_value = nullptr; + if (OB_ISNULL(key_value = OB_NEWx(ObJsonObject, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc composition node.", K(key), K(i), K(size), K(ret)); + } else if (OB_FAIL(cur_schema->add(key, key_value, true, false, false))) { + LOG_WARN("fail to add composition node.", K(key), K(i), K(size), K(ret)); + } else { + cur_schema = key_value; + } + } else if (tmp_node->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(size), K(i), K(key)); + } else { + cur_schema = static_cast(tmp_node); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(cur_schema_stk_.set(i, cur_schema))) { + LOG_WARN("fail to move.", K(ret), K(size), K(i), K(key)); + } + } + return ret; +} + +int ObJsonSchemaTree::json_schema_move_to_array(const ObString& key, ObJsonArray* array_val) +{ + INIT_SUCC(ret); + int size = cur_schema_stk_.size(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { 
+ ObJsonObject* cur_schema = cur_schema_stk_.at(i); + ObJsonArray* key_value = nullptr; + ObJsonNode* tmp_node = nullptr; + ObJsonObject* object_val = nullptr; + if (OB_ISNULL(tmp_node = cur_schema->get_value(key))) { + if (OB_ISNULL(key_value = OB_NEWx(ObJsonArray, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc composition node.", K(key), K(i), K(size), K(ret)); + } else if (OB_FAIL(cur_schema->add(key, key_value, true, false, false))) { + LOG_WARN("fail to add composition node.", K(key), K(i), K(size), K(ret)); + } + } else if (tmp_node->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(i), K(key)); + } else { + key_value = static_cast(tmp_node); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(key_value->append(array_val))) { + LOG_WARN("fail to append array val.", K(ret), K(i), K(key)); + } else if (OB_ISNULL(object_val = OB_NEWx(ObJsonObject, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc composition node.", K(key), K(i), K(size), K(ret)); + } else if (OB_FAIL(key_value->append(object_val))) { + LOG_WARN("fail to append obj val.", K(ret), K(key), K(i), K(size)); + } else if (OB_FAIL(cur_schema_stk_.set(i, object_val))) { + LOG_WARN("fail to move.", K(key), K(i), K(size), K(ret)); + } + } + return ret; +} + +int ObJsonSchemaTree::json_schema_add_comp_value(const ObString& key, ObJsonArray* new_array_val) +{ + INIT_SUCC(ret); + int size = cur_schema_stk_.size(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonObject* cur_schema = cur_schema_stk_.at(i); + ObJsonNode* tmp_node = nullptr; + ObJsonArray* comp_array = nullptr; + if (OB_ISNULL(tmp_node = cur_schema->get_value(key))) { + ObJsonArray* key_value = nullptr; + if (OB_ISNULL(key_value = OB_NEWx(ObJsonArray, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc composition node.", K(key), K(i), K(size), K(ret)); + } 
else if (OB_FAIL(cur_schema->add(key, key_value, true, false, false))) { + LOG_WARN("fail to add composition node.", K(key), K(i), K(size), K(ret)); + } else { + comp_array = key_value; + } + } else if (tmp_node->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(i), K(key)); + } else { + comp_array = static_cast(tmp_node); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(comp_array->append(new_array_val))) { + LOG_WARN("fail to move.", K(ret), K(i), K(key)); + } + } + return ret; +} + +int ObJsonSchemaTree::json_schema_add_dep_value(ObJsonObject* dep_val) +{ + INIT_SUCC(ret); + int size = cur_schema_stk_.size(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonObject* cur_schema = cur_schema_stk_.at(i); + ObJsonNode* tmp_node = nullptr; + ObJsonObject* origin_dep = nullptr; + if (OB_ISNULL(tmp_node = cur_schema->get_value(ObJsonSchemaItem::DEPENDENTSCHEMAS))) { + if (OB_FAIL(cur_schema->add(ObJsonSchemaItem::DEPENDENTSCHEMAS, dep_val, true, false, false))) { + LOG_WARN("fail to add composition node.", K(i), K(size), K(ret)); + } + } else if (tmp_node->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(i), K(size), K(ret)); + } else { + origin_dep = static_cast(tmp_node); + int new_key_size = dep_val->element_count(); + for (int i = 0; i < new_key_size && OB_SUCC(ret); ++i) { + ObString key; + ObJsonNode* val = nullptr; + ObJsonNode* origin_val = nullptr; + ObJsonArray* arr_val; + if (OB_FAIL(dep_val->get_value_by_idx(i, key, val))) { + LOG_WARN("fail to get object.", K(i), K(size), K(ret)); + } else if (OB_ISNULL(origin_val = origin_dep->get_value(key))) { + if (OB_FAIL(origin_dep->add(key, val, true, false, false))) { + LOG_WARN("fail to add key-value.", K(ret), K(i), K(key), K(val)); + } + } else if (origin_val->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array type.", K(ret), K(i)); + } 
else { + arr_val = static_cast(origin_val); + if (OB_FAIL(arr_val->append(val))) { + LOG_WARN("fail to add key-value.", K(ret), K(i), K(key), K(val)); + } + } + } + } + } + return ret; +} + +int ObJsonSchemaTree::json_schema_back_to_parent() +{ + INIT_SUCC(ret); + int size = cur_schema_stk_.size(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonObject* cur_schema = cur_schema_stk_.at(i); + ObJsonNode* parent = cur_schema->get_parent(); + if (OB_ISNULL(parent)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should have parent", K(ret), K(i)); + } else if (parent->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(parent->json_type())); + } else if (OB_FAIL(cur_schema_stk_.set(i, static_cast(parent)))) { + LOG_WARN("fail to move.", K(ret), K(i)); + } + } + return ret; +} + +int ObJsonSchemaTree::json_schema_back_to_grandpa() +{ + INIT_SUCC(ret); + int size = cur_schema_stk_.size(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonObject* cur_schema = cur_schema_stk_.at(i); + ObJsonNode* parent = cur_schema->get_parent(); + ObJsonNode* grandpa = nullptr; + if (OB_ISNULL(parent)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should have parent", K(ret), K(i)); + } else if (OB_FALSE_IT(grandpa = parent->get_parent())) { + } else if (OB_ISNULL(grandpa)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should have parent", K(ret), K(i)); + } else if (grandpa->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(parent->json_type())); + } else if (OB_FAIL(cur_schema_stk_.set(i, static_cast(grandpa)))) { + LOG_WARN("fail to move.", K(ret), K(i)); + } + } + return ret; +} + +// if a key is valid for pattern properties and properties at the same time, +// the key should check the schemas both defined on pattern properties and properties +int ObJsonSchemaTree::add_pattern_pro_to_schema(ObJsonObject* pro_schema, const ObIArray& pro_array, const 
ObString& pattern_text) +{ + INIT_SUCC(ret); + int arr_size = pro_array.count(); + int pro_schema_size = pro_schema->element_count(); + for (int i = 0; i < pro_schema_size && OB_SUCC(ret); ++i) { + ObString key; + bool regex_ans = false; + if (OB_FAIL(pro_schema->get_key_by_idx(i, key))) { + LOG_WARN("fail to get key-value.", K(i), K(key), K(ret)); + } else if (OB_FAIL(ObJsonSchemaUtils::if_regex_match(key, pattern_text, str_buf_, regex_ans)) || !regex_ans) { + // doesn't match, do nothing + } else { + // search match key, add its value to cur_schema_stk_ + for (int i = 0; i < arr_size && OB_SUCC(ret); ++i) { + ObJsonObject* json_schema = pro_array.at(i); + ObJsonNode* match_val = nullptr; + if (OB_ISNULL(match_val = json_schema->get_value(key))) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null", K(ret), K(i)); + } else if (match_val->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(match_val->json_type())); + } else if (OB_FAIL(cur_schema_stk_.push(static_cast(match_val)))) { + LOG_WARN("fail to push.", K(i), K(arr_size), K(ret)); + } + } + } + } + return ret; +} + +int ObJsonSchemaValidator::get_json_or_schema_point(ObJsonBuffer& json_pointer, bool is_schema_pointer) +{ + INIT_SUCC(ret); + ObString res = ObString::make_empty_string(); + ObStack* stack_ptr = nullptr; + if (is_schema_pointer) { + stack_ptr = &schema_pointer_; + } else { + stack_ptr = &json_pointer_; + } + int size = stack_ptr->size(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + if (OB_FAIL(json_pointer.append(stack_ptr->at(i)))) { + LOG_WARN("fail to append.", K(i), K(json_pointer), K(ret)); + } else if (i + 1 < size && OB_FAIL(json_pointer.append("/"))) { + LOG_WARN("fail to append.", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaValidator::schema_validator(ObIJsonBase *json_doc, bool& is_valid) +{ + INIT_SUCC(ret); + ObIJsonBase *json_schema = nullptr; + ObArray schema_vec; + 
schema_vec.set_block_size(SCHEMA_DEFAULT_PAGE_SIZE); + ObArray ans_map; + ans_map.set_block_size(SCHEMA_DEFAULT_PAGE_SIZE); + is_valid = true; + + if (OB_ISNULL(schema_map_) || OB_ISNULL(json_doc)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", KPC(schema_map_), KPC(json_doc), K(ret)); + } else if (schema_map_->json_type() != ObJsonNodeType::J_ARRAY || schema_map_->element_count() <= 1) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array.", K(ret), K(schema_map_->json_type()), K(schema_map_->element_count())); + } else if (OB_FAIL(schema_map_->get_array_element(0, json_schema))) { + LOG_WARN("fail to get json schema.", K(ret)); + } else if (OB_ISNULL(json_schema) || json_schema->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null or other type.", KPC(json_schema), K(ret)); + } else if (OB_FAIL(json_pointer_.push(ObJsonSchemaItem::ROOT))) { + LOG_WARN("fail to push root.", K(ret)); + } else if (OB_FAIL(schema_pointer_.push(ObJsonSchemaItem::ROOT))) { + LOG_WARN("fail to push root.", K(ret)); + } else if (OB_FAIL(schema_vec.push_back(json_schema))) { + LOG_WARN("fail to push.", K(ret)); + } else { + int size = schema_map_->element_count(); + // schema validation only seek, do not need reserve parent stack + if (json_doc->is_bin()) { + ObJsonBin* j_bin = static_cast(json_doc); + j_bin->set_seek_flag(true); + } + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ans_map.push_back(ObJsonSchemaAns::JS_NOT_CHCECKED); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(inner_schema_validator(json_doc, schema_vec, ans_map, is_valid))) { + LOG_WARN("fail to validate.", K(ret)); + } + } + return ret; +} + +int ObJsonSchemaValidator::get_ans_by_id(ObIJsonBase* j_id, ObIArray &ans_map, bool& ans) +{ + INIT_SUCC(ret); + int size = ans_map.count(); + int id = j_id->get_int(); + if (id < size && id > 1) { + if (ans_map.at(id) == ObJsonSchemaAns::JS_FALSE) { + ans = false; + } else { + ans = true; + } + } else { + ret = 
OB_INVALID_INDEX; + } + return ret; +} + +int ObJsonSchemaValidator::inner_schema_validator(ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + ObJsonNodeType json_type = ObJsonNodeType::J_ERROR; + bool need_recursive = false; + if (!is_valid) { + } else if (OB_ISNULL(json_doc) || schema_vec.count() < 1) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", KPC(json_doc), K(ret), K(schema_vec.count())); + } else if (OB_FAIL(check_all_schema_def(json_doc, schema_vec, is_valid))) { + LOG_WARN("fail in check schema.", K(ret)); + } else if (!is_valid) { + } else if (OB_FAIL(check_all_composition_def(json_doc, schema_vec, ans_map))) { // check and fill composition ans + LOG_WARN("fail in check composition.", K(ret)); + } else if (OB_FALSE_IT(json_type = json_doc->json_type())) { + } else if (json_type == ObJsonNodeType::J_OBJECT) { // recursion + if (OB_FAIL(ObJsonSchemaUtils::need_check_recursive(schema_vec, need_recursive, false))) { + LOG_WARN("fail to check recursive keywords.", K(ret)); + } else if (!need_recursive) { + // didn't define recursive keywords + } else if (OB_FAIL(schema_pointer_.push(ObJsonSchemaItem::PROPERTIES))) { + LOG_WARN("fail to push schema pointer.", K(ret)); + } else if (OB_FAIL(object_recursive_validator(json_doc, schema_vec, ans_map, is_valid))) { + LOG_WARN("fail in check object child.", K(ret)); + } else if (failed_keyword_.empty()) { + schema_pointer_.pop(); + } + } else if (json_type == ObJsonNodeType::J_ARRAY) { + if (OB_FAIL(ObJsonSchemaUtils::need_check_recursive(schema_vec, need_recursive, true))) { + LOG_WARN("fail to check recursive keywords.", K(ret)); + } else if (!need_recursive) { + // didn't define recursive keywords + } else if (OB_FAIL(schema_pointer_.push(ObJsonSchemaItem::ITEMS))) { + LOG_WARN("fail to push schema pointer.", K(ret)); + } else if (OB_FAIL(array_recursive_validator(json_doc, schema_vec, ans_map, is_valid))) { + LOG_WARN("fail in check array child.", 
K(ret)); + } else if (failed_keyword_.empty()) { + schema_pointer_.pop(); + } + } // else: is scalar, end of validation + + // get composition ans + if (OB_FAIL(ret) || !is_valid) { + } else if (OB_FAIL(get_schema_composition_ans(json_doc, schema_vec, ans_map, is_valid))) { + LOG_WARN("fail to get_schema_composition_ans.", K(ret)); + } + return ret; +} + +int ObJsonSchemaValidator::get_schema_composition_ans(ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + // check dep_schema, if is object + if (!is_valid) { + } else if (OB_FAIL(composition_ans_recorded_ + && get_vec_schema_composition_ans(json_doc, ObJsonSchemaComp::JS_COMP_DEP, ObJsonSchemaItem::DEPENDENTSCHEMAS, schema_vec, ans_map, is_valid))) { + LOG_WARN("fail to check comp.", K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::DEPENDENCIES; + } else if (composition_ans_recorded_ + && OB_FAIL(get_vec_schema_composition_ans(json_doc, ObJsonSchemaComp::JS_COMP_ALLOF, ObJsonSchemaItem::ALLOF, schema_vec, ans_map, is_valid))) { + LOG_WARN("fail to check comp.", K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::ALLOF; + } else if (composition_ans_recorded_ + && OB_FAIL(get_vec_schema_composition_ans(json_doc, ObJsonSchemaComp::JS_COMP_ANYOF, ObJsonSchemaItem::ANYOF, schema_vec, ans_map, is_valid))) { + LOG_WARN("fail to check comp.", K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::ANYOF; + } else if (OB_FAIL(composition_ans_recorded_ + && get_vec_schema_composition_ans(json_doc, ObJsonSchemaComp::JS_COMP_ONEOF, ObJsonSchemaItem::ONEOF, schema_vec, ans_map, is_valid))) { + LOG_WARN("fail to check comp.", K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::ONEOF; + } else if (OB_FAIL(get_vec_schema_composition_ans(json_doc, ObJsonSchemaComp::JS_COMP_NOT, ObJsonSchemaItem::NOT, schema_vec, ans_map, is_valid))) { + LOG_WARN("fail to check comp.", K(ret)); + } else if 
(!is_valid) { + failed_keyword_ = ObJsonSchemaItem::NOT; + } + return ret; +} + +int ObJsonSchemaValidator::get_vec_schema_composition_ans(ObIJsonBase *json_doc, ObJsonSchemaComp comp_idx, const ObString& key, ObIArray &schema_vec, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + // check dep_schema, if is object + if (!is_valid) { + } else { + int dep_size = 0; + int vec_size = schema_vec.count(); + for (int i = 0; i < vec_size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase *tmp_schema = schema_vec.at(i); + ObIJsonBase *dep_schema = nullptr; + if (OB_FAIL(tmp_schema->get_object_value(key, dep_schema))) { + if (ret == OB_SEARCH_NOT_FOUND) { // didn't define property, its normal + ret = OB_SUCCESS; + } + } else if (OB_ISNULL(dep_schema)) { + } else if (dep_schema->element_count() == 0) { + if (comp_idx == ObJsonSchemaComp::JS_COMP_NOT) { // not: default result is false + is_valid = false; + } + } else { + switch (comp_idx) { + case ObJsonSchemaComp::JS_COMP_DEP : { + if (OB_FAIL(check_dep_schema(json_doc, dep_schema, ans_map, is_valid))) { + LOG_WARN("fail to check dep.", K(ret)); + } + break; + } + case ObJsonSchemaComp::JS_COMP_ALLOF : { + if (OB_FAIL(check_allof_schema(json_doc, dep_schema, ans_map, is_valid))) { + LOG_WARN("fail to check allof.", K(ret)); + } + break; + } + case ObJsonSchemaComp::JS_COMP_ONEOF : { + if (OB_FAIL(check_oneof_schema(json_doc, dep_schema, ans_map, is_valid))) { + LOG_WARN("fail to check oneof.", K(ret)); + } + break; + } + case ObJsonSchemaComp::JS_COMP_ANYOF : { + if (OB_FAIL(check_anyof_schema(json_doc, dep_schema, ans_map, is_valid))) { + LOG_WARN("fail to check anyof.", K(ret)); + } + break; + } + case ObJsonSchemaComp::JS_COMP_NOT : { + if (OB_FAIL(check_not_schema(json_doc, dep_schema, ans_map, is_valid))) { + LOG_WARN("fail to check not.", K(ret)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + break; + } + } // end switch + } // need to check comp + } + } + return ret; +} + +int 
ObJsonSchemaValidator::check_dep_schema(ObIJsonBase *json_doc, ObIJsonBase* dep_schema, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + if (dep_schema->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object.", K(ret)); + } else { + int size = dep_schema->element_count(); + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase *tmp_schema = nullptr; + int tmp_schema_size = 0; + ObString dep_key; + ObIJsonBase *dep_value = nullptr; + if (OB_FAIL(dep_schema->get_object_value(i, dep_key, tmp_schema))) { + LOG_WARN("fail to get schema array.", K(i), K(ret)); + } else if (OB_ISNULL(tmp_schema)) { + ret = OB_BAD_NULL_ERROR; + } else if (OB_FAIL(json_doc->get_object_value(dep_key, dep_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, do not need check + } + } else if (tmp_schema->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array.", K(i), K(ret)); + } else if (OB_FALSE_IT(tmp_schema_size = tmp_schema->element_count())) { + } else if (tmp_schema_size == 0) { + } else if (OB_FAIL(check_single_comp_array(json_doc, tmp_schema, ans_map, is_valid))) { + LOG_WARN("fail to check schema array.", K(i), K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::DEPENDENCIES; + } + } + } + return ret; +} + +int ObJsonSchemaValidator::get_single_element_and_check(ObIJsonBase *json_doc, const int& idx, ObIJsonBase* dep_schema, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + ObIJsonBase *tmp_schema = nullptr; + int tmp_schema_size = 0; + if (OB_FAIL(dep_schema->get_array_element(idx, tmp_schema))) { + LOG_WARN("fail to get schema array.", K(ret)); + } else if (OB_ISNULL(tmp_schema)) { + ret = OB_BAD_NULL_ERROR; + } else if (tmp_schema->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array.", K(tmp_schema->json_type()), K(ret)); + } else if (OB_FALSE_IT(tmp_schema_size = 
tmp_schema->element_count())) { + } else if (tmp_schema_size == 0) { + } else if (OB_FAIL(check_single_comp_array(json_doc, tmp_schema, ans_map, is_valid))) { + LOG_WARN("fail to chekc schema array.", K(ret)); + } + return ret; +} + + +int ObJsonSchemaValidator::check_allof_schema(ObIJsonBase *json_doc, ObIJsonBase* allof_schema, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + if (allof_schema->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array.", K(ret)); + } else { + int size = allof_schema->element_count(); + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + if (OB_FAIL(get_single_element_and_check(json_doc, i, allof_schema, ans_map, is_valid))) { + LOG_WARN("fail to check.", K(i), K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::ALLOF; + } + } + } + return ret; +} + +int ObJsonSchemaValidator::check_anyof_schema(ObIJsonBase *json_doc, ObIJsonBase* anyof_schema, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + if (anyof_schema->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array.", K(anyof_schema->json_type()), K(ret)); + } else { + int size = anyof_schema->element_count(); + bool anyof = true; + bool end_seek = false; + for (int i = 0; i < size && OB_SUCC(ret) && !end_seek; ++i) { + anyof = true; + if (OB_FAIL(get_single_element_and_check(json_doc, i, anyof_schema, ans_map, anyof))) { + LOG_WARN("fail to check.", K(i), K(ret)); + } else if (anyof) { + end_seek = true; + } + } + is_valid = anyof; + if (OB_FAIL(ret)) { + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::ANYOF; + } + } + return ret; +} + +int ObJsonSchemaValidator::check_oneof_schema(ObIJsonBase *json_doc, ObIJsonBase* oneof_schema, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + if (oneof_schema->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array.", K(ret)); + } else { + int size = 
oneof_schema->element_count(); + bool oneof = false; + bool end_seek = false; + for (int i = 0; i < size && OB_SUCC(ret) && !end_seek; ++i) { + bool tmp_ans = true; + if (OB_FAIL(get_single_element_and_check(json_doc, i, oneof_schema, ans_map, tmp_ans))) { + LOG_WARN("fail to check.", K(i), K(ret)); + } else if (tmp_ans) { + if (oneof) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::ONEOF; + end_seek = true; + } else { + oneof = true; // already found one + } + } + } + if (OB_SUCC(ret) && is_valid) { + is_valid = oneof; // prevent the situation where all conditions are false + } + } + return ret; +} + +int ObJsonSchemaValidator::check_not_schema(ObIJsonBase *json_doc, ObIJsonBase* not_schema, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + bool tmp_ans = true; + if (not_schema->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be array.", K(not_schema->json_type()), K(ret)); + } else if (OB_FAIL(check_single_comp_array(json_doc, not_schema, ans_map, tmp_ans))) { + LOG_WARN("fail to chekc schema array.", K(ret)); + } else { + is_valid = !tmp_ans; + } + return ret; +} + +int ObJsonSchemaValidator::check_single_composition_schema(ObIJsonBase *json_doc, ObIJsonBase* single_schema, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + if (single_schema->element_count() != 1) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be one element.", K(single_schema->element_count()), K(ret)); + } else { + ObString key; + ObIJsonBase* ele = nullptr; + if (OB_FAIL(single_schema->get_object_value(0, key, ele))) { + LOG_WARN("fail to get value.", K(ret)); + } else if (OB_ISNULL(ele)) { + ret = OB_BAD_NULL_ERROR; + } else if (ele->element_count() == 0) { + } else if (key.length() < strlen(ObJsonSchemaItem::NOT)) { + ret = OB_ERR_WRONG_VALUE; + } else { + switch(key[2]) { + case 'l': { + // allOf + if (OB_FAIL(check_allof_schema(json_doc, ele, ans_map, is_valid))) { + LOG_WARN("fail to check of.", K(key), K(ret)); + } + 
break; + } + case 'y': { + // anyOf + if (OB_FAIL(check_anyof_schema(json_doc, ele, ans_map, is_valid))) { + LOG_WARN("fail to check of.", K(key), K(ret)); + } + break; + } + case 'e': { + // oneOf + if (OB_FAIL(check_oneof_schema(json_doc, ele, ans_map, is_valid))) { + LOG_WARN("fail to check of.", K(key), K(ret)); + } + break; + } + case 'p': { + // dependentSchema + if (OB_FAIL(check_dep_schema(json_doc, ele, ans_map, is_valid))) { + LOG_WARN("fail to check of.", K(key), K(ret)); + } + break; + } + case 't': { + // not + if (OB_FAIL(check_not_schema(json_doc, ele, ans_map, is_valid))) { + LOG_WARN("fail to check of.", K(key), K(ret)); + } + break; + } + default: { + ret = OB_ERR_WRONG_VALUE; + break; + } + } + } + } + return ret; +} + +int ObJsonSchemaValidator::check_single_comp_array(ObIJsonBase *json_doc, ObIJsonBase* allof_val, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + int size = allof_val->element_count(); + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_val = nullptr; + bool ans = true; + if (OB_FAIL(allof_val->get_array_element(i, tmp_val))) { + LOG_WARN("fail to get value.", K(i), K(size), K(ret)); + } else if (OB_ISNULL(tmp_val)) { + ret = OB_BAD_NULL_ERROR; + } else if (tmp_val->json_type() == ObJsonNodeType::J_INT) { + if (OB_FAIL(get_ans_by_id(tmp_val, ans_map, ans))) { + LOG_WARN("fail to get ans.", K(i), K(size), K(ret)); + } + } else if (tmp_val->json_type() == ObJsonNodeType::J_OBJECT) { + if (OB_FAIL(check_single_composition_schema(json_doc, tmp_val, ans_map, ans))) { + LOG_WARN("fail to get ans.", K(i), K(size), K(ret)); + } + } + if (OB_SUCC(ret)) { + is_valid = (is_valid && ans); + } + } + return ret; +} + +int ObJsonSchemaValidator::object_recursive_validator(ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + int object_size = json_doc->element_count(); + ObIJsonBase *tmp_value = nullptr; + ObArray recursive_schema_vec; + 
recursive_schema_vec.set_block_size(SCHEMA_DEFAULT_PAGE_SIZE); + for (int i = 0; i < object_size && OB_SUCC(ret) && is_valid; ++i) { + ObString key; + if (OB_FAIL(json_doc->get_object_value(i, key, tmp_value))) { + LOG_WARN("fail to get object value.", K(i), K(object_size), K(key), K(ret)); + } else if (OB_FAIL(collect_schema_by_key(key, json_doc, schema_vec, recursive_schema_vec))) { + LOG_WARN("fail to collect schema.", K(i), K(object_size), K(key), K(ret)); + } else if (recursive_schema_vec.count() > 0) { + if (OB_FAIL(json_pointer_.push(key))) { + LOG_WARN("fail to push schema.", K(i), K(object_size), K(key), K(ret)); + } else if (OB_FAIL(schema_pointer_.push(key))) { + LOG_WARN("fail to push schema.", K(i), K(object_size), K(key), K(ret)); + } else if (OB_FAIL(inner_schema_validator(tmp_value, recursive_schema_vec, ans_map, is_valid))) { + LOG_WARN("fail to validate.", K(i), K(object_size), K(key), K(ret)); + } else { + if (failed_keyword_.empty()) { + json_pointer_.pop(); + schema_pointer_.pop(); + } + recursive_schema_vec.reuse(); + } + } // recursive_schema_vec.count() == 0, no schema definition, do not need check + } + return ret; +} + +int ObJsonSchemaValidator::collect_schema_by_key(const ObString& key, ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &recursive_vec) +{ + INIT_SUCC(ret); + int size = schema_vec.count(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObIJsonBase* tmp_schema = schema_vec.at(i); + ObIJsonBase* property = nullptr; + ObIJsonBase* tmp_value = nullptr; + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::PROPERTIES, property))) { + if (ret == OB_SEARCH_NOT_FOUND) { // didn't define property, its normal + ret = OB_SUCCESS; + } + } else if (OB_FAIL(property->get_object_value(key, tmp_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { // didn't define key, its normal + ret = OB_SUCCESS; + } + } else if (OB_ISNULL(tmp_value)) { + } else if (tmp_value->json_type() != ObJsonNodeType::J_OBJECT) { + ret = 
OB_ERR_WRONG_VALUE; + } else if (OB_FAIL(recursive_vec.push_back(tmp_value))) { + LOG_WARN("fail to push back.", K(i), K(size), K(key), K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(tmp_value)) { // found in properties, don't need to check pattern properties + } else if (OB_FALSE_IT(property = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::PATTERN_PRO, property)) || OB_ISNULL(property)) { + if (ret == OB_SEARCH_NOT_FOUND) { // didn't define pattern property, its normal + ret = OB_SUCCESS; + } + } else if (OB_FAIL(collect_schema_by_pattern_key(key, property, recursive_vec))) { + LOG_WARN("fail to collect.", K(i), K(size), K(key), K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FALSE_IT(property = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::ADDITIONAL_PRO, property))) { + if (ret == OB_SEARCH_NOT_FOUND) { // didn't define addtional property, its normal + ret = OB_SUCCESS; + } + } else if (OB_FAIL(collect_schema_by_add_key(key, json_doc, property, recursive_vec))) { + LOG_WARN("fail to collect.", K(i), K(size), K(key), K(ret)); + } + } + return ret; +} + +int ObJsonSchemaValidator::collect_schema_by_idx(const int& idx, const ObString& idx_str, ObIArray &schema_vec, ObIArray &recursive_vec) +{ + INIT_SUCC(ret); + int size = schema_vec.count(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObIJsonBase* tmp_schema = schema_vec.at(i); + ObIJsonBase* items = nullptr; + ObIJsonBase* tmp_value = nullptr; + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::ITEMS, items))) { + if (ret == OB_SEARCH_NOT_FOUND) { // didn't define property, its normal + ret = OB_SUCCESS; + } + } else if (OB_ISNULL(items)) { + } else if (items->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + } else if (OB_FAIL(recursive_vec.push_back(items))) { + LOG_WARN("fail to push back.", K(i), K(size), K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FALSE_IT(items = nullptr)) 
{ + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::TUPLE_ITEMS, items)) || OB_ISNULL(items)) { + if (ret == OB_SEARCH_NOT_FOUND) { // didn't define tuple items, its normal + ret = OB_SUCCESS; + } + } else if (OB_FAIL(items->get_object_value(idx_str, tmp_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { // didn't define idx i in tuple, its normal + ret = OB_SUCCESS; + } + } else if (OB_ISNULL(tmp_value)) { + } else if (tmp_value->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + } else if (OB_FAIL(recursive_vec.push_back(tmp_value))) { + LOG_WARN("fail to push back.", K(i), K(size), K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FALSE_IT(items = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::ADDITIONAL_ITEMS, items)) || OB_ISNULL(items)) { + if (ret == OB_SEARCH_NOT_FOUND) { // didn't define addtional property, its normal + ret = OB_SUCCESS; + } + } else if (OB_FAIL(collect_schema_by_add_idx(idx, items, recursive_vec))) { + LOG_WARN("fail to collect.", K(i), K(size), K(ret)); + } + } + return ret; +} + +int ObJsonSchemaValidator::collect_schema_by_pattern_key(const ObString& key, ObIJsonBase* schema_vec, ObIArray &recursive_vec) +{ + INIT_SUCC(ret); + if (OB_ISNULL(schema_vec)) { // didn't define pattern, its normal + } else if (schema_vec->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("should be object.", K(ret)); + } else { + int size = schema_vec->element_count(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObString pattern; + ObIJsonBase* tmp_value = nullptr; + bool regex_ans = false; + if (OB_FAIL(schema_vec->get_object_value(i, pattern, tmp_value))) { + LOG_WARN("fail to get.",K(i), K(size), K(ret)); + } else if (OB_ISNULL(tmp_value)) { + } else if (tmp_value->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + } else if (OB_SUCC(ObJsonSchemaUtils::if_regex_match(key, pattern, str_buf_, regex_ans)) && regex_ans) { + if 
(OB_FAIL(recursive_vec.push_back(tmp_value))) { + LOG_WARN("fail to collect.", K(i), K(size), K(ret)); + } + } // if not match, ignore + } + } + return ret; +} + +int ObJsonSchemaValidator::collect_schema_by_add_key(const ObString& key, ObIJsonBase *json_doc, ObIJsonBase* schema_vec, ObIArray &recursive_vec) +{ + INIT_SUCC(ret); + if (OB_ISNULL(schema_vec)) { // didn't define add_pro, its normal + } else if (schema_vec->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("should be array.", K(ret)); + } else { + int size = schema_vec->element_count(); + if (size % ObJsonSchemaTree::ADDITIONAL_PRO_ARRAY_COUNT != 0) { + ret = OB_ERR_WRONG_VALUE; + } + for (int i = 0; i + 1 < size && OB_SUCC(ret); i += 2) { + ObIJsonBase* tmp_value = nullptr; + bool is_valid = true; + if (OB_FAIL(schema_vec->get_array_element(i, tmp_value))) { + LOG_WARN("fail to get.", K(i), K(size), K(ret)); + } else if (OB_FAIL(check_add_pro_in_schema(json_doc, tmp_value, is_valid, key))) { + LOG_WARN("fail to check.", K(i), K(size), K(ret)); + } else if (!is_valid) { + tmp_value = nullptr; + if (OB_FAIL(schema_vec->get_array_element(i + 1, tmp_value))) { + LOG_WARN("fail to get.", K(i), K(size), K(ret)); + } else if (OB_ISNULL(tmp_value) || tmp_value->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + } else if (OB_FAIL(recursive_vec.push_back(tmp_value))) { + LOG_WARN("fail to collect.", K(i), K(size), K(ret)); + } + } // if node add_pro, ignore + } + } + return ret; +} + +int ObJsonSchemaValidator::array_recursive_validator(ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &ans_map, bool& is_valid) +{ + INIT_SUCC(ret); + int array_size = json_doc->element_count(); + ObIJsonBase *tmp_value = nullptr; + ObArray recursive_schema_vec; + recursive_schema_vec.set_block_size(SCHEMA_DEFAULT_PAGE_SIZE); + ObJsonBuffer buf(allocator_); + for (int i = 0; i < array_size && OB_SUCC(ret) && is_valid; ++i) { + ObString idx_key; + buf.reuse(); + if 
(OB_FAIL(json_doc->get_array_element(i, tmp_value))) { + LOG_WARN("fail to get object value.", K(i), K(array_size), K(ret)); + } else if (OB_FAIL(ObJsonSchemaUtils::get_index_str(i, buf))) { + LOG_WARN("fail get index.", K(i), K(array_size), K(ret)); + } else if (OB_FALSE_IT(idx_key = ObString(buf.length(), buf.ptr()))) { + } else if (OB_FAIL(collect_schema_by_idx(i, idx_key, schema_vec, recursive_schema_vec))) { + LOG_WARN("fail to collect schema.", K(i), K(array_size), K(ret)); + } else if (recursive_schema_vec.count() > 0) { + if (OB_FAIL(json_pointer_.push(idx_key))) { + LOG_WARN("fail to push schema.", K(i), K(array_size), K(idx_key), K(ret)); + } else if (OB_FAIL(schema_pointer_.push(idx_key))) { + LOG_WARN("fail to push schema.", K(i), K(array_size), K(idx_key), K(ret)); + } else if (OB_FAIL(inner_schema_validator(tmp_value, recursive_schema_vec, ans_map, is_valid))) { + LOG_WARN("fail to validate.", K(i), K(array_size), K(idx_key), K(ret)); + } else { + if (failed_keyword_.empty()) { + json_pointer_.pop(); + schema_pointer_.pop(); + } + recursive_schema_vec.reuse(); + } + } // recursive_schema_vec.count() == 0, no schema definition, do not need check + } + return ret; +} + +int ObJsonSchemaValidator::collect_schema_by_add_idx(const int& idx, ObIJsonBase* schema_vec, ObIArray &recursive_vec) +{ + INIT_SUCC(ret); + if (OB_ISNULL(schema_vec)) { // didn't define add_pro, its normal + } else if (schema_vec->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("should be array.", K(ret)); + } else { + int size = schema_vec->element_count(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObIJsonBase* tmp_value = nullptr; + bool is_valid = true; + ObString add_val; + if (OB_FAIL(schema_vec->get_object_value(i, add_val, tmp_value))) { + LOG_WARN("fail to get.", K(i), K(size), K(ret)); + } else { + int64_t add_idx = strtoll(add_val.ptr(), NULL, 10); + if (add_idx <= idx && OB_FAIL(recursive_vec.push_back(tmp_value))) { + 
LOG_WARN("fail to push schema.", K(i), K(size), K(ret)); + } + } + } + } + return ret; +} + +int ObJsonSchemaValidator::check_all_schema_def(ObIJsonBase *json_doc, ObIArray &schema_vec, bool& is_valid) +{ + INIT_SUCC(ret); + if (!is_valid) { + } else { + ObArray schema_def; + schema_def.set_block_size(SCHEMA_DEFAULT_PAGE_SIZE); + if (OB_FAIL(ObJsonSchemaUtils::get_specific_type_of_child(ObJsonSchemaItem::SCHEMA, ObJsonNodeType::J_OBJECT, schema_vec, schema_def))) { + if (ret == OB_ERR_JSON_KEY_NOT_FOUND) { // there is no child in schema, do not need check + ret = OB_SUCCESS; + } + } else { + ObIJsonBase *schema = nullptr; + ObJsonSchemaType valid_type; + valid_type.flags_ = 0; + switch (json_doc->json_type()) { + case ObJsonNodeType::J_NULL: + case ObJsonNodeType::J_BOOLEAN: { // null or boolean, only check type and enum + if (json_doc->json_type() == ObJsonNodeType::J_NULL) { + valid_type.null_ = 1; + } else { + valid_type.boolean_ = 1; + } + if (OB_FAIL(check_public_key_words(json_doc, schema_def, valid_type, is_valid))) { + LOG_WARN("fail in check null/boolean.", K(valid_type), K(ret)); + } + break; + } + // number or integer + case ObJsonNodeType::J_DECIMAL: + case ObJsonNodeType::J_INT: + case ObJsonNodeType::J_UINT: + case ObJsonNodeType::J_DOUBLE: + case ObJsonNodeType::J_OFLOAT: + case ObJsonNodeType::J_ODOUBLE: + case ObJsonNodeType::J_ODECIMAL: + case ObJsonNodeType::J_OINT: + case ObJsonNodeType::J_OLONG: { + if (OB_FAIL(ObJsonSchemaUtils::set_valid_number_type_by_mode(json_doc, valid_type))) { + LOG_WARN("fail in set valid type.", K(valid_type), K(json_doc->json_type())); + } else if (OB_FAIL(check_public_key_words(json_doc, schema_def, valid_type, is_valid))) { + LOG_WARN("fail in check public key words.", K(valid_type), K(ret)); + } else if (is_valid && OB_FAIL(check_number_and_integer(json_doc, schema_def, is_valid))) { + LOG_WARN("fail in check number/integer key words.", K(valid_type), K(ret)); + } + break; + } + // type enclosed in double quotes 
treat as string + case ObJsonNodeType::J_DATE: + case ObJsonNodeType::J_TIME: + case ObJsonNodeType::J_DATETIME: + case ObJsonNodeType::J_TIMESTAMP: + case ObJsonNodeType::J_STRING: + case ObJsonNodeType::J_OBINARY: + case ObJsonNodeType::J_OOID: + case ObJsonNodeType::J_ORAWHEX: + case ObJsonNodeType::J_ORAWID: + case ObJsonNodeType::J_ODAYSECOND: + case ObJsonNodeType::J_OYEARMONTH: { + valid_type.string_ = 1; + if (OB_FAIL(check_public_key_words(json_doc, schema_def, valid_type, is_valid))) { + LOG_WARN("fail in check public key words.", K(valid_type), K(ret)); + } else if (is_valid && OB_FAIL(check_string_type(json_doc, schema_def, is_valid))) { + LOG_WARN("fail in check number/integer key words.", K(valid_type), K(ret)); + } + break; + } + case ObJsonNodeType::J_OBJECT: { + valid_type.object_ = 1; + if (OB_FAIL(check_public_key_words(json_doc, schema_def, valid_type, is_valid))) { + LOG_WARN("fail in check public key words.", K(valid_type), K(ret)); + } else if (is_valid && OB_FAIL(check_object_type(json_doc, schema_def, is_valid))) { + LOG_WARN("fail in check number/integer key words.", K(valid_type), K(ret)); + } + break; + } + case ObJsonNodeType::J_ARRAY: { + valid_type.array_ = 1; + if (OB_FAIL(check_public_key_words(json_doc, schema_def, valid_type, is_valid))) { + LOG_WARN("fail in check public key words.", K(valid_type), K(ret)); + } else if (is_valid && OB_FAIL(check_array_type(json_doc, schema_def, is_valid))) { + LOG_WARN("fail in check number/integer key words.", K(valid_type), K(ret)); + } + break; + } + default: { + // not one of type definition, check only public keywords, + // other type-related keywords return default ans——true + if (OB_FAIL(check_public_key_words(json_doc, schema_def, valid_type, is_valid))) { + LOG_WARN("fail in check public key words.", K(valid_type), K(ret)); + } + break; + } + } + } + } + return ret; +} + +int ObJsonSchemaValidator::get_composition_schema_def(int idx, ObIJsonBase *schema_vec, ObIJsonBase *&schema, + 
ObIArray &ans_map, int& schema_id) +{ + INIT_SUCC(ret); + schema_id = 0; + bool found = false; + schema = nullptr; + schema_id = schema_vec->get_int(); + int schema_def_id = schema_id; + if (ans_map.at(schema_id) == ObJsonSchemaAns::JS_FALSE) { + ret = OB_ITER_END; + } + while (schema_def_id > 0 && OB_SUCC(ret) && !found) { + if (OB_SUCC(schema_map_->get_array_element(schema_def_id, schema))) { + if (OB_NOT_NULL(schema) && schema->json_type() == ObJsonNodeType::J_OBJECT) { + found = true; + } else { + --schema_def_id; + } + } + } + + if (OB_FAIL(ret)) { + } else if (!found) { + ret = OB_SEARCH_NOT_FOUND; + LOG_WARN("didn't found schema.", K(ret)); + } + return ret; +} + +int ObJsonSchemaValidator::record_comp_ans(const int& def_id, const bool& ans, ObIArray &ans_map) +{ + INIT_SUCC(ret); + int size = ans_map.count(); + composition_ans_recorded_ = true; + if (def_id > 0 && def_id < size) { + if (ans_map.at(def_id) == ObJsonSchemaAns::JS_FALSE) { + } else { + ans_map.at(def_id) = ans ? ObJsonSchemaAns::JS_TRUE : ObJsonSchemaAns::JS_FALSE; + } + } else { + ret = OB_ERROR_OUT_OF_RANGE; + } + return ret; +} + +int ObJsonSchemaValidator::check_all_composition_def(ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &ans_map) +{ + INIT_SUCC(ret); + ObArray composition_def; + composition_def.set_block_size(SCHEMA_DEFAULT_PAGE_SIZE); + bool is_valid = true; + if (OB_FAIL(ObJsonSchemaUtils::get_all_composition_def(schema_vec, composition_def))) { + if (ret == OB_ERR_JSON_KEY_NOT_FOUND) { // there is no child in schema, do not need check + ret = OB_SUCCESS; + } + } else if (composition_def.size() > 0) { + is_valid = true; + int size = composition_def.count(); + ObIJsonBase *comp_schema = nullptr; + int def_id = 0; + switch (json_doc->json_type()) { + case ObJsonNodeType::J_NULL: + case ObJsonNodeType::J_BOOLEAN: { // null or boolean, only check type and enum + bool is_null = (json_doc->json_type() == ObJsonNodeType::J_NULL); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { 
+ ObIJsonBase *tmp_comp = composition_def.at(i); + is_valid = true; + if (OB_FAIL(get_composition_schema_def(i, tmp_comp, comp_schema, ans_map, def_id))) { + if (ret == OB_ITER_END) { // already checked, and ans is false, escape + ret = OB_SUCCESS; + } + } else if (OB_FAIL(check_null_or_boolean(json_doc, comp_schema, is_null, is_valid))) { + LOG_WARN("fail to check null.", K(ret)); + } else if (OB_FAIL(record_comp_ans(def_id, is_valid, ans_map))) { + LOG_WARN("fail to deal with ans.", K(ret)); + } + } + break; + } + // number or integer + case ObJsonNodeType::J_DECIMAL: + case ObJsonNodeType::J_INT: + case ObJsonNodeType::J_UINT: + case ObJsonNodeType::J_DOUBLE: + case ObJsonNodeType::J_OFLOAT: + case ObJsonNodeType::J_ODOUBLE: + case ObJsonNodeType::J_ODECIMAL: + case ObJsonNodeType::J_OINT: + case ObJsonNodeType::J_OLONG: { + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObIJsonBase *tmp_comp = composition_def.at(i); + is_valid = true; + if (OB_FAIL(get_composition_schema_def(i, tmp_comp, comp_schema, ans_map, def_id))) { + if (ret == OB_ITER_END) { // already checked, and ans is false, escape + ret = OB_SUCCESS; + } + } else if (OB_FAIL(check_number_and_integer(json_doc, comp_schema, is_valid))) { + LOG_WARN("fail to check number.", K(ret)); + } else if (OB_FAIL(record_comp_ans(def_id, is_valid, ans_map))) { + LOG_WARN("fail to deal with ans.", K(ret)); + } + } + break; + } + // type enclosed in double quotes treat as string + case ObJsonNodeType::J_DATE: + case ObJsonNodeType::J_TIME: + case ObJsonNodeType::J_DATETIME: + case ObJsonNodeType::J_TIMESTAMP: + case ObJsonNodeType::J_STRING: + case ObJsonNodeType::J_OBINARY: + case ObJsonNodeType::J_OOID: + case ObJsonNodeType::J_ORAWHEX: + case ObJsonNodeType::J_ORAWID: + case ObJsonNodeType::J_ODAYSECOND: + case ObJsonNodeType::J_OYEARMONTH: { + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObIJsonBase *tmp_comp = composition_def.at(i); + is_valid = true; + if (OB_FAIL(get_composition_schema_def(i, 
tmp_comp, comp_schema, ans_map, def_id))) { + if (ret == OB_ITER_END) { // already checked, and ans is false, escape + ret = OB_SUCCESS; + } + } else if (OB_FAIL(check_string_type(json_doc, comp_schema, is_valid))) { + LOG_WARN("fail to check string.", K(ret)); + } else if (OB_FAIL(record_comp_ans(def_id, is_valid, ans_map))) { + LOG_WARN("fail to deal with ans.", K(ret)); + } + } + break; + } + case ObJsonNodeType::J_OBJECT: { + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObIJsonBase *tmp_comp = composition_def.at(i); + is_valid = true; + if (OB_FAIL(get_composition_schema_def(i, tmp_comp, comp_schema, ans_map, def_id))) { + if (ret == OB_ITER_END) { // already checked, and ans is false, escape + ret = OB_SUCCESS; + } + } else if (OB_FAIL(check_object_type(json_doc, comp_schema, is_valid))) { + LOG_WARN("fail to check object.", K(ret)); + } else if (OB_FAIL(record_comp_ans(def_id, is_valid, ans_map))) { + LOG_WARN("fail to deal with ans.", K(ret)); + } + } + break; + } + case ObJsonNodeType::J_ARRAY: { + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObIJsonBase *tmp_comp = composition_def.at(i); + is_valid = true; + if (OB_FAIL(get_composition_schema_def(i, tmp_comp, comp_schema, ans_map, def_id))) { + if (ret == OB_ITER_END) { // already checked, and ans is false, escape + ret = OB_SUCCESS; + } + } else if (OB_FAIL(check_array_type(json_doc, comp_schema, is_valid))) { + LOG_WARN("fail to check array.", K(ret)); + } else if (OB_FAIL(record_comp_ans(def_id, is_valid, ans_map))) { + LOG_WARN("fail to deal with ans.", K(ret)); + } + } + break; + } + default: { + // not one of type definition, check only public keywords, + // ObJsonSchemaType set 0 + ObJsonSchemaType comp_type; + comp_type.flags_ = 0; + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObIJsonBase *tmp_comp = composition_def.at(i); + is_valid = true; + if (OB_FAIL(get_composition_schema_def(i, tmp_comp, comp_schema, ans_map, def_id))) { + if (ret == OB_ITER_END) { // already checked, and 
ans is false, escape + ret = OB_SUCCESS; + } + } else if (OB_FAIL(check_public_key_words(json_doc, comp_schema, comp_type, is_valid))) { + LOG_WARN("fail to check null.", K(ret)); + } else if (OB_FAIL(record_comp_ans(def_id, is_valid, ans_map))) { + LOG_WARN("fail to deal with ans.", K(ret)); + } + } + break; + } + } + } + return ret; +} + +// only check one of them: type, enum + +int ObJsonSchemaValidator::check_public_key_words(ObIJsonBase *json_doc, ObIJsonBase *schema, const ObJsonSchemaType& valid_type, bool& is_valid) +{ + INIT_SUCC(ret); + ObIJsonBase* value = nullptr; + ObString key_word; + if (OB_FAIL(ObJsonSchemaUtils::get_single_key_value(schema, key_word, value))) { + LOG_WARN("fail to get key value.", K(key_word), K(ret)); + } else if (key_word.length() == strlen(ObJsonSchemaItem::TYPE)) { + if (key_word[0] == '$') { + // ref + if (OB_FAIL(check_ref(json_doc, value, is_valid))) { + LOG_WARN("fail to check enum.", K(key_word), K(ret)); + } + } else if (key_word[0] == 't') { + // type : value must be int + is_valid = check_type(valid_type, value); + } else if (key_word[0] == 'e') { + // enum + if (OB_FAIL(check_enum(json_doc, value, is_valid))) { + LOG_WARN("fail to check enum.", K(key_word), K(ret)); + } + } + } // no other key words need to check + return ret; +} + +int ObJsonSchemaValidator::check_null_or_boolean(ObIJsonBase *json_doc, ObIJsonBase *schema, bool is_null, bool& is_valid) +{ + INIT_SUCC(ret); + ObJsonSchemaType valid_type; + valid_type.flags_ = 0; + if (is_null) { + valid_type.null_ = 1; + } else { + valid_type.boolean_ = 1; + } + if (OB_FAIL(check_public_key_words(json_doc, schema, valid_type, is_valid))) { + LOG_WARN("fail to check type/enum.", K(ret)); + } + return ret; +} + +// check both of them: type, enum +int ObJsonSchemaValidator::check_public_key_words(ObIJsonBase *json_doc, ObIArray &schema_vec, ObJsonSchemaType valid_type, bool& is_valid) +{ + INIT_SUCC(ret); + is_valid = true; + int size = schema_vec.count(); + for (int i = 
0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_schema = schema_vec.at(i); + ObIJsonBase* def_value = nullptr; + bool has_ref = false; + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::REF, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { // didn't define, ignore + } else if (OB_FAIL(check_ref(json_doc, def_value, is_valid))) { + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::REF; + } + if (has_ref || OB_FAIL(ret) || !is_valid) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::TYPE, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { // didn't define, ignore + } else if (check_type(valid_type, def_value)) {// type is true, do nothing + } else { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::TYPE; + } + if (has_ref || OB_FAIL(ret) || !is_valid) { + } else if (OB_FALSE_IT(def_value = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::ENUM, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (OB_FAIL(check_enum(json_doc, def_value, is_valid))) { + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::ENUM; + } + } + return ret; +} + +// upper bound: maximum, exclusiveMaximum +// lower bound: maximum, exclusiveMaximum +int ObJsonSchemaValidator::check_boundary_key_words(const ObString& key, ObIJsonBase *json_doc, + ObIJsonBase *schema, int& res) +{ + INIT_SUCC(ret); + ObIJsonBase* def_value = nullptr; + res = 0; + if (OB_FAIL(schema->get_object_value(key, def_value))) { + } else if (OB_ISNULL(def_value)) { + ret = OB_SEARCH_NOT_FOUND; + } else if (OB_FAIL(json_doc->compare(*def_value, res))) { + LOG_WARN("fail to compare.", K(ret)); + } + return ret; +} + +// check both of them: +// 
minimum/maximum +// multipleOf +// exclusiveMinimum/exclusiveMaximum +int ObJsonSchemaValidator::check_number_and_integer(ObIJsonBase *json_doc, ObIArray &schema_vec, bool& is_valid) +{ + INIT_SUCC(ret); + int size = schema_vec.count(); + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_schema = schema_vec.at(i); + int res = 0; + if (OB_FAIL(check_boundary_key_words(ObJsonSchemaItem::MAXMUM, json_doc, tmp_schema, res))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; + } + } else if (res > 0) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MAXMUM; + } + if (OB_FAIL(ret) || !is_valid) { + } else if (OB_FAIL(check_boundary_key_words(ObJsonSchemaItem::MINMUM, json_doc, tmp_schema, res))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; + } + } else if (res < 0) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MINMUM; + } + if (OB_FAIL(ret) || !is_valid) { + } else if (OB_FAIL(check_boundary_key_words(ObJsonSchemaItem::EXCLUSIVE_MAXMUM, json_doc, tmp_schema, res))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; + } + } else if (res >= 0) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MAXMUM; + } + if (OB_FAIL(ret) || !is_valid) { + } else if (OB_FAIL(check_boundary_key_words(ObJsonSchemaItem::EXCLUSIVE_MINMUM, json_doc, tmp_schema, res))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; + } + } else if (res <= 0) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MINMUM; + } + + if (OB_FAIL(ret) || !is_valid) { + } else { + ObIJsonBase* def_value = nullptr; + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::MULTIPLE_OF, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (def_value->is_json_number(def_value->json_type())) { + if (!check_multiple_of(json_doc, def_value)) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MULTIPLE_OF; + } + 
} else if (def_value->json_type() == ObJsonNodeType::J_ARRAY) { + int array_size = def_value->element_count(); + for (int i = 0; i < array_size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_array = nullptr; + if (OB_FAIL(def_value->get_array_element(i, tmp_array))) { + LOG_WARN("fail to get array value.", K(i), K(array_size), K(ret)); + } else if (OB_ISNULL(tmp_array)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(i), K(array_size), K(ret)); + } else if (!check_multiple_of(json_doc, tmp_array)) { // each array element is one divisor; check the element, not the enclosing array + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MULTIPLE_OF; + } + } + } else { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(def_value->json_type())); + } + } + } + return ret; +} + +// only check one of them: +// type/enum +// minimum/maximum +// multipleOf +// exclusiveMinimum/exclusiveMaximum +int ObJsonSchemaValidator::check_number_and_integer(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid) +{ + INIT_SUCC(ret); + is_valid = true; + ObIJsonBase* value = nullptr; + ObString key_word; + if (OB_FAIL(ObJsonSchemaUtils::get_single_key_value(schema, key_word, value))) { + LOG_WARN("fail to get key value.", K(ret)); + } else { + ObJsonSchemaType valid_type; + valid_type.flags_ = 0; + switch (key_word.length()) { + case JS_TYPE_LEN: { + if (key_word[0] == 't' && OB_FAIL(ObJsonSchemaUtils::set_valid_number_type_by_mode(json_doc, valid_type))) { + LOG_WARN("fail to set valid type.", K(key_word), K(ret)); + } else if (OB_FAIL(check_public_key_words(key_word[0], valid_type, json_doc, value, is_valid))) { + LOG_WARN("fail to check key value.", K(key_word), K(ret)); + } + break; + } + case JS_PATTERN_LEN: { + int res = 0; + if (key_word[2] == 'x') { // maximum + if (OB_FAIL(json_doc->compare(*value, res))) { + LOG_WARN("fail to compare.", K(key_word), K(ret)); + } else if (res > 0) { + is_valid = false; + } + } else if (key_word[2] == 'n') { + if (OB_FAIL(json_doc->compare(*value, res))) { + LOG_WARN("fail
to compare.", K(key_word), K(ret)); + } else if (res < 0) { + is_valid = false; + } + } + break; + } + case JS_EXCLUSIVE_LEN: { + int res = 0; + if (key_word[11] == 'a') { // exclusiveMaximum + if (OB_FAIL(json_doc->compare(*value, res))) { + LOG_WARN("fail to compare.", K(key_word), K(ret)); + } else if (res >= 0) { + is_valid = false; + } + } else if (key_word[11] == 'i') { //exclusiveMinimum + if (OB_FAIL(json_doc->compare(*value, res))) { + LOG_WARN("fail to compare.", K(key_word), K(ret)); + } else if (res <= 0) { + is_valid = false; + } + } + break; + } + case JS_MULTIPLE_LEN: { + if (value->is_json_number(value->json_type())) { + is_valid = check_multiple_of(json_doc, value); + } else if (value->json_type() == ObJsonNodeType::J_ARRAY) { + int array_size = value->element_count(); + for (int i = 0; i < array_size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_array = nullptr; + if (OB_FAIL(value->get_array_element(i, tmp_array))) { + LOG_WARN("fail to get array value.", K(key_word), K(ret), K(i), K(array_size)); + } else if (OB_ISNULL(tmp_array)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret), K(i), K(array_size)); + } else if (tmp_array->is_json_number(tmp_array->json_type())) { + is_valid = check_multiple_of(json_doc, tmp_array); + } + } + } + break; + } + default: { + // not key_words for number type, its normal, ignore + break; + } + } + } + // no other key words need to check + return ret; +} + +// minLength/maxLength +// pattern +int ObJsonSchemaValidator::check_string_type(ObIJsonBase *json_doc, ObIArray &schema_vec, bool& is_valid) +{ + INIT_SUCC(ret); + int size = schema_vec.count(); + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_schema = schema_vec.at(i); + ObIJsonBase* def_value = nullptr; + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::MAX_LEN, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if 
(OB_ISNULL(def_value)) { + } else if (json_doc->get_data_length() > def_value->get_int()) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MAX_LEN; + } + + if (OB_FAIL(ret) || !is_valid) { + } else if (OB_FALSE_IT(def_value = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::MIN_LEN, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (json_doc->get_data_length() < def_value->get_int()) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MIN_LEN; + } + + if (OB_FAIL(ret) || !is_valid) { + } else if (OB_FALSE_IT(def_value = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::PATTERN, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (def_value->json_type() == ObJsonNodeType::J_STRING) { + if (!check_pattern_keywords(json_doc, def_value)) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::PATTERN; + } + } else if (def_value->json_type() == ObJsonNodeType::J_ARRAY) { + int array_size = def_value->element_count(); + for (int i = 0; i < array_size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_array = nullptr; + if (OB_FAIL(def_value->get_array_element(i, tmp_array))) { + LOG_WARN("fail to get array value.", K(i), K(array_size), K(ret)); + } else if (OB_ISNULL(tmp_array)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(i), K(array_size), K(ret)); + } else if (!check_pattern_keywords(json_doc, tmp_array)) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::PATTERN; + } + } + } else { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(def_value->json_type())); + } + } + return ret; +} + +int ObJsonSchemaValidator::check_public_key_words(const char key_start, ObJsonSchemaType &valid_type, ObIJsonBase *json_doc, ObIJsonBase *schema, bool& 
is_valid) +{ + INIT_SUCC(ret); + if (key_start == '$') { + // ref + if (OB_FAIL(check_ref(json_doc, schema, is_valid))) { + LOG_WARN("fail to check ref.", K(key_start), K(valid_type), K(ret)); + } + } else if (key_start == 't') { + // type : value must be int + is_valid = check_type(valid_type, schema); + } else if (key_start == 'e') { + // enum + if (OB_FAIL(check_enum(json_doc, schema, is_valid))) { + LOG_WARN("fail to check enum.", K(key_start), K(valid_type), K(ret)); + } + } + return ret; +} + +// only check one of them: +// type/enum +// minLength/maxLength +// pattern +int ObJsonSchemaValidator::check_string_type(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid) +{ + INIT_SUCC(ret); + is_valid = true; + ObIJsonBase* value = nullptr; + ObString key_word; + ObJsonSchemaType valid_type; + if (OB_FAIL(ObJsonSchemaUtils::get_single_key_value(schema, key_word, value))) { + LOG_WARN("fail to get key value.", K(key_word), K(valid_type), K(ret)); + } else if (OB_NOT_NULL(value)) { + valid_type.flags_ = 0; + switch (key_word.length()) { + case JS_TYPE_LEN: { + valid_type.string_ = 1; + if (OB_FAIL(check_public_key_words(key_word[0], valid_type, json_doc, value, is_valid))) { + LOG_WARN("fail to check key value.", K(key_word), K(valid_type), K(ret)); + } + break; + } + case JS_PATTERN_LEN: { + if (key_word[0] == 'p') { + if (value->json_type() == ObJsonNodeType::J_STRING) { + is_valid = check_pattern_keywords(json_doc, value); + } else if (value->json_type() == ObJsonNodeType::J_ARRAY) { + int array_size = value->element_count(); + for (int i = 0; i < array_size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_array = nullptr; + if (OB_FAIL(value->get_array_element(i, tmp_array))) { + LOG_WARN("fail to get array value.", K(key_word), K(valid_type), K(ret), K(i), K(array_size)); + } else if (OB_ISNULL(tmp_array)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(key_word), K(valid_type), K(ret), K(i), K(array_size)); + } else if 
(tmp_array->json_type() == ObJsonNodeType::J_STRING) { + is_valid = check_pattern_keywords(json_doc, tmp_array); + } + } + } + } + break; + } + case JS_STRMAX_LEN: { + if (key_word[1] == 'a') { // maxLength + int data_len = json_doc->get_data_length(); + int schema_len = value->get_int(); // must be int + is_valid = (data_len <= schema_len); + } else if (key_word[1] == 'i') { // minLength + int data_len = json_doc->get_data_length(); + int schema_len = value->get_int(); + is_valid = (data_len >= schema_len); + } + break; + } + default: { + // not key_words for number type, its normal, ignore + break; + } + } + } + // no other key words need to check + return ret; +} + +// check: +// minProperties/maxProperties +// dependencies +// required +// additionalProperties +int ObJsonSchemaValidator::check_object_type(ObIJsonBase *json_doc, ObIArray &schema_vec, bool& is_valid) +{ + INIT_SUCC(ret); + int size = schema_vec.count(); + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_schema = schema_vec.at(i); + ObIJsonBase* def_value = nullptr; + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::MAX_PROPERTIES, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (json_doc->element_count() > def_value->get_int()) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MAX_PROPERTIES; + } + + if (OB_FAIL(ret) || !is_valid) {// minProperties + } else if (OB_FALSE_IT(def_value = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::MIN_PROPERTIES, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (json_doc->element_count() < def_value->get_int()) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MIN_PROPERTIES; + } + + if (OB_FAIL(ret) || !is_valid) {// required + } else if (OB_FALSE_IT(def_value = 
nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::REQUIRED, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (OB_FAIL(check_required(json_doc, def_value, is_valid))) { + LOG_WARN("fail to check key value.", K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::REQUIRED; + } + + if (OB_FAIL(ret) || !is_valid) {// dep_required + } else if (OB_FALSE_IT(def_value = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::DEPENDENTREQUIRED, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (def_value->json_type() == ObJsonNodeType::J_OBJECT) { + if (OB_FAIL(check_dep_required(json_doc, def_value, is_valid))) { + LOG_WARN("fail to check key value.", K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::DEPENDENCIES; + } + } else if (def_value->json_type() == ObJsonNodeType::J_ARRAY) { + int array_size = def_value->element_count(); + for (int i = 0; i < array_size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_array = nullptr; + if (OB_FAIL(def_value->get_array_element(i, tmp_array))) { + LOG_WARN("fail to get array value.", K(i), K(array_size), K(ret)); + } else if (OB_ISNULL(tmp_array)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(i), K(array_size), K(ret)); + } else if (OB_FAIL(check_dep_required(json_doc, tmp_array, is_valid))) { // check the fetched element, not the enclosing array + LOG_WARN("fail to check key value.", K(i), K(array_size), K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::DEPENDENCIES; + } + } + } else { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(def_value->json_type())); + } + + if (OB_FAIL(ret) || !is_valid) {// additional properties + } else if (OB_FALSE_IT(def_value = nullptr)) { + } else if
(OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::ADDITIONAL_PRO, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (OB_FAIL(check_add_pro_in_schema(json_doc, def_value, is_valid))) { + LOG_WARN("fail to check key value.", K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::ADDITIONAL_PRO; + } + } + + return ret; +} + +// only check one of them: +// minProperties/maxProperties +// dependencies +// required +// additionalProperties +int ObJsonSchemaValidator::check_object_type(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid) +{ + INIT_SUCC(ret); + is_valid = true; + ObIJsonBase* value = nullptr; + ObString key_word; + ObJsonSchemaType valid_type; + if (OB_FAIL(ObJsonSchemaUtils::get_single_key_value(schema, key_word, value))) { + LOG_WARN("fail to get key value.", K(ret)); + } else { + valid_type.flags_ = 0; + switch (key_word.length()) { + case JS_TYPE_LEN: { + valid_type.object_ = 1; + if (OB_FAIL(check_public_key_words(key_word[0], valid_type, json_doc, value, is_valid))) { + LOG_WARN("fail to check key value.", K(key_word), K(ret)); + } + break; + } + case JS_PROMAX_LEN: { + int res = 0; + if (key_word[1] == 'a') { // maxProperties + int data_size = json_doc->element_count(); + int schema_size = value->get_int(); // must be int + is_valid = (data_size <= schema_size); // compare the document size with the schema limit + } else if (key_word[1] == 'i') { // minProperties + int data_size = json_doc->element_count(); + int schema_size = value->get_int(); + is_valid = (data_size >= schema_size); // compare the document size with the schema limit + } + break; + } + case JS_REQUIRED_LEN: { + if (key_word[0] != 'r') { + } else if (OB_FAIL(check_required(json_doc, value, is_valid))) { + LOG_WARN("fail to check required.", K(key_word), K(ret)); + } + break; + } + case JS_DEP_REQUIRED_LEN: { + if (value->json_type() == ObJsonNodeType::J_OBJECT) { + if (OB_FAIL(check_dep_required(json_doc, value, is_valid))) { + LOG_WARN("fail to check dep
required.", K(key_word), K(ret)); + } + } else if (value->json_type() == ObJsonNodeType::J_ARRAY) { + int array_size = value->element_count(); + for (int i = 0; i < array_size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_array = nullptr; + if (OB_FAIL(value->get_array_element(i, tmp_array))) { + LOG_WARN("fail to get array value.", K(i), K(array_size), K(ret)); + } else if (OB_ISNULL(tmp_array)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret), K(i)); + } else if (tmp_array->json_type() == ObJsonNodeType::J_OBJECT) { + if (OB_FAIL(check_dep_required(json_doc, tmp_array, is_valid))) { // check the element that was just type-checked, not the enclosing array + LOG_WARN("fail to check dep required.", K(i), K(array_size), K(ret)); + } + } + } + } + break; + } + case JS_ADD_PRO_LEN: { + if (OB_FAIL(check_add_pro_in_schema(json_doc, value, is_valid))) { + LOG_WARN("fail to check dep required.", K(ret)); + } + break; + } + default: { + // not key_words for number type, its normal, ignore + break; + } + } + } + // no other key words need to check + return ret; +} + +// check: +// additionalItems +// uniqueItems +// minItems/ maxItems +int ObJsonSchemaValidator::check_array_type(ObIJsonBase *json_doc, ObIArray &schema_vec, bool& is_valid) +{ + INIT_SUCC(ret); + int size = schema_vec.count(); + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_schema = schema_vec.at(i); + ObIJsonBase* def_value = nullptr; + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::MAX_ITEMS, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (json_doc->element_count() > def_value->get_int()) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MAX_ITEMS; + } + + if (OB_FAIL(ret) || !is_valid) {// minItems + } else if (OB_FALSE_IT(def_value = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::MIN_ITEMS, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS;
// didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (json_doc->element_count() < def_value->get_int()) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::MIN_ITEMS; + } + + if (OB_FAIL(ret) || !is_valid) {// addItems + } else if (OB_FALSE_IT(def_value = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::ADDITIONAL_ITEMS, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (json_doc->element_count() > def_value->get_int()) { + is_valid = false; + failed_keyword_ = ObJsonSchemaItem::ADDITIONAL_ITEMS; + } + + if (OB_FAIL(ret) || !is_valid) {// uniqueItems + } else if (OB_FALSE_IT(def_value = nullptr)) { + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::UNIQUE_ITEMS, def_value))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(def_value)) { + } else if (OB_FAIL(check_unique_items(json_doc, def_value, is_valid))) { + LOG_WARN("fail to check key value.", K(ret)); + } else if (!is_valid) { + failed_keyword_ = ObJsonSchemaItem::UNIQUE_ITEMS; + } + } + return ret; +} + + +// only check one of them: +// additionalItems +// uniqueItems +// minItems/ maxItems +int ObJsonSchemaValidator::check_array_type(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid) +{ + INIT_SUCC(ret); + is_valid = true; + ObIJsonBase* value = nullptr; + ObString key_word; + ObJsonSchemaType valid_type; + if (OB_FAIL(ObJsonSchemaUtils::get_single_key_value(schema, key_word, value))) { + LOG_WARN("fail to get key value.", K(key_word), K(ret)); + } else { + valid_type.flags_ = 0; + switch (key_word.length()) { + case JS_TYPE_LEN: { + valid_type.array_ = 1; + if (OB_FAIL(check_public_key_words(key_word[0], valid_type, json_doc, value, is_valid))) { + LOG_WARN("fail to check key value.", K(key_word), K(ret)); + } + break; + } + case JS_STRMAX_LEN: { + int 
res = 0; + if (key_word[1] == 'a') { // maxItems + int data_size = json_doc->element_count(); + int schema_size = value->get_int(); + is_valid = (data_size <= data_size); + } else if (key_word[1] == 'i') { + int data_size = json_doc->element_count(); + int schema_size = value->get_int(); + is_valid = (data_size >= data_size); + } + break; + } + case JS_ADD_ITEMS_LEN: { + int data_size = json_doc->element_count(); + int schema_size = value->get_int(); + is_valid = (data_size <= data_size); + break; + } + case JS_UNIQUE_ITEMS_LEN: { + if (OB_FAIL(check_unique_items(json_doc, value, is_valid))) { + LOG_WARN("fail to check dep required.", K(ret)); + } + break; + } + default: { + // not key_words for number type, its normal, ignore + break; + } + } + } + // no other key words need to check + return ret; +} + +bool ObJsonSchemaValidator::check_type(const ObJsonSchemaType& real_type, ObIJsonBase *schema_value) +{ + bool ret_bool = false; + ObJsonSchemaType defined_type; + defined_type.flags_ = schema_value->get_uint(); + if ((defined_type.flags_ & real_type.flags_) != 0) { + ret_bool = true; + } + return ret_bool; +} + +bool ObJsonSchemaValidator::check_multiple_of(ObIJsonBase *json_doc, ObIJsonBase *schema) +{ + bool ret_bool = true; + double json_val = 0.0; + double multi_val = 0.0; + int ret = OB_SUCCESS; + double ans; + if (OB_FAIL(ObJsonSchemaUtils::get_json_number(json_doc, json_val))) { + ret_bool = false; + } else if (OB_FAIL(ObJsonSchemaUtils::get_json_number(schema, multi_val))) { + ret_bool = false; + } else { + ret_bool = ObJsonSchemaUtils::check_multiple_of(json_val, multi_val); + } + return ret_bool; +} + +bool ObJsonSchemaUtils::check_multiple_of(const double& json_val, const double& multi_val) +{ + bool ret_bool = true; + double abs_json = abs(json_val); + double abs_multi = abs(multi_val); + double mod_ans = floor(abs_json / abs_multi); + double res = abs_json - mod_ans * abs_multi; + if (res > 0.0) { + ret_bool = false; + } + return ret_bool; +} + +int 
ObJsonSchemaUtils::set_valid_number_type_by_mode(ObIJsonBase *json_doc, ObJsonSchemaType& valid_type) +{ + INIT_SUCC(ret); + switch (json_doc->json_type()) { + case ObJsonNodeType::J_INT: + case ObJsonNodeType::J_UINT: + case ObJsonNodeType::J_OINT: { + // both oracle mode and mysql mode will be valid for integer and number type + valid_type.integer_ = 1; + valid_type.number_ = 1; + } + case ObJsonNodeType::J_DECIMAL: + case ObJsonNodeType::J_DOUBLE: + case ObJsonNodeType::J_OFLOAT: + case ObJsonNodeType::J_ODOUBLE: + case ObJsonNodeType::J_ODECIMAL: + case ObJsonNodeType::J_OLONG: { + if (lib::is_mysql_mode()) { + // in mysql mode, only check nodetype + // all double is not integer, which does not meet the standards of json schema + valid_type.number_ = 1; + } else { + // in oracle mode and json schema standard, will check the value of double + // for example, 1.0 is integer, but 1.1 is not integer + double json_val = 0.0; + if (OB_FAIL(ObJsonSchemaUtils::get_json_number(json_doc, json_val))) { + } else if (fmod(json_val, 1.0) == 0) { + valid_type.integer_ = 1; + valid_type.number_ = 1; + } else { + valid_type.number_ = 1; + } + } + break; + } + default: { // not number, set nothing + break; + } + } + return ret; +} + +bool ObJsonSchemaValidator::check_pattern_keywords(ObIJsonBase *json_doc, ObIJsonBase *schema) +{ + bool ret_bool = true; + ObString text(json_doc->get_data_length(), json_doc->get_data()); + ObString pattern(schema->get_data_length(), schema->get_data()); + ObJsonSchemaUtils::if_regex_match(text, pattern, str_buf_, ret_bool); + return ret_bool; +} + +int ObJsonSchemaValidator::check_ref(ObIJsonBase *json_doc, ObIJsonBase *schema_value, bool& is_valid) +{ + INIT_SUCC(ret); + if (schema_value->json_type() == ObJsonNodeType::J_STRING) { + ObString ref_str = ObString(schema_value->get_data_length(), schema_value->get_data()); + if (OB_FAIL(check_single_ref(json_doc, ref_str, is_valid))) { + LOG_WARN("fail to check ref.", K(ret)); + } + } else if 
(schema_value->json_type() == ObJsonNodeType::J_ARRAY) { + int size = schema_value->element_count(); + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase* tmp_array = nullptr; + if (OB_FAIL(schema_value->get_array_element(i, tmp_array))) { + LOG_WARN("fail to get array value.", K(i), K(size), K(ret)); + } else if (OB_ISNULL(tmp_array)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret), K(i)); + } else if (tmp_array->json_type() == ObJsonNodeType::J_STRING) { + ObString ref_str = ObString(tmp_array->get_data_length(), tmp_array->get_data()); + if (OB_FAIL(check_single_ref(json_doc, ref_str, is_valid))) { + LOG_WARN("fail to check ref.", K(i), K(size), K(ret)); + } + } + } + } + return ret; +} + +int ObJsonSchemaValidator::check_single_ref(ObIJsonBase *json_doc, const ObString& ref_key, bool& is_valid) +{ + INIT_SUCC(ret); + ObIJsonBase * ref = nullptr; + ObIJsonBase * ref_value = nullptr; + if (ref_key.compare(ObJsonSchemaItem::ROOT) == 0) { + if (json_pointer_.count() == 1) { // means check root, then current ans is final ans, return true + ref_value = nullptr; + is_valid = true; + } else { + ref_value = schema_map_; + } + } else if (OB_FAIL(schema_map_->get_array_element(1, ref))) { + LOG_WARN("fail to get ref.", K(ret)); + } else if (ref->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should be OBJECT.", K(ref->json_type()), K(ret)); + } else if (OB_FAIL(ref->get_object_value(ref_key, ref_value)) || OB_ISNULL(ref_value)) { + // didn't found, invalid, but not raise error + ret = OB_SUCCESS; + } else if (ref_value->json_type() != ObJsonNodeType::J_ARRAY) { + if (ref_value->json_type() == ObJsonNodeType::J_INT) { + // typeless_, means check self now + if (json_pointer_.count() == 1) { // means check root, then current ans is final ans, return true + ref_value = nullptr; + is_valid = true; + } else { + ref_value = schema_map_; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should be 
OBJECT.", K(ref_value->json_type()), K(ret)); + } + } + if (OB_SUCC(ret) && OB_NOT_NULL(ref_value)) { + ObJsonSchemaValidator ref_validator(allocator_, ref_value); + if (OB_FAIL(ref_validator.schema_validator(json_doc, is_valid))) { + LOG_WARN("fail to check.", K(ret)); + } + } + return ret; +} + +// for enum, schema value must be array, checked in parsing schema tree +int ObJsonSchemaValidator::check_enum(ObIJsonBase *json_doc, ObIJsonBase *schema_value, bool& is_valid) +{ + INIT_SUCC(ret); + is_valid = false; + int size = schema_value->element_count(); + if (size == 0) { + is_valid = true; + } else { + for (int i = 0; i < size && OB_SUCC(ret) && !is_valid; ++i) { + ObIJsonBase* node = nullptr; + int res = -1; + if (OB_FAIL(schema_value->get_array_element(i, node))) { + LOG_WARN("fail to get array.", K(ret), K(i)); + } else if (OB_ISNULL(node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret), K(i)); + } else if (OB_FAIL(json_doc->compare(*node, res))) { + LOG_WARN("fail to compare.", K(ret), K(i)); + } else { + is_valid = (res == 0); + } + } + } + return ret; +} + +int ObJsonSchemaValidator::check_required(ObIJsonBase *json_doc, ObIJsonBase *schema_value, bool& is_valid) +{ + INIT_SUCC(ret); + is_valid = true; + if (schema_value->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(schema_value->json_type())); + } else { + int size = schema_value->element_count(); + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase * required_key = nullptr; + if (OB_FAIL(schema_value->get_array_element(i, required_key))) { + LOG_WARN("fail to get required key.", K(ret), K(i)); + } else if (OB_ISNULL(required_key)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret), K(i)); + } else if (required_key->json_type() == ObJsonNodeType::J_STRING) { + ObIJsonBase *value = nullptr; + ObString key(required_key->get_data_length(), required_key->get_data()); + if 
(OB_FAIL(json_doc->get_object_value(key, value)) || OB_ISNULL(value)) { + // didn't found, invalid, but not raise error + ret = OB_SUCCESS; + is_valid = false; + } + } // if not string, don't check key + } + } + return ret; +} + +int ObJsonSchemaValidator::check_dep_required(ObIJsonBase *json_doc, ObIJsonBase *schema_value, bool& is_valid) +{ + INIT_SUCC(ret); + is_valid = true; + if (schema_value->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(schema_value->json_type())); + } else { + int size = schema_value->element_count(); + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase * dep_required_value = nullptr; + ObIJsonBase * tmp_value = nullptr; + ObString dep_key; + if (OB_FAIL(schema_value->get_object_value(i, dep_key, dep_required_value))) { + LOG_WARN("fail to get required key.", K(ret), K(i)); + } else if (OB_ISNULL(dep_required_value)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret), K(i)); + } else if (OB_FAIL(json_doc->get_object_value(dep_key, tmp_value))) { + // didn't found, do not need to check dependencies + ret = OB_SUCCESS; + } else if (dep_required_value->json_type() != ObJsonNodeType::J_ARRAY) { + } else if (OB_FAIL(check_required(json_doc, dep_required_value, is_valid))) { + LOG_WARN("fail to check dep required.", K(ret), K(i)); + } + } + } + return ret; +} + +int ObJsonSchemaValidator::check_add_pro_in_schema(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid, ObString defined_key) +{ + INIT_SUCC(ret); + // collect all key, add and of conflict, if not conflict, check pattern + is_valid = true; + ObSortedVector properties_vec; + ObStringCmp cmp; + ObStringUnique unique; + int array_size = schema->element_count(); + int size = json_doc->element_count(); + if (array_size % ObJsonSchemaTree::ADDITIONAL_PRO_ARRAY_COUNT != 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong number.", K(ret)); + } else { + for (int i = 0; i < 
array_size && OB_SUCC(ret); i += 2) { + ObIJsonBase *tmp_value = nullptr; + if (OB_FAIL(schema->get_array_element(i, tmp_value))) { + LOG_WARN("fail to get.", K(i), K(array_size), K(ret)); + } else if (OB_ISNULL(tmp_value) || tmp_value->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong number.", K(i), K(array_size), K(ret)); + } else { + int pro_num = tmp_value->element_count(); + for (int i = 0; i < pro_num && OB_SUCC(ret); ++i) { + ObIJsonBase *tmp_str = nullptr; + if (OB_FAIL(tmp_value->get_array_element(i, tmp_str))) { + LOG_WARN("fail to get.", K(i), K(pro_num), K(ret)); + } else if (OB_ISNULL(tmp_str) || tmp_str->json_type() != ObJsonNodeType::J_STRING) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong number.", K(i), K(pro_num), K(ret)); + } else { + ObString str = ObString(tmp_str->get_data_length(), tmp_str->get_data()); + ObSortedVector::iterator pos = properties_vec.end(); + if (OB_FAIL(properties_vec.insert_unique(str, pos, cmp, unique))) { + if (ret == OB_CONFLICT_VALUE) { + ret = OB_SUCCESS; // confilict means found duplicated nodes, it is not an error. + } + } + } + } // add all properties key + } + } // check all properties vector + + if (OB_FAIL(ret)) { + } else if (defined_key.empty()) { + for (int i = 0; i < size && OB_SUCC(ret) && is_valid; ++i) { + ObIJsonBase * tmp_value = nullptr; + ObString doc_key; + bool found = false; + ObSortedVector::iterator pos = properties_vec.end(); + if (OB_FAIL(json_doc->get_object_value(i, doc_key, tmp_value))) { + LOG_WARN("fail to get key.", K(ret), K(i)); + } else if (OB_FAIL(properties_vec.insert_unique(doc_key, pos, cmp, unique))) { + if (ret == OB_CONFLICT_VALUE) { + ret = OB_SUCCESS; // confilict means found duplicated nodes, it is not an error. 
+ } + } else if (OB_FAIL(check_pattern_key_in_add_pro(doc_key, schema, found))) { + LOG_WARN("fail to check pattern key.", K(ret), K(i)); + } else if (!found) { + is_valid = false; + } + } + } else { + ObSortedVector::iterator pos = properties_vec.end(); + if (OB_FAIL(properties_vec.insert_unique(defined_key, pos, cmp, unique))) { + if (ret == OB_CONFLICT_VALUE) { + ret = OB_SUCCESS; // confilict means found duplicated nodes, it is not an error. + is_valid = true; + } + } else if (OB_FAIL(check_pattern_key_in_add_pro(defined_key, schema, is_valid))) { + LOG_WARN("fail to check pattern key.", K(ret)); + } + } + } + return ret; +} + +int ObJsonSchemaValidator::check_pattern_key_in_add_pro(const ObString& key, ObIJsonBase *schema, bool& found) +{ + INIT_SUCC(ret); + int array_size = schema->element_count(); + found = false; + for (int i = 1; i < array_size && OB_SUCC(ret) && !found; i += 2) { + ObIJsonBase *tmp_value = nullptr; + if (OB_FAIL(schema->get_array_element(i, tmp_value))) { + LOG_WARN("fail to get.", K(i), K(array_size), K(ret)); + } else if (OB_ISNULL(tmp_value) || tmp_value->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong number.", K(i), K(array_size), K(ret)); + } else { + int pro_num = tmp_value->element_count(); + for (int i = 0; i < pro_num && OB_SUCC(ret) && !found; ++i) { + ObIJsonBase *tmp_str = nullptr; + if (OB_FAIL(tmp_value->get_array_element(i, tmp_str))) { + LOG_WARN("fail to get.", K(i), K(pro_num), K(ret)); + } else if (OB_ISNULL(tmp_str) || tmp_str->json_type() != ObJsonNodeType::J_STRING) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong number.", K(i), K(pro_num), K(ret)); + } else { + ObString reg_str = ObString(tmp_str->get_data_length(), tmp_str->get_data()); + bool regex_ans = false; + if (OB_FAIL(ObJsonSchemaUtils::if_regex_match(key, reg_str, str_buf_, regex_ans))) { + } else { + found = regex_ans; + } + } + } // add all pattern properties key + } + } // check all pattern properties vector + 
return ret; +} + +int ObJsonSchemaValidator::check_unique_items(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid) +{ + INIT_SUCC(ret); + is_valid = true; + ObSortedVector dup; + ObJsonContentCmp cmp; + ObJsonContentUnique unique; + int array_size = json_doc->element_count(); + for (int i = 0; i < array_size && OB_SUCC(ret); ++i) { + ObSortedVector::iterator pos = dup.end(); + ObIJsonBase *tmp_value = nullptr; + if (OB_FAIL(json_doc->get_array_element(i, tmp_value))) { + LOG_WARN("fail to get.", K(i), K(array_size), K(ret)); + } else if (OB_ISNULL(tmp_value)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("wrong number.", K(i), K(array_size), K(ret)); + } else if (OB_FAIL(dup.insert_unique(tmp_value, pos, cmp, unique))) { + if (ret == OB_CONFLICT_VALUE) { + ret = OB_SUCCESS; + is_valid = false; + } + } + } + return ret; +} + +ObIJsonBase* ObJsonSchemaCache::schema_at(size_t idx) +{ + ObIJsonBase* ptr = nullptr; + if (idx < schema_arr_ptr_.size()) { + ptr = schema_arr_ptr_[idx]; + } + return ptr; +} + +void ObJsonSchemaCache::set_allocator(common::ObIAllocator *allocator) +{ + if (allocator != nullptr && size() == 0) { + allocator_ = allocator; + } +} + +bool ObJsonSchemaCache::is_match(ObString& in_str, size_t idx) +{ + bool result = false; + if (idx < size()) { + ObString schema_str = schema_str_[idx]; + if (!schema_str.empty()) { + if (in_str.length() == schema_str.length()) { + result = (in_str.compare(schema_str) == 0); + } else { + result = false; + } + } + } + return result; +} + +int ObJsonSchemaCache::find_and_add_cache(ObIJsonBase*& out_schema, ObString& in_str, int arg_idx, const ObJsonInType& in_type) +{ + INIT_SUCC(ret); + if (!is_match(in_str, arg_idx)) { + ObIJsonBase* in_json = nullptr; + // whether it is Oracle or MySQL, only lowercase true/false is considered a Boolean value + // so, use strict mode + uint32_t parse_flag = ObJsonParser::JSN_STRICT_FLAG; + parse_flag |= ObJsonParser::JSN_SCHEMA_FLAG; + + if 
(OB_FAIL(ObJsonBaseFactory::get_json_base(allocator_, in_str, in_type, in_type, + in_json, parse_flag))) { + + LOG_WARN("fail to get json base", K(ret), K(in_type)); + } else { + ObJsonSchemaTree json_schema(allocator_); + if (OB_FAIL(json_schema.build_schema_tree(in_json))) { + LOG_WARN("invalid json schema", K(ret)); + } else if (OB_ISNULL(out_schema = json_schema.get_schema_map())) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ret = set_schema(out_schema, in_str, arg_idx, arg_idx); + } + } + } else { + out_schema = schema_at(arg_idx); + } + return ret; +} + +int ObJsonSchemaCache::fill_empty(size_t reserve_size) +{ + INIT_SUCC(ret); + if (reserve_size > schema_arr_ptr_.size()) { + // fill element in vector + if (OB_FAIL(schema_arr_ptr_.reserve(reserve_size))) { + LOG_WARN("fail to reserve for path arr.", K(ret), K(reserve_size)); + } else if (OB_FAIL(stat_arr_.reserve(reserve_size))) { + LOG_WARN("fail to reserve for stat arr.", K(ret), K(reserve_size)); + } else if (OB_FAIL(schema_str_.reserve(reserve_size))) { + LOG_WARN("fail to reserve for stat arr.", K(ret), K(reserve_size)); + } else if (schema_arr_ptr_.size() != stat_arr_.size() || schema_str_.size() != stat_arr_.size()) { + LOG_WARN("Length is not equals.", K(ret), K(reserve_size)); + } + int size = schema_arr_ptr_.size(); + for (size_t cur = size; OB_SUCC(ret) && cur < reserve_size; ++cur) { + if (OB_FAIL(schema_arr_ptr_.push_back(nullptr))) { + LOG_WARN("fail to push NUll to path arr", K(cur), K(reserve_size), K(ret)); + } else if (OB_FAIL(stat_arr_.push_back(ObSchemaCacheStat()))) { + LOG_WARN("fail to push stat to stat arr", K(cur), K(reserve_size), K(ret)); + } else if (OB_FAIL(schema_str_.push_back(ObString::make_empty_string()))) { + LOG_WARN("fail to push stat to stat arr", K(cur), K(reserve_size), K(ret)); + } + } + } + return ret; +} + +int ObJsonSchemaCache::set_schema(ObIJsonBase* j_schema, const ObString& in_str, int arg_idx, int index) +{ + 
INIT_SUCC(ret); + if (OB_FAIL(fill_empty(arg_idx + 1))) { + LOG_WARN("fail to fill empty.", K(ret), K(arg_idx)); + } else if (index >= schema_arr_ptr_.size()) { + ret = OB_ERROR_OUT_OF_RANGE; + LOG_WARN("index out of range.", K(ret), K(index), K(schema_arr_ptr_.size())); + } else { + schema_arr_ptr_[index] = j_schema; + stat_arr_[index] = ObSchemaCacheStat(INITIALIZED, arg_idx); + schema_str_[index] = in_str; + } + return ret; +} + +int ObJsonSchemaUtils::is_all_children_subschema(ObJsonNode* array_of_subschema) +{ + INIT_SUCC(ret); + if (OB_NOT_NULL(array_of_subschema) && array_of_subschema->json_type() == ObJsonNodeType::J_ARRAY) { + ObJsonArray* array = static_cast(array_of_subschema); + int size = array->element_count(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonNode* tmp = (*array)[i]; + if (OB_ISNULL(tmp)){ + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null", K(ret), K(i)); + } else if (tmp->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_TYPE_OF_JSON_SCHEMA; + LOG_WARN("json schema must be object", K(ret), K(i), K(tmp->json_type())); + } + } + } + return ret; +} + +int ObJsonSchemaUtils::check_if_composition_legal(ObJsonObject* origin_schema, ObJsonSubSchemaKeywords& key_words) +{ + INIT_SUCC(ret); + if (OB_NOT_NULL(origin_schema)) { + bool is_legal = false; + if (OB_FAIL(check_composition_by_name(ObJsonSchemaItem::ALLOF, origin_schema, is_legal))) { + LOG_WARN("fail to check all of", K(ret)); + } else if (is_legal && OB_FALSE_IT(key_words.all_of_ = 1)) { + } else if (OB_FAIL(check_composition_by_name(ObJsonSchemaItem::ANYOF, origin_schema, is_legal))) { + LOG_WARN("fail to check all of", K(ret)); + } else if (is_legal && OB_FALSE_IT(key_words.any_of_ = 1)) { + } else if (OB_FAIL(check_composition_by_name(ObJsonSchemaItem::ONEOF, origin_schema, is_legal))) { + LOG_WARN("fail to check all of", K(ret)); + } else if (is_legal && OB_FALSE_IT(key_words.one_of_ = 1)) { + } else if 
(OB_FAIL(check_composition_by_name(ObJsonSchemaItem::NOT, origin_schema, is_legal))) { + LOG_WARN("fail to check all of", K(ret)); + } else if (is_legal && OB_FALSE_IT(key_words.not_ = 1)) { + } + } + return ret; +} + +int ObJsonSchemaUtils::check_composition_by_name(const ObString& key_word, ObJsonObject* origin_schema, bool& is_legal) +{ + INIT_SUCC(ret); + is_legal = false; + ObJsonNode* node = origin_schema->get_value(key_word); + if (OB_ISNULL(node)) { + // didn't define, its normal + } else if (key_word.compare(ObJsonSchemaItem::NOT) == 0) { + if (node->json_type() == ObJsonNodeType::J_OBJECT) { + is_legal = true; + } + } else if (node->json_type() == ObJsonNodeType::J_ARRAY && node->element_count() > 0) { + if (OB_FAIL(ObJsonSchemaUtils::is_all_children_subschema(node))) { + // composition, each element has its own schema, check if all schema element + // if not, raise error + LOG_WARN("illegal subschema in item array", K(ret), K(key_word)); + } else { + is_legal = true; + } + } // not array, ignore in mysql, raise error in oracle + return ret; +} + +int ObJsonSchemaUtils::json_doc_move_to_key(const ObString& key_word, ObJsonObject*& json_schema) +{ + INIT_SUCC(ret); + ObJsonNode* tmp_node = nullptr; + if (OB_ISNULL(json_schema)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret)); + } else if (OB_ISNULL(tmp_node = json_schema->get_value(key_word))) { + ret = OB_SEARCH_NOT_FOUND; + LOG_WARN("didn't find value of key.", K(ret), K(key_word)); + } else if (tmp_node->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(key_word)); + } else { + json_schema = static_cast(tmp_node); + } + return ret; +} + +int ObJsonSchemaUtils::json_doc_move_to_array(const ObString& key_word, ObJsonObject* json_schema, ObJsonArray*& array_schema) +{ + INIT_SUCC(ret); + ObJsonNode* tmp_node = nullptr; + if (OB_ISNULL(json_schema)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret)); + 
} else if (OB_ISNULL(tmp_node = json_schema->get_value(key_word))) { + ret = OB_SEARCH_NOT_FOUND; + LOG_WARN("didn't find value of key.", K(ret), K(key_word)); + } else if (tmp_node->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(key_word)); + } else { + array_schema = static_cast(tmp_node); + } + return ret; +} + +int ObJsonSchemaUtils::record_schema_array(ObStack& stk, ObIArray& array) +{ + INIT_SUCC(ret); + int size = stk.size(); + // copy + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObJsonObject* cur_schema = stk.at(i); + ret = array.push_back(cur_schema); + } + return ret; +} + +int ObJsonSchemaUtils::get_index_str(const int& idx, ObStringBuffer& buf) +{ + INIT_SUCC(ret); + char res_ptr[OB_MAX_DECIMAL_PRECISION] = {0}; + char* ptr = nullptr; + if (OB_ISNULL(ptr = ObCharset::lltostr(idx, res_ptr, 10, 1))) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("fail to transform the index(lltostr)", K(ret), K(index)); + } else { + buf.append(res_ptr, static_cast(ptr - res_ptr)); + } + return ret; +} + +int ObJsonSchemaUtils::collect_key(ObJsonNode* obj, ObIAllocator *allocator, ObJsonArray* array, ObJsonBuffer& buf, bool pattern_key) { + INIT_SUCC(ret); + if (obj->json_type() != ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(obj->json_type()), K(ret)); + } else { + ObJsonObject* object_node = static_cast(obj); + int size = object_node->element_count(); + for (int i = 0; i < size && OB_SUCC(ret); ++i) { + ObString key; + ObJsonString* str_node = nullptr; + bool valid_pattern = false; + if (OB_FAIL(object_node->get_key_by_idx(i, key))) { + LOG_WARN("get key failed.", K(i), K(size), K(ret)); + } else if (pattern_key + && (OB_FAIL((ObJsonSchemaUtils::is_valid_pattern(key, buf, valid_pattern))) || !valid_pattern)) { + } else if (OB_ISNULL(str_node = OB_NEWx(ObJsonString, allocator, key))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed.", K(i), 
K(size), K(ret)); + } else if (OB_FAIL(array->append(str_node))) { + LOG_WARN("append failed.", K(i), K(size), K(ret)); + } + } + } + return ret; +} + +int ObJsonSchemaUtils::get_specific_type_of_child(const ObString& key, ObJsonNodeType expect_type, ObIArray &src, ObIArray &res) +{ + INIT_SUCC(ret); + int src_size = src.count(); + for (int i = 0; i < src_size && OB_SUCC(ret); ++i) { + ObIJsonBase* tmp_src = src.at(i); + ObIJsonBase* tmp_res = nullptr; + if (OB_ISNULL(tmp_src)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("null value", K(ret)); + } else if (OB_FAIL(tmp_src->get_object_value(key, tmp_res))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(tmp_res)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("null value", K(ret)); + } else if (tmp_res->json_type() != expect_type) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(tmp_src->json_type()), K(expect_type)); + } else if (OB_FAIL(res.push_back(tmp_res))) { + LOG_WARN("fail to push key-value.", K(ret)); + } + } + if (res.count() > 0) { + } else { + ret = OB_ERR_JSON_KEY_NOT_FOUND; + } + return ret; +} + +int ObJsonSchemaUtils::get_all_composition_def(ObIArray &src, ObIArray &res) +{ + INIT_SUCC(ret); + int src_size = src.count(); + for (int i = 0; i < src_size && OB_SUCC(ret); ++i) { + ObIJsonBase* tmp_src = src.at(i); + ObIJsonBase* tmp_comp = nullptr; + if (OB_ISNULL(tmp_src)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("null value", K(ret)); + } else if (OB_FAIL(tmp_src->get_object_value(ObJsonSchemaItem::COMPOSITION, tmp_comp))) { + if (ret == OB_SEARCH_NOT_FOUND) { + ret = OB_SUCCESS; // didn't found, its normal + } + } else if (OB_ISNULL(tmp_comp)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("null value", K(ret)); + } else if (tmp_comp->json_type() != ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(tmp_src->json_type())); + } else { + int comp_size = tmp_comp->element_count(); + 
for (int i = 0; i < comp_size && OB_SUCC(ret); ++i) { + ObIJsonBase* tmp_def = nullptr; + if (OB_FAIL(tmp_comp->get_array_element(i, tmp_def))) { + LOG_WARN("fail to get", K(i), K(comp_size), K(ret)); + } else if (OB_ISNULL(tmp_def)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("null value", K(i), K(comp_size), K(ret)); + } else if (tmp_def->json_type() != ObJsonNodeType::J_INT) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(tmp_src->json_type())); + } else if (OB_FAIL(res.push_back(tmp_def))) { + LOG_WARN("fail to push", K(i), K(comp_size), K(ret)); + } + } + } + } + if (OB_FAIL(ret)) { + } else if (res.count() > 0) { + } else { + ret = OB_ERR_JSON_KEY_NOT_FOUND; + } + return ret; +} + +int ObJsonSchemaUtils::get_json_number(ObIJsonBase* json_doc, double& res) +{ + INIT_SUCC(ret); + ObJsonNodeType json_type = json_doc->json_type(); + if (json_doc->is_json_number(json_type)) { + ret = json_doc->to_double(res); + } else { + ret = OB_ERR_UNEXPECTED; + } + return ret; +} + +int ObJsonSchemaUtils::get_single_key_value(ObIJsonBase *single_schema, ObString& key_words, ObIJsonBase *&value) +{ + INIT_SUCC(ret); + if (OB_ISNULL(single_schema)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("null value", K(ret)); + } else if (single_schema->element_count() != 1) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("must be object type.", K(ret), K(single_schema->element_count())); + } else if (OB_FAIL(single_schema->get_object_value(0, key_words, value))) { + LOG_WARN("fail to get keywords and value.", K(key_words), K(ret)); + } else if (OB_ISNULL(value) || key_words.empty()) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null", KPC(value), K(ret)); + } + return ret; +} + +// ObExprRegexContext need session_info, so use std::regex +// but illegal regex text will thrown exception "regex_error" +int ObJsonSchemaUtils::is_valid_pattern(const ObString& regex_text, ObJsonBuffer& buf, bool& ans) +{ + INIT_SUCC(ret); + ans = false; + int len = regex_text.length(); + 
buf.reuse(); + if (OB_FAIL(buf.append(regex_text)) || OB_FAIL(buf.append("\0"))) { + } else { + char* start = buf.ptr(); + try { + std::regex pattern(start, len); + ans = true; + } catch (const std::regex_error& e) { + ans = false; + } + } + return ret; +} + +int ObJsonSchemaUtils::if_regex_match(const ObString& text, const ObString& regex_text, ObJsonBuffer& buf, bool& ans) +{ + INIT_SUCC(ret); + ans = false; + buf.reuse(); + int r_len = regex_text.length(); + int t_len = text.length(); + if (OB_FAIL(buf.append(regex_text)) || OB_FAIL(buf.append("\n"))) { + } else { + if (OB_FAIL(buf.append(text)) || OB_FAIL(buf.append("\0"))) { + } else { + char* r_start = buf.ptr(); + char* t_start = buf.ptr() + r_len + 1; + std::regex pattern(r_start, r_len); + std::smatch results; + ans = std::regex_search(t_start, pattern); + } + } + return ret; +} + +ObString ObJsonSchemaUtils::get_pointer_key(ObString& ref_str, bool& end_while) +{ + ObString key; + char* start = ref_str.ptr(); + int key_len = 0; + int total_len = ref_str.length(); + bool get_key = false; + for (int i = 0; i < total_len && !get_key; ++i) { + if (ref_str[i] != '/') { + ++key_len; + } else { + get_key = true; + } + } + if (get_key) { + if (key_len + 1 < total_len) { + key = ObString(key_len, start); + ref_str = ObString(total_len - key_len - 1, start + key_len + 1); + } else { + end_while = true; + } + } else { + key = ref_str; + end_while = true; // last key + } + return key; +} + +bool ObJsonSchemaUtils::is_legal_json_pointer_name(const ObString& name) +{ + bool ret_bool = true; + if (name.length() == 1 && name.compare(ObJsonSchemaItem::ROOT) == 0) { + } else { + int len = name.length(); + for (int i = 0; i < len && ret_bool; ++i) { + if (i == 0 && !is_legal_json_pointer_start(name[i])) { + ret_bool = false; + } else if (!is_legal_json_pointer_char(name[i])) { + ret_bool = false; + } + } + } + return ret_bool; +} + +bool ObJsonSchemaUtils::is_legal_json_pointer_start(const char& ch) +{ + bool ret_bool = 
true; + if (!ObJsonPathUtil::letter_or_not(ch) && ch != '-' && ch != '_') { + ret_bool = false; + } + return ret_bool; +} + +bool ObJsonSchemaUtils::is_legal_json_pointer_char(const char& ch) +{ + bool ret_bool = true; + if (!is_legal_json_pointer_start(ch) && !ObJsonPathUtil::is_digit(ch)) { + ret_bool = false; + } + return ret_bool; +} + +int ObJsonSchemaUtils::need_check_recursive(ObIArray &schema_vec, bool& need_recursive, bool is_array_keywords) +{ + INIT_SUCC(ret); + need_recursive = false; + int size = schema_vec.count(); + for (int i = 0; i < size && !need_recursive && OB_SUCC(ret); ++i) { + ObIJsonBase* tmp_schema = schema_vec.at(i); + ObIJsonBase* tmp_value = nullptr; + if (is_array_keywords) { + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::ITEMS, tmp_value)) || OB_ISNULL(tmp_value)) { + if (ret == OB_SEARCH_NOT_FOUND || OB_ISNULL(tmp_value)) { + ret = OB_SUCCESS; + tmp_value = nullptr; + // didn't found items, check tuple items + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::TUPLE_ITEMS, tmp_value)) || OB_ISNULL(tmp_value)) { + if (ret == OB_SEARCH_NOT_FOUND || OB_ISNULL(tmp_value)) { + ret = OB_SUCCESS; + tmp_value = nullptr; + // didn't found items and tuple items, check additional items + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::ADDITIONAL_ITEMS, tmp_value)) || OB_ISNULL(tmp_value)) { + if (ret == OB_SEARCH_NOT_FOUND || OB_ISNULL(tmp_value)) { + ret = OB_SUCCESS; + } + } else { + // found add items + need_recursive = true; + } + } + } else { + // found tuple items + need_recursive = true; + } + } + } else { // found items + need_recursive = true; + } + } else if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::PROPERTIES, tmp_value)) || OB_ISNULL(tmp_value)) { + if (ret == OB_SEARCH_NOT_FOUND || OB_ISNULL(tmp_value)) { + ret = OB_SUCCESS; + tmp_value = nullptr; + // didn't found properties, check pattern pro + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::PATTERN_PRO, tmp_value)) || 
OB_ISNULL(tmp_value)) { + if (ret == OB_SEARCH_NOT_FOUND || OB_ISNULL(tmp_value)) { + ret = OB_SUCCESS; + tmp_value = nullptr; + // didn't found properties and pattern pro, check additional pro + if (OB_FAIL(tmp_schema->get_object_value(ObJsonSchemaItem::ADDITIONAL_PRO, tmp_value)) || OB_ISNULL(tmp_value)) { + if (ret == OB_SEARCH_NOT_FOUND || OB_ISNULL(tmp_value)) { + ret = OB_SUCCESS; + } + } else { + // found add pro + need_recursive = true; + } + } + } else { + // found pattern pro + need_recursive = true; + } + } + } else { // found properties + need_recursive = true; + } + } + return ret; +} + +} // namespace common +} // namespace oceanbase \ No newline at end of file diff --git a/deps/oblib/src/lib/json_type/ob_json_schema.h b/deps/oblib/src/lib/json_type/ob_json_schema.h new file mode 100644 index 0000000000..13d611ce4c --- /dev/null +++ b/deps/oblib/src/lib/json_type/ob_json_schema.h @@ -0,0 +1,449 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface support for the json tree abstraction. 
+ */ + +#ifndef OCEANBASE_SQL_OB_JSON_SCHEMA +#define OCEANBASE_SQL_OB_JSON_SCHEMA +#include "lib/xml/ob_multi_mode_interface.h" +#include "ob_json_base.h" +#include "ob_json_tree.h" + +namespace oceanbase { +namespace common { + +enum ObJsonSchemaAns { + JS_NOT_CHCECKED = 0, + JS_TRUE = 1, + JS_FALSE = 2 +}; + +enum ObJsonSchemaComp { + JS_COMP_DEP = 0, + JS_COMP_ALLOF = 1, + JS_COMP_ONEOF = 2, + JS_COMP_ANYOF = 3, + JS_COMP_NOT = 4, + JS_COMP_MAX = 5 +}; + +// use uint64, cause ObJsonUint is uint64 +typedef union ObJsonSchemaType { + struct { + uint64_t null_ : 1; + uint64_t boolean_ : 1; + uint64_t string_ : 1; + uint64_t number_ : 1; + uint64_t integer_ : 1; + uint64_t object_ : 1; + uint64_t array_ : 1; + uint64_t error_type_ : 1; + uint64_t reserved_ : 56; + }; + + uint64_t flags_; + int64_t to_string(char *buf, const int64_t buf_len) const + { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "falgs_ = %ld", flags_); + return pos; + } +} ObJsonSchemaType; + +typedef union ObJsonSubSchemaKeywords { + struct { + uint16_t dep_schema_ : 1; + uint16_t all_of_ : 1; + uint16_t one_of_ : 1; + uint16_t any_of_ : 1; + uint16_t not_ : 1; + uint16_t properties_ : 1; + uint16_t pattern_pro_ : 1; + uint16_t additional_pro_ : 1; + uint16_t items_ : 1; + uint16_t tuple_items_ : 1; + uint16_t additional_items_ : 1; + uint16_t reserved_ : 5; + }; + + uint16_t flags_; + int64_t to_string(char *buf, const int64_t buf_len) const + { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "falgs_ = %d", flags_); + return pos; + } +} ObJsonSubSchemaKeywords; + +struct ObJsonContentCmp { + bool operator()(const ObIJsonBase *a, const ObIJsonBase *b) { + bool is_smaller = false; + ObJsonNodeType a_type = a->json_type(); + ObJsonNodeType b_type = b->json_type(); + if (a_type == b_type) { + int res = 0; + a->compare(*b, res); + is_smaller = (res < 0); + } else { + is_smaller = (a->json_type() < b->json_type()); + } + return is_smaller; + } +}; + +struct ObStringCmp { + int 
operator()(const ObString a, const ObString b) { + return a < b; + } +}; + +struct ObStringUnique { + int operator()(const ObString a, const ObString b) { + return a == b; + } +}; + +struct ObJsonContentUnique { + bool operator()(const ObIJsonBase *a, const ObIJsonBase *b) { + bool is_eq = false; + ObJsonNodeType a_type = a->json_type(); + ObJsonNodeType b_type = b->json_type(); + if (a_type == b_type) { + int res = 0; + a->compare(*b, res); + is_eq = (res == 0); + } else { + is_eq = false; + } + return is_eq; + } +}; + +static const int64_t SCHEMA_DEFAULT_PAGE_SIZE = (1LL << 10); // 1k + +class ObJsonSchemaTree +{ +public: + explicit ObJsonSchemaTree(ObIAllocator *allocator) + : allocator_(allocator), + root_doc_(nullptr), + ref_(nullptr), + schema_map_(nullptr), + cur_schema_stk_(allocator_), + typeless_(nullptr), + str_buf_(allocator), + serial_num_(0), + json_ptr_("#") + { + } + explicit ObJsonSchemaTree(ObIAllocator *allocator, ObJsonObject *root_doc) + : allocator_(allocator), + root_doc_(root_doc), + ref_(nullptr), + schema_map_(nullptr), + cur_schema_stk_(allocator_), + typeless_(nullptr), + str_buf_(allocator), + serial_num_(0), + json_ptr_("#") + { + } + explicit ObJsonSchemaTree(ObIAllocator *allocator, ObJsonObject *root_doc, ObString json_ptr) + : allocator_(allocator), + root_doc_(root_doc), + ref_(nullptr), + schema_map_(nullptr), + cur_schema_stk_(allocator_), + typeless_(nullptr), + str_buf_(allocator), + serial_num_(0), + json_ptr_(json_ptr) + { + } + virtual ~ObJsonSchemaTree() {} + int build_schema_tree(ObIJsonBase *json_doc); + void set_root_doc(ObJsonObject *root_doc) {root_doc_ = root_doc;} + OB_INLINE ObJsonArray* get_schema_map() {return schema_map_;} +private: + int inner_build_schema_tree(ObJsonObject* origin_schema, // origin json schema,must be object + bool is_composition, // true: add schema in composition + // false: add schema in schema + ObJsonArray* comp_array = nullptr); // array of composition + int get_schema_vec(ObIArray 
&schema_vec_stk, bool is_composition); + bool if_have_ref(ObJsonObject* origin_schema); + int get_ref_pointer_value(const ObString ref_str, ObJsonObject*& ref_value); + int handle_ref_keywords(ObJsonObject* origin_schema, ObIArray &schema_vec_stk, + const bool& is_composition, ObJsonArray* comp_array); + int generate_schema_and_record(const ObString& key_word, ObJsonNode* value, ObIArray &schema_vec_stk, + const bool& is_composition, ObJsonArray* comp_array); + int generate_schema_info(const ObString& key_word, ObJsonNode* value, ObIArray &schema_vec_stk); + int union_schema_def(const ObString& key_word, ObJsonNode*& value, ObJsonNode* old_key_value, bool& update_old_key); + int union_type(ObJsonNode*& new_value, ObJsonNode* old_value, bool& update_old_key); + int union_array_key_words_value(ObJsonNode*& new_value, ObJsonNode* old_value, bool& update_old_key, bool get_merge = false); + int union_scalar_key_words_value(ObJsonNode*& new_value, ObJsonNode* old_value, bool& update_old_key); + int union_add_pro_value(ObJsonNode*& new_value, ObJsonNode* old_value); + int generate_comp_and_record(const ObString& key_word, ObJsonNode* value, + ObIArray &schema_vec_stk, + ObJsonArray* comp_array); + int handle_keywords_with_specific_type(const ObString& key_word, const ObJsonNodeType& expect_type, + ObJsonObject* origin_schema, ObIArray &schema_vec_stk, + const bool& is_composition, ObJsonArray* comp_array); + // for keywords:maxLength, minLength, maxProperties, minProperties, maxItems, minItems + // these keywords must be positive integer, Otherwise, ignore it and it will not take effect. 
+ int handle_positive_int_keywords(const ObString& key_word, ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + const bool& is_composition, ObJsonArray* comp_array); + int handle_keywords_with_number_value(const ObString& key_word, ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, + const bool& is_composition, ObJsonArray* comp_array, + bool must_be_positive = false); + int handle_keywords_with_subschemas(ObJsonSubSchemaKeywords& key_words, ObJsonObject* json_schema, + ObIArray &schema_vec_stk, + bool is_composition, ObJsonArray* comp_array); + int handle_properties(ObJsonObject*& json_schema, bool is_composition, ObJsonArray*comp_array, ObIArray& pro_array); + int handle_pattern_properties(ObJsonObject* json_schema, ObJsonObject* pro_schema, bool is_composition, + ObJsonArray* comp_array, const ObIArray& pro_array); + int add_required_key(ObJsonNode* pro, ObJsonNode* required, ObJsonArray* pro_key_array); + int handle_additional_properties(ObJsonSubSchemaKeywords& key_words, ObJsonObject* json_schema, bool is_composition, ObJsonArray* comp_array); + int handle_array_schema(ObJsonObject* json_schema, bool is_composition, ObJsonArray*comp_array, bool is_additonal); + int handle_array_tuple_schema(ObJsonObject* json_schema, bool is_composition, ObJsonArray*comp_array); + int handle_unnested_dependencies(ObJsonObject* json_schema); + + int handle_nested_dependencies(ObJsonObject* json_schema, ObJsonArray*comp_array); + int handle_unnested_composition(const ObString& key_word, ObJsonObject* json_schema); + + int handle_nested_composition(const ObString& key_word, ObJsonObject* json_schema, ObJsonArray*comp_array); + int handle_unnested_not(ObJsonObject* json_schema); + int handle_nested_not(ObJsonObject* json_schema, ObJsonArray*comp_array); + int get_difined_type(ObJsonObject* origin_schema, ObIArray &schema_vec_stk, + ObJsonSchemaType& s_type, const bool& is_composition, ObJsonArray* comp_array); + int get_dep_schema_if_defined(ObJsonObject* 
origin_schema, ObIArray &schema_vec_stk, ObJsonSubSchemaKeywords& key_words, + const bool& is_composition, ObJsonArray* comp_array); + int check_keywords_by_type(const ObJsonSchemaType& s_type, ObJsonObject* origin_schema, + ObIArray &schema_vec_stk, ObJsonSubSchemaKeywords& key_words, + const bool& is_composition, ObJsonArray* comp_array); + int check_keywords_of_string(ObJsonObject* origin_schema, ObIArray &schema_vec_stk, + const bool& is_composition, ObJsonArray* comp_array); + int check_keywords_of_number(ObJsonObject* origin_schema, ObIArray &schema_vec_stk, + const bool& is_composition, ObJsonArray* comp_array); + int check_keywords_of_object(ObJsonObject* origin_schema, ObIArray &schema_vec_stk, + ObJsonSubSchemaKeywords& key_words, + const bool& is_composition, ObJsonArray* comp_array); + int check_keywords_of_array(ObJsonObject* origin_schema, ObIArray &schema_vec_stk, + ObJsonSubSchemaKeywords& key_words, + const bool& is_composition, ObJsonArray* comp_array); + int all_move_to_key(const ObString& key_word, ObJsonObject*& json_schema); + int add_pattern_pro_to_schema(ObJsonObject* pro_schema, const ObIArray& pro_array, const ObString& key); + int json_schema_move_to_key(const ObString& key_word); + int json_schema_move_to_array(const ObString& key_word, ObJsonArray* array_val); + int json_schema_add_comp_value(const ObString& key, ObJsonArray* new_array_val); + int json_schema_add_dep_value(ObJsonObject* dep_val); + int json_schema_back_to_parent(); + int json_schema_back_to_grandpa(); + int get_addition_pro_value(const ObJsonSubSchemaKeywords& key_words, ObJsonObject* origin_schema, ObJsonArray*& add_array, bool check_pattern = true); +public: + static const int DEFAULT_PREVIOUS_NUMBER = -1; + static const int ADDITIONAL_PRO_ARRAY_COUNT = 2; +private: + ObIAllocator *allocator_; + ObJsonObject *root_doc_; // record root of original json doc + ObJsonObject *ref_; // record ref definition + ObJsonArray* schema_map_; // array of schema_map, index 0: 
schema_root_, index i: schema of number i + ObStack cur_schema_stk_; // value of cur_root, including infomations like: schema, compositon, properties, items, allOf... + ObJsonInt* typeless_; // used for copy schema + ObJsonBuffer str_buf_; + int serial_num_; + ObString json_ptr_; // cur schema ptr, default '#' +}; + +class ObJsonSchemaValidator +{ +public: + explicit ObJsonSchemaValidator(ObIAllocator *allocator, ObIJsonBase *schema_map) + : allocator_(allocator), + schema_map_(schema_map), + failed_keyword_(ObString::make_empty_string()), + json_pointer_(allocator_), + schema_pointer_(allocator_), + str_buf_(allocator), + composition_ans_recorded_(false) + { + } + virtual ~ObJsonSchemaValidator() {} + int schema_validator(ObIJsonBase *json_doc, bool& is_valid); + int get_json_or_schema_point(ObJsonBuffer& pointer, bool is_schema_pointer); + OB_INLINE ObString get_failed_keyword() { return failed_keyword_;} + typedef int (*ObCheckCompSchema)(ObIJsonBase *json_doc, ObIJsonBase* dep_schema, ObIArray &ans_map, bool& is_valid); + +private: + int inner_schema_validator(ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &ans_map, bool& is_valid); + int object_recursive_validator(ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &ans_map, bool& is_valid); + int collect_schema_by_key(const ObString& key, ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &recursive_vec); + int collect_schema_by_pattern_key(const ObString& key, ObIJsonBase* schema_vec, ObIArray &recursive_vec); + int collect_schema_by_add_key(const ObString& key, ObIJsonBase *json_doc, ObIJsonBase* schema_vec, ObIArray &recursive_vec); + int array_recursive_validator(ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &ans_map, bool& is_valid); + int collect_schema_by_idx(const int& idx, const ObString& idx_str, ObIArray &schema_vec, ObIArray &recursive_vec); + int collect_schema_by_add_idx(const int& idx, ObIJsonBase* schema_vec, ObIArray &recursive_vec); + int 
get_schema_composition_ans(ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &ans_map, bool& is_valid); + // check compositon in all scheme vec + int get_vec_schema_composition_ans(ObIJsonBase *json_doc, ObJsonSchemaComp comp_idx, const ObString& key, ObIArray &schema_vec, ObIArray &ans_map, bool& is_valid); + // check compositon in each scheme vec + ObCheckCompSchema get_comp_schema_validation_func(ObJsonSchemaComp comp_type); + int check_dep_schema(ObIJsonBase *json_doc, ObIJsonBase* dep_schema, ObIArray &ans_map, bool& is_valid); + int check_allof_schema(ObIJsonBase *json_doc, ObIJsonBase* dep_schema, ObIArray &ans_map, bool& is_valid); + int check_oneof_schema(ObIJsonBase *json_doc, ObIJsonBase* dep_schema, ObIArray &ans_map, bool& is_valid); + int check_anyof_schema(ObIJsonBase *json_doc, ObIJsonBase* dep_schema, ObIArray &ans_map, bool& is_valid); + int check_not_schema(ObIJsonBase *json_doc, ObIJsonBase* dep_schema, ObIArray &ans_map, bool& is_valid); + int get_single_element_and_check(ObIJsonBase *json_doc, const int& idx, ObIJsonBase* dep_schema, ObIArray &ans_map, bool& is_valid); + int check_single_comp_array(ObIJsonBase *json_doc, ObIJsonBase* allof_val, ObIArray &ans_map, bool& is_valid); + int check_single_composition_schema(ObIJsonBase *json_doc, ObIJsonBase* single_schema, ObIArray &ans_map, bool& is_valid); + int get_ans_by_id(ObIJsonBase* j_id, ObIArray &ans_map, bool& ans); + int check_all_schema_def(ObIJsonBase *json_doc, ObIArray &schema_vec, bool& is_valid); + int check_all_composition_def(ObIJsonBase *json_doc, ObIArray &schema_vec, ObIArray &ans_map); + // if in composition, record ans; if not in composition, check ans ,when false raise error + int record_comp_ans(const int& def_id, const bool& ans, ObIArray &ans_map); + int check_single_schema(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + int check_null_or_boolean(ObIJsonBase *json_doc, ObIJsonBase *schema, bool is_null, bool& is_valid); + // keywords in composition + 
int check_public_key_words(ObIJsonBase *json_doc, ObIJsonBase *schema, const ObJsonSchemaType& valid_type, bool& is_valid); + // key_words_in_schema + int check_public_key_words(ObIJsonBase *json_doc, ObIArray &schema_vec, ObJsonSchemaType valid_type, bool& is_valid); + int check_public_key_words(const char key_start, ObJsonSchemaType &valid_type, ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + int check_number_and_integer(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + int check_number_and_integer(ObIJsonBase *json_doc, ObIArray &schema_vec, bool& is_valid); + int check_boundary_key_words(const ObString& key, ObIJsonBase *json_doc, ObIJsonBase *schema, int& res); + int check_string_type(ObIJsonBase *json_doc, ObIArray &schema_vec, bool& is_valid); + int check_string_type(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + int check_object_type(ObIJsonBase *json_doc, ObIArray &schema_vec, bool& is_valid); + int check_object_type(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + int check_array_type(ObIJsonBase *json_doc, ObIArray &schema_vec, bool& is_valid); + int check_array_type(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + bool check_type(const ObJsonSchemaType& valid_type, ObIJsonBase *schema); + bool check_multiple_of(ObIJsonBase *json_doc, ObIJsonBase *schema); + bool check_pattern_keywords(ObIJsonBase *json_doc, ObIJsonBase *schema); + int check_ref(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + int check_single_ref(ObIJsonBase *json_doc, const ObString& ref_key, bool& is_valid); + int check_enum(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + int check_required(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + int check_dep_required(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + int check_add_pro_in_schema(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid, ObString defined_key = ObString::make_empty_string()); + int 
check_pattern_key_in_add_pro(const ObString& key, ObIJsonBase *schema, bool& found); + int check_unique_items(ObIJsonBase *json_doc, ObIJsonBase *schema, bool& is_valid); + int get_composition_schema_def(int idx, ObIJsonBase *schema_vec, ObIJsonBase *&schema, ObIArray &ans_map, int& schema_id); +public: + ObIAllocator *allocator_; + ObIJsonBase *schema_map_; + ObString failed_keyword_; + ObStack json_pointer_; + ObStack schema_pointer_; + ObJsonBuffer str_buf_; + // if did't record any composition ans, don't need check anyOf, oneOf, allOf and dependent schema, their default ans is true + // but need check not, which default ans if false + bool composition_ans_recorded_; +}; + +// ObJsonSchemaCache +// used in json schema expression +class ObJsonSchemaCache { +public: + enum ObSchemaParseStat{ + UNINITIALIZED, + INITIALIZED, + ERROR, + }; + + struct ObSchemaCacheStat{ + ObSchemaParseStat state_; + int index_; + ObSchemaCacheStat() : state_(UNINITIALIZED), index_(-1) {} + ObSchemaCacheStat(ObSchemaParseStat state, int idx) : state_(state), index_(idx) {}; + ObSchemaCacheStat(const ObSchemaCacheStat& stat) : state_(stat.state_), index_(stat.index_) {} + }; + typedef PageArena JsonSchemaStrArena; + typedef PageArena JsonSchemaArena; + typedef PageArena SchemaCacheStatArena; + typedef ObVector ObJsonSchemaStr; + typedef ObVector ObJsonSchemaPointers; + typedef ObVector ObSchemaCacheStatArr; + static const int64_t DEFAULT_PAGE_SIZE = (1LL << 10); // 1kb + +public: + ObJsonSchemaCache(common::ObIAllocator *allocator) : + allocator_(allocator), + page_allocator_(*allocator, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR), + schema_cache_arena_(DEFAULT_PAGE_SIZE, page_allocator_), + schema_arena_(DEFAULT_PAGE_SIZE, page_allocator_), + str_arena_(DEFAULT_PAGE_SIZE, page_allocator_), + schema_str_(&str_arena_, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR), + schema_arr_ptr_(&schema_arena_, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR), + stat_arr_(&schema_cache_arena_, 
common::ObModIds::OB_MODULE_PAGE_ALLOCATOR) {} + ~ObJsonSchemaCache() {}; + + ObIJsonBase* schema_at(size_t idx); + + ObSchemaParseStat schema_stat_at(size_t idx); + + size_t size() { return schema_arr_ptr_.size(); } + void reset() {stat_arr_.clear(); schema_arr_ptr_.clear(); schema_str_.clear();} + + int find_and_add_cache(ObIJsonBase*& out_schema, ObString& in_str, int arg_idx, const ObJsonInType& in_type); + void set_allocator(common::ObIAllocator *allocator); + common::ObIAllocator* get_allocator() {return allocator_;} +private: + int set_schema(ObIJsonBase* j_schema, const ObString& in_str, int arg_idx, int index); + bool is_match(ObString& in_str, size_t idx); + + int fill_empty(size_t reserve_size); + +private: + common::ObIAllocator *allocator_; + ModulePageAllocator page_allocator_; + SchemaCacheStatArena schema_cache_arena_; + JsonSchemaArena schema_arena_; + JsonSchemaStrArena str_arena_; + ObJsonSchemaStr schema_str_; // array of schema str, for varchar, its json str; for json, its raw binary + ObJsonSchemaPointers schema_arr_ptr_; // array of parsed json schema; + ObSchemaCacheStatArr stat_arr_; // stat of json schema; +}; + +class ObJsonSchemaUtils +{ +public: + static int is_all_children_subschema(ObJsonNode* array_of_subschema); + static int check_if_composition_legal(ObJsonObject* origin_schema, ObJsonSubSchemaKeywords& key_words); + static int check_composition_by_name(const ObString& key_word, ObJsonObject* origin_schema, bool& is_legal); + static int json_doc_move_to_key(const ObString& key_word, ObJsonObject*& json_schema); + static int json_doc_move_to_array(const ObString& key_word, ObJsonObject* json_schema, ObJsonArray*& array_schema); + static int record_schema_array(ObStack& stk, ObIArray& array); + static int get_index_str(const int& idx, ObStringBuffer& buf); + static int get_single_key_value(ObIJsonBase *single_schema, ObString& key_words, ObIJsonBase *&value); + static int collect_key(ObJsonNode* obj, ObIAllocator *allocator, 
ObJsonArray* array, ObJsonBuffer& buf, bool pattern_key = false); + static int get_specific_type_of_child(const ObString& key, ObJsonNodeType expect_type, ObIArray &src, ObIArray &res); + static int get_all_composition_def(ObIArray &src, ObIArray &res); + static int get_json_number(ObIJsonBase* json_doc, double& res); + static int set_type_by_string(const ObString& str, ObJsonSchemaType& s_type); + static int is_valid_pattern(const ObString& regex_text, ObJsonBuffer& buf, bool& ans); + static int if_regex_match(const ObString& text, const ObString& regex_text, ObJsonBuffer& buf, bool& ans); + static bool check_multiple_of(const double& json_val, const double& multi_val); + static ObString get_pointer_key(ObString& ref_str, bool& end_while); + static int set_valid_number_type_by_mode(ObIJsonBase *json_doc, ObJsonSchemaType& valid_type); + static bool is_legal_json_pointer_name(const ObString& name); + static bool is_legal_json_pointer_start(const char& ch); + static bool is_legal_json_pointer_char(const char& ch); + // for array, recursive checking is only required if there are keywords: ITEMS, TUPLE_ITEMS, ADDITIONAL_ITEMS + // for object, recursive checking is only required if there are keywords: PROPERTIES, PATTERN_PRO, ADDITIONAL_PRO + static int need_check_recursive(ObIArray &schema_vec, bool& need_recursive, bool is_array_keywords); +}; + +} // namespace common +} // namespace oceanbase + +#endif // OCEANBASE_SQL_OB_JSON_SCHEMA \ No newline at end of file diff --git a/deps/oblib/src/lib/json_type/ob_json_tree.cpp b/deps/oblib/src/lib/json_type/ob_json_tree.cpp index cc78f5ce58..714579372c 100644 --- a/deps/oblib/src/lib/json_type/ob_json_tree.cpp +++ b/deps/oblib/src/lib/json_type/ob_json_tree.cpp @@ -436,6 +436,7 @@ int ObJsonNode::get_key(uint64_t index, common::ObString &key_out) const int ObJsonNode::get_array_element(uint64_t index, ObIJsonBase *&value) const { INIT_SUCC(ret); + value = NULL; if (OB_FAIL(check_valid_array_op(index))) { LOG_WARN("invalid 
json array operation", K(ret), K(index)); @@ -450,6 +451,7 @@ int ObJsonNode::get_array_element(uint64_t index, ObIJsonBase *&value) const int ObJsonNode::get_object_value(uint64_t index, ObIJsonBase *&value) const { INIT_SUCC(ret); + value = NULL; if (OB_FAIL(check_valid_object_op(index))) { LOG_WARN("invalid json object operation", K(ret), K(index)); @@ -464,9 +466,31 @@ int ObJsonNode::get_object_value(uint64_t index, ObIJsonBase *&value) const return ret; } +int ObJsonNode::get_object_value(uint64_t index, ObString &key, ObIJsonBase *&value) const +{ + INIT_SUCC(ret); + value = NULL; + + if (OB_FAIL(check_valid_object_op(index))) { + LOG_WARN("invalid json object operation", K(ret), K(index)); + } else { + const ObJsonObject *j_obj = static_cast(this); + ObJsonNode* node = nullptr; + if (OB_FAIL(j_obj->get_value_by_idx(index, key, node))) { + LOG_WARN("fail to find value by index", K(ret), K(index)); + } else if (OB_ISNULL(value = node)) { // maybe not found. + ret = OB_SEARCH_NOT_FOUND; + LOG_WARN("not found value by index", K(ret), K(index)); + } + } + + return ret; +} + int ObJsonNode::get_object_value(const ObString &key, ObIJsonBase *&value) const { INIT_SUCC(ret); + value = NULL; if (json_type() != ObJsonNodeType::J_OBJECT) { // check json node type ret = OB_INVALID_ARGUMENT; @@ -621,6 +645,31 @@ ObJsonNode *ObJsonObject::get_value(uint64_t index) const return j_node; } +int ObJsonObject::get_key_by_idx(uint64_t index, ObString& key) const +{ + INIT_SUCC(ret); + if (index < object_array_.size()) { + key = object_array_[index].get_key(); + } else { + ret = OB_OUT_OF_ELEMENT; + LOG_WARN("fail to get json node", K(ret), K(index)); + } + return ret; +} + +int ObJsonObject::get_value_by_idx(uint64_t index, ObString& key, ObJsonNode*& value) const +{ + INIT_SUCC(ret); + if (index < object_array_.size()) { + value = object_array_[index].get_value(); + key = object_array_[index].get_key(); + } else { + ret = OB_OUT_OF_ELEMENT; + LOG_WARN("fail to get json node", 
K(ret), K(index)); + } + return ret; +} + int ObJsonObject::remove(const common::ObString &key) { INIT_SUCC(ret); @@ -669,7 +718,7 @@ int ObJsonObject::replace(const ObJsonNode *old_node, ObJsonNode *new_node) // When constructing a JSON tree, if two keys have the same value, // the latter one will overwrite the former one -int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_unique_key, bool is_lazy_sort, bool need_overwrite) +int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_unique_key, bool is_lazy_sort, bool need_overwrite, bool is_schema) { INIT_SUCC(ret); @@ -680,7 +729,19 @@ int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_ value->set_parent(this); ObJsonObjectPair pair(key, value); - if (need_overwrite) { + if (is_schema) { + // if is schema, keep the first value, don't raise error or overwrite + ObJsonKeyCompare cmp; + ObJsonObjectArray::iterator low_iter = std::lower_bound(object_array_.begin(), + object_array_.end(), pair, cmp); + if (low_iter != object_array_.end() && low_iter->get_key() == key) { // Found and covered + // do nothing + } else if (OB_FAIL(object_array_.push_back(pair))) { + LOG_WARN("failed to store in object array.", K(ret)); + } else { + sort(); + } + } else if (need_overwrite) { ObJsonKeyCompare cmp; ObJsonObjectArray::iterator low_iter = std::lower_bound(object_array_.begin(), object_array_.end(), pair, cmp); @@ -696,12 +757,10 @@ int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_ } else if (!is_lazy_sort) { sort(); } - } else { - if (OB_FAIL(object_array_.push_back(pair))) { - LOG_WARN("failed to store in object array.", K(ret)); - } else if (!is_lazy_sort) { - sort(); - } + } else if (OB_FAIL(object_array_.push_back(pair))) { // if don't check unique key, push directly + LOG_WARN("failed to store in object array.", K(ret)); + } else if (!is_lazy_sort) { + sort(); } 
set_serialize_delta_size(value->get_serialize_size()); } diff --git a/deps/oblib/src/lib/json_type/ob_json_tree.h b/deps/oblib/src/lib/json_type/ob_json_tree.h index 02dd713e25..6495f8d08f 100644 --- a/deps/oblib/src/lib/json_type/ob_json_tree.h +++ b/deps/oblib/src/lib/json_type/ob_json_tree.h @@ -103,6 +103,7 @@ public: int get_array_element(uint64_t index, ObIJsonBase *&value) const override; int get_object_value(uint64_t index, ObIJsonBase *&value) const override; int get_object_value(const ObString &key, ObIJsonBase *&value) const override; + int get_object_value(uint64_t index, ObString &key, ObIJsonBase *&value) const override; int get_key(uint64_t index, common::ObString &key_out) const override; int array_remove(uint64_t index) override; int object_remove(const common::ObString &key) override; @@ -175,12 +176,14 @@ private: typedef common::ObArray ObJsonObjectArray; class ObJsonObject : public ObJsonContainer { +private: + static const int64_t DEFAULT_PAGE_SIZE = 512L; // 8kb -> 512 public: explicit ObJsonObject(ObIAllocator *allocator) : ObJsonContainer(allocator), serialize_size_(0), page_allocator_(*allocator, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR), - object_array_(OB_MALLOC_NORMAL_BLOCK_SIZE, page_allocator_) + object_array_(DEFAULT_PAGE_SIZE, page_allocator_) { set_parent(NULL); } @@ -237,6 +240,18 @@ public: // @return Returns ObJsonNode on success, NULL otherwise. ObJsonNode *get_value(uint64_t index) const; + // Get object pair by index. + // + // @param [in] index The index. + // @param [out] key The key. + // @param [out] vale The value. + int get_value_by_idx(uint64_t index, ObString& key, ObJsonNode*& value) const; + // Get object pair by index. + // + // @param [in] index The index. + // @param [out] key The key. + int get_key_by_idx(uint64_t index, ObString& key) const; + // Get json node by index. // // @param [in] index The index. @@ -261,7 +276,7 @@ public: // @param [in] key The key. // @param [in] value The Json node. 
// @return Returns OB_SUCCESS on success, error code otherwise. - int add(const common::ObString &key, ObJsonNode *value, bool with_unique_key = false, bool is_lazy_sort = false, bool need_overwrite = true); + int add(const common::ObString &key, ObJsonNode *value, bool with_unique_key = false, bool is_lazy_sort = false, bool need_overwrite = true, bool is_schema = false); // Rename key in current object if exist. // @@ -293,6 +308,8 @@ public: // @return void void clear(); + void get_obj_array(ObJsonObjectArray*& obj_array) { obj_array = &object_array_; } + private: uint64_t serialize_size_; ModulePageAllocator page_allocator_; @@ -305,7 +322,7 @@ typedef common::ObVector ObJsonNodeVector; class ObJsonArray : public ObJsonContainer { private: - static const int64_t DEFAULT_PAGE_SIZE = 8192L; // 8kb + static const int64_t DEFAULT_PAGE_SIZE = 512L; // 8kb -> 512 public: explicit ObJsonArray(ObIAllocator *allocator) : ObJsonContainer(allocator), @@ -565,6 +582,13 @@ public: is_null_to_str_(false) { } + explicit ObJsonString(ObString str) + : ObJsonScalar(), + str_(str), + ext_(0), + is_null_to_str_(false) + { + } virtual ~ObJsonString() {} int64_t to_string(char *buf, const int64_t buf_len) const { @@ -579,6 +603,7 @@ public: OB_INLINE void set_is_null_to_str(bool value) { is_null_to_str_ = value; } OB_INLINE bool get_is_null_to_str() const { return is_null_to_str_; } OB_INLINE uint64_t length() const { return str_.length(); } + OB_INLINE ObString get_str() const { return str_; } OB_INLINE uint64_t get_serialize_size() { return serialization::encoded_length_vi64(length()) + length(); diff --git a/deps/oblib/src/lib/lob/ob_lob_base.cpp b/deps/oblib/src/lib/lob/ob_lob_base.cpp new file mode 100644 index 0000000000..a180cd1c41 --- /dev/null +++ b/deps/oblib/src/lib/lob/ob_lob_base.cpp @@ -0,0 +1,369 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+#define USING_LOG_PREFIX LIB
+#include "ob_lob_base.h"
+#include "lib/utility/serialization.h"
+
+namespace oceanbase {
+namespace common {
+
+
+// Resolve a readable pointer at `offset`.
+//
+// @param [in]  offset     byte offset into the cursor data, must be in [0, get_length())
+// @param [in]  len        number of bytes the caller would like to read
+// @param [out] ptr        pointer returned by get_ptr() for `offset`
+// @param [out] avail_len  min(len, bytes remaining after offset); may be smaller than `len`,
+//                         so callers needing exactly `len` bytes must compare the two
+// @return OB_ERR_UNEXPECTED when offset is out of range, nothing is available or ptr is null;
+//         otherwise whatever get_ptr() returns.
+int ObILobCursor::check_and_get(int64_t offset, int64_t len, const char *&ptr, int64_t &avail_len) const
+{
+  INIT_SUCC(ret);
+  int64_t total_len = get_length();
+  // a negative offset would otherwise pass the upper-bound check and reach get_ptr()
+  if (offset < 0 || offset >= total_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("offset overflow", K(ret), K(len), K(offset), K(total_len));
+  } else if ((avail_len = std::min(total_len - offset, len)) <= 0) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len is zero", K(ret), K(len), K(offset), K(total_len), K(avail_len));
+  } else if (OB_FAIL(get_ptr(offset, avail_len, ptr))) {
+    LOG_WARN("get_ptr fail", K(ret), K(avail_len), K(len), K(offset), K(total_len), KP(ptr));
+  } else if (OB_ISNULL(ptr)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("ptr is null", K(ret), K(avail_len), K(len), K(offset), K(total_len), KP(ptr));
+  }
+  return ret;
+}
+
+// Read one bool stored at `offset` into *val; fails if fewer than sizeof(bool) bytes remain.
+int ObILobCursor::read_bool(int64_t offset, bool *val) const
+{
+  INIT_SUCC(ret);
+  static const int64_t max_bool_len = sizeof(bool);
+  int64_t avail_len = 0;
+  const char* ptr = nullptr;
+  if (OB_FAIL(check_and_get(offset, max_bool_len, ptr, avail_len))) {
+    LOG_WARN("check_and_get fail", K(ret), K(offset), K(max_bool_len), K(avail_len));
+  } else if (avail_len < max_bool_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len not enough", K(ret), K(avail_len), K(offset), K(max_bool_len));
+  } else {
+    *val = *reinterpret_cast<const bool *>(ptr);
+  }
+  return ret;
+}
+
+int ObILobCursor::read_i8(int64_t offset,
int8_t *val) const
+{
+  INIT_SUCC(ret);
+  static const int64_t max_i8_len = sizeof(int8_t);
+  int64_t avail_len = 0;
+  const char* ptr = nullptr;
+  if (OB_FAIL(check_and_get(offset, max_i8_len, ptr, avail_len))) {
+    LOG_WARN("check_and_get fail", K(ret), K(offset), K(max_i8_len), K(avail_len));
+  } else if (avail_len < max_i8_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len not enough", K(ret), K(avail_len), K(offset), K(max_i8_len));
+  } else {
+    // single byte: no alignment concern, read it through a typed pointer
+    *val = *reinterpret_cast<const int8_t *>(ptr);
+  }
+  return ret;
+}
+
+// Read an int16_t stored at `offset` (raw object representation, host byte order) into *val.
+// Fails with OB_ERR_UNEXPECTED if fewer than sizeof(int16_t) bytes remain.
+int ObILobCursor::read_i16(int64_t offset, int16_t *val) const
+{
+  INIT_SUCC(ret);
+  static const int64_t max_i16_len = sizeof(int16_t);
+  int64_t avail_len = 0;
+  const char* ptr = nullptr;
+  if (OB_FAIL(check_and_get(offset, max_i16_len, ptr, avail_len))) {
+    LOG_WARN("check_and_get fail", K(ret), K(offset), K(max_i16_len), K(avail_len));
+  } else if (avail_len < max_i16_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len not enough", K(ret), K(avail_len), K(offset), K(max_i16_len));
+  } else {
+    // offset is arbitrary, so ptr may not be aligned for int16_t: copy the bytes
+    // instead of dereferencing a casted pointer
+    MEMCPY(val, ptr, sizeof(int16_t));
+  }
+  return ret;
+}
+
+// Read an int32_t stored at `offset` (raw object representation, host byte order) into *val.
+// Fails with OB_ERR_UNEXPECTED if fewer than sizeof(int32_t) bytes remain.
+int ObILobCursor::read_i32(int64_t offset, int32_t *val) const
+{
+  INIT_SUCC(ret);
+  static const int64_t max_i32_len = sizeof(int32_t);
+  int64_t avail_len = 0;
+  const char* ptr = nullptr;
+  if (OB_FAIL(check_and_get(offset, max_i32_len, ptr, avail_len))) {
+    LOG_WARN("check_and_get fail", K(ret), K(offset), K(max_i32_len), K(avail_len));
+  } else if (avail_len < max_i32_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len not enough", K(ret), K(avail_len), K(offset), K(max_i32_len));
+  } else {
+    // ptr may be unaligned for int32_t: copy the bytes
+    MEMCPY(val, ptr, sizeof(int32_t));
+  }
+  return ret;
+}
+
+// Read an int64_t stored at `offset` into *val.
+int ObILobCursor::read_i64(int64_t offset, int64_t *val) const
+{
+  INIT_SUCC(ret);
+  static const int64_t max_i64_len = sizeof(int64_t);
+  int64_t avail_len = 0;
+  const char* ptr = nullptr;
+  if (OB_FAIL(check_and_get(offset, max_i64_len, ptr, avail_len))) {
+    LOG_WARN("check_and_get fail", K(ret), K(offset), K(max_i64_len), K(avail_len));
+  } else if
(avail_len < max_i64_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len not enough", K(ret), K(avail_len), K(offset), K(max_i64_len));
+  } else {
+    // ptr may be unaligned for int64_t: copy the bytes instead of dereferencing a casted pointer
+    MEMCPY(val, ptr, sizeof(int64_t));
+  }
+  return ret;
+}
+
+// Read a float stored at `offset` (raw object representation) into *val.
+// Fails with OB_ERR_UNEXPECTED if fewer than sizeof(float) bytes remain.
+int ObILobCursor::read_float(int64_t offset, float *val) const
+{
+  INIT_SUCC(ret);
+  static const int64_t max_float_len = sizeof(float);
+  int64_t avail_len = 0;
+  const char* ptr = nullptr;
+  if (OB_FAIL(check_and_get(offset, max_float_len, ptr, avail_len))) {
+    LOG_WARN("check_and_get fail", K(ret), K(offset), K(max_float_len), K(avail_len));
+  } else if (avail_len < max_float_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len not enough", K(ret), K(avail_len), K(offset), K(max_float_len));
+  } else {
+    // ptr may be unaligned for float: copy the bytes
+    MEMCPY(val, ptr, sizeof(float));
+  }
+  return ret;
+}
+
+// Read a double stored at `offset` (raw object representation) into *val.
+// Fails with OB_ERR_UNEXPECTED if fewer than sizeof(double) bytes remain.
+int ObILobCursor::read_double(int64_t offset, double *val) const
+{
+  INIT_SUCC(ret);
+  static const int64_t max_double_len = sizeof(double);
+  int64_t avail_len = 0;
+  const char* ptr = nullptr;
+  if (OB_FAIL(check_and_get(offset, max_double_len, ptr, avail_len))) {
+    LOG_WARN("check_and_get fail", K(ret), K(offset), K(max_double_len), K(avail_len));
+  } else if (avail_len < max_double_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len not enough", K(ret), K(avail_len), K(offset), K(max_double_len));
+  } else {
+    // ptr may be unaligned for double: copy the bytes
+    MEMCPY(val, ptr, sizeof(double));
+  }
+  return ret;
+}
+
+// Decode an int16_t with serialization::decode_i16 starting at `offset`.
+// On success `offset` is advanced by the number of bytes consumed; on failure it is unchanged.
+int ObILobCursor::decode_i16(int64_t &offset, int16_t *val) const
+{
+  INIT_SUCC(ret);
+  static const int64_t max_i16_len = sizeof(int16_t);
+  int64_t avail_len = 0;
+  const char* ptr = nullptr;
+  int64_t pos = 0;
+  if (OB_FAIL(check_and_get(offset, max_i16_len, ptr, avail_len))) {
+    LOG_WARN("check_and_get fail", K(ret), K(offset), K(max_i16_len), K(avail_len));
+  } else if (OB_FAIL(serialization::decode_i16(ptr, avail_len, pos, val))) {
+    LOG_WARN("decode_i16 fail", K(ret), K(avail_len), K(max_i16_len), K(offset), K(pos), KP(ptr));
+  } else {
+    offset += pos;
+  }
+  return ret;
+}
+
+int ObILobCursor::decode_vi64(int64_t &offset,
int64_t *val) const
+{
+  // Decodes a variable-length int64 starting at `offset`; on success `offset` is advanced
+  // by the number of bytes consumed, on failure it is left unchanged.
+  INIT_SUCC(ret);
+  // upper bound of an encoded vi64; check_and_get() clamps it to the bytes actually left
+  static const int64_t max_vi64_len = serialization::encoded_length_vi64(UINT64_MAX);
+  int64_t avail_len = 0;
+  const char* ptr = nullptr;
+  int64_t pos = 0;
+  if (OB_FAIL(check_and_get(offset, max_vi64_len, ptr, avail_len))) {
+    LOG_WARN("check_and_get fail", K(ret), K(offset), K(max_vi64_len), K(avail_len));
+  } else if (OB_FAIL(serialization::decode_vi64(ptr, avail_len, pos, val))) {
+    LOG_WARN("decode_vi64 fail", K(ret), K(avail_len), K(max_vi64_len), K(offset), K(pos), KP(ptr));
+  } else {
+    offset += pos;
+  }
+  return ret;
+}
+
+// Deserialize an ObNumber starting at `offset`.
+// On success `offset` is advanced by the number of bytes consumed; on failure it is unchanged.
+int ObILobCursor::deserialize(int64_t &offset, number::ObNumber *number) const
+{
+  INIT_SUCC(ret);
+  // upper bound used for the request; check_and_get() clamps it to the bytes actually left
+  static const int64_t max_ob_number_len = sizeof(uint32_t) + sizeof(uint32_t) * number::ObNumber::MAX_CALC_LEN;
+  int64_t avail_len = 0;
+  const char* ptr = nullptr;
+  int64_t pos = 0;
+  if (OB_FAIL(check_and_get(offset, max_ob_number_len, ptr, avail_len))) {
+    LOG_WARN("check_and_get fail", K(ret), K(offset), K(max_ob_number_len), K(avail_len));
+  } else if (OB_FAIL(number->deserialize(ptr, avail_len, pos))) {
+    LOG_WARN("deserialize number fail", K(ret), K(avail_len), K(max_ob_number_len), K(offset), K(pos), KP(ptr));
+  } else {
+    offset += pos;
+  }
+  return ret;
+}
+
+
+// Expose a writable window of exactly `len` bytes starting at `offset`.
+// Unlike check_and_get(), a window shorter than `len` is an error (OB_ERR_UNEXPECTED).
+int ObILobCursor::get_for_write(int64_t offset, int64_t len, ObString &data)
+{
+  INIT_SUCC(ret);
+  int64_t total_len = get_length();
+  int64_t avail_len = 0;
+  char* ptr = nullptr;
+  // a negative offset would otherwise pass the upper-bound check and reach get_ptr_for_write()
+  if (offset < 0 || offset >= total_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("offset overflow", K(ret), K(len), K(offset), K(total_len));
+  } else if ((avail_len = std::min(total_len - offset, len)) < len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len is not enough", K(ret), K(len), K(offset), K(total_len), K(avail_len));
+  } else if (OB_FAIL(get_ptr_for_write(offset, avail_len, ptr))) {
+    LOG_WARN("get ptr fail", K(ret), K(avail_len), K(len), K(offset), K(total_len), KP(ptr));
+  } else {
+    data.assign_ptr(ptr, avail_len);
+  }
+  return ret;
+}
+
+int
ObILobCursor::write_i8(int64_t offset, int8_t val)
+{
+  // Overwrites the byte at `offset` with `val`; the cursor data is never grown here.
+  INIT_SUCC(ret);
+  static const int64_t max_i8_len = sizeof(int8_t);
+  int64_t total_len = get_length();
+  int64_t avail_len = 0;
+  char* ptr = nullptr;
+  if (offset < 0 || offset >= total_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("offset overflow", K(ret), K(max_i8_len), K(offset), K(total_len));
+  } else if ((avail_len = std::min(total_len - offset, max_i8_len)) < max_i8_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len is not enough", K(ret), K(max_i8_len), K(offset), K(total_len), K(avail_len));
+  } else if (OB_FAIL(get_ptr_for_write(offset, avail_len, ptr))) {
+    LOG_WARN("get ptr fail", K(ret), K(avail_len), K(max_i8_len), K(offset), K(total_len), KP(ptr));
+  } else {
+    // single byte: no alignment concern
+    int8_t *val_pos = reinterpret_cast<int8_t *>(ptr);
+    *val_pos = static_cast<int8_t>(val);
+  }
+  return ret;
+}
+
+// Overwrite sizeof(int16_t) bytes at `offset` with the raw representation of `val`.
+// Fails with OB_ERR_UNEXPECTED if the value does not fit inside the existing data.
+int ObILobCursor::write_i16(int64_t offset, int16_t val)
+{
+  INIT_SUCC(ret);
+  static const int64_t max_i16_len = sizeof(int16_t);
+  int64_t total_len = get_length();
+  int64_t avail_len = 0;
+  char* ptr = nullptr;
+  if (offset < 0 || offset >= total_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("offset overflow", K(ret), K(max_i16_len), K(offset), K(total_len));
+  } else if ((avail_len = std::min(total_len - offset, max_i16_len)) < max_i16_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len is not enough", K(ret), K(max_i16_len), K(offset), K(total_len), K(avail_len));
+  } else if (OB_FAIL(get_ptr_for_write(offset, avail_len, ptr))) {
+    LOG_WARN("get ptr fail", K(ret), K(avail_len), K(max_i16_len), K(offset), K(total_len), KP(ptr));
+  } else {
+    // offset is arbitrary, so ptr may not be aligned for int16_t: copy the bytes
+    MEMCPY(ptr, &val, sizeof(int16_t));
+  }
+  return ret;
+}
+
+// Overwrite sizeof(int32_t) bytes at `offset` with the raw representation of `val`.
+int ObILobCursor::write_i32(int64_t offset, int32_t val)
+{
+  INIT_SUCC(ret);
+  static const int64_t max_i32_len = sizeof(int32_t);
+  int64_t total_len = get_length();
+  int64_t avail_len = 0;
+  char* ptr = nullptr;
+  if (offset < 0 || offset >= total_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("offset overflow", K(ret), K(max_i32_len),
K(offset), K(total_len));
+  } else if ((avail_len = std::min(total_len - offset, max_i32_len)) < max_i32_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len is not enough", K(ret), K(max_i32_len), K(offset), K(total_len), K(avail_len));
+  } else if (OB_FAIL(get_ptr_for_write(offset, avail_len, ptr))) {
+    LOG_WARN("get ptr fail", K(ret), K(avail_len), K(max_i32_len), K(offset), K(total_len), KP(ptr));
+  } else {
+    // offset is arbitrary, so ptr may not be aligned for int32_t: copy the bytes
+    MEMCPY(ptr, &val, sizeof(int32_t));
+  }
+  return ret;
+}
+
+// Overwrite sizeof(int64_t) bytes at `offset` with the raw representation of `val`.
+// Fails with OB_ERR_UNEXPECTED if the value does not fit inside the existing data.
+int ObILobCursor::write_i64(int64_t offset, int64_t val)
+{
+  INIT_SUCC(ret);
+  static const int64_t max_i64_len = sizeof(int64_t);
+  int64_t total_len = get_length();
+  int64_t avail_len = 0;
+  char* ptr = nullptr;
+  if (offset < 0 || offset >= total_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("offset overflow", K(ret), K(max_i64_len), K(offset), K(total_len));
+  } else if ((avail_len = std::min(total_len - offset, max_i64_len)) < max_i64_len) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("avail_len is not enough", K(ret), K(max_i64_len), K(offset), K(total_len), K(avail_len));
+  } else if (OB_FAIL(get_ptr_for_write(offset, avail_len, ptr))) {
+    LOG_WARN("get ptr fail", K(ret), K(avail_len), K(max_i64_len), K(offset), K(total_len), KP(ptr));
+  } else {
+    // ptr may be unaligned for int64_t: copy the bytes
+    MEMCPY(ptr, &val, sizeof(int64_t));
+  }
+  return ret;
+}
+
+// Move `move_len` bytes from `src_offset` to `dst_offset` inside the cursor data.
+// Both ranges must lie entirely inside the current data; the copy is delegated to
+// set(..., use_memmove = true) so that overlapping ranges are handled.
+int ObILobCursor::move_data(int64_t dst_offset, int64_t src_offset, int64_t move_len)
+{
+  INIT_SUCC(ret);
+  int64_t total_len = get_length();
+  char* src_ptr = nullptr;
+  if (move_len == 0) { // skip
+  } else if (move_len < 0 || dst_offset < 0 || src_offset < 0
+             || dst_offset >= total_len || src_offset >= total_len
+             || dst_offset + move_len > total_len || src_offset + move_len > total_len) {
+    // negative values would otherwise slip through the upper-bound checks
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("offset overflow", K(ret), K(dst_offset), K(src_offset), K(move_len), K(total_len));
+  } else if (OB_FAIL(get_ptr_for_write(src_offset, move_len, src_ptr))) {
+    LOG_WARN("get src ptr fail", K(ret), K(src_offset), K(move_len), K(total_len));
+  } else if
(OB_FAIL(set(dst_offset, src_ptr, move_len, true))) {
+    LOG_WARN("set src_data fail", K(ret), K(dst_offset), K(src_offset), K(move_len), K(total_len), KP(src_ptr));
+  }
+  return ret;
+}
+
+// Initialise this cursor with a private copy of everything `cursor` holds.
+int ObLobInRowUpdateCursor::init(const ObILobCursor *cursor)
+{
+  INIT_SUCC(ret);
+  ObString data;
+  if (OB_ISNULL(cursor)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("cursor is null", K(ret));
+  } else if (OB_FAIL(cursor->get_data(data))) {
+    LOG_WARN("get_data fail", K(ret), KPC(cursor));
+  } else if (OB_FAIL(data_.append(data))) {
+    LOG_WARN("append fail", K(ret), KPC(cursor), K(data));
+  }
+  return ret;
+}
+
+// Write `buf_len` bytes from `buf` at `offset`, growing the buffer when the write ends
+// past the current length.
+//
+// @param [in] offset       destination offset, must be >= 0
+// @param [in] buf          source bytes; may point into this cursor's own buffer
+// @param [in] buf_len      number of bytes to write, must be >= 0
+// @param [in] use_memmove  use MEMMOVE instead of MEMCPY (needed when source and
+//                          destination may overlap, e.g. from move_data())
+int ObLobInRowUpdateCursor::set(int64_t offset, const char *buf, int64_t buf_len, bool use_memmove)
+{
+  INIT_SUCC(ret);
+  // > 0 only when the write ends past the current end of data_
+  int64_t append_len = offset + buf_len - data_.length();
+  if (offset < 0 || buf_len < 0 || (buf_len > 0 && OB_ISNULL(buf))) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("invalid argument", K(ret), K(offset), K(buf_len), KP(buf));
+  } else if (append_len > 0 && OB_FAIL(data_.reserve(append_len))) {
+    // reserve() takes an unsigned length: for an in-place overwrite append_len is
+    // negative and must not be passed down, where it would turn into a huge value
+    LOG_WARN("reserve fail", K(ret), K(offset), K(buf_len), K(append_len), K(data_));
+  } else if (append_len > 0 && OB_FAIL(data_.set_length(data_.length() + append_len))) {
+    LOG_WARN("set_length fail", K(ret), K(offset), K(buf_len), K(append_len), K(data_));
+  } else {
+    if (use_memmove) {
+      MEMMOVE(data_.ptr() + offset, buf, buf_len);
+    } else {
+      MEMCPY(data_.ptr() + offset, buf, buf_len);
+    }
+  }
+  return ret;
+}
+
+} // common
+} // oceanbase
diff --git a/deps/oblib/src/lib/lob/ob_lob_base.h b/deps/oblib/src/lib/lob/ob_lob_base.h
new file mode 100644
index 0000000000..da43ce52a3
--- /dev/null
+++ b/deps/oblib/src/lib/lob/ob_lob_base.h
@@ -0,0 +1,144 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details. + * This file contains interface support for the json base abstraction. + */ + +#ifndef OCEANBASE_SQL_OB_LOB_BASE +#define OCEANBASE_SQL_OB_LOB_BASE + +#include "lib/string/ob_string_buffer.h" +#include "lib/number/ob_number_v2.h" + +namespace oceanbase { +namespace common { + +class ObILobCursor +{ +public: + ObILobCursor() {} + virtual ~ObILobCursor(){} + virtual int get_data(ObString &data) const = 0; + virtual int set(int64_t offset, const char *buf, int64_t buf_len, bool use_memmove=false) = 0; + int set(int64_t offset, const ObString &data) { return set(offset, data.ptr(), data.length()); } + + virtual int get(int64_t offset, int64_t len, ObString &data) const = 0; + int get_for_write(int64_t offset, int64_t len, ObString &data); + virtual int64_t get_length() const = 0; + + virtual bool is_full_mode() const = 0; + virtual int append(const char* buf, int64_t buf_len) = 0; + virtual int reset_data(const ObString &data) = 0; + virtual int append(const ObString& data) = 0; + + virtual int reset() = 0; + virtual int64_t to_string(char *buf, const int64_t buf_len) const + { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "base"); + return pos; + } + +public: + int check_and_get(int64_t offset, int64_t len, const char *&ptr, int64_t &avail_len) const; + int read_bool(int64_t offset, bool *val) const; + int read_i8(int64_t offset, int8_t *val) const; + int read_i16(int64_t offset, int16_t *val) const; + int read_i32(int64_t offset, int32_t *val) const; + int read_i64(int64_t offset, int64_t *val) const; + int read_float(int64_t offset, float *val) const; + int read_double(int64_t offset, double *val) const; + + int decode_i16(int64_t &offset, int16_t *val) const; + int decode_vi64(int64_t &offset, int64_t *val) const; + int deserialize(int64_t &offset, number::ObNumber *number) const; + + int write_i8(int64_t offset, int8_t val); + int write_i16(int64_t offset, int16_t val); + int write_i32(int64_t offset, int32_t 
val); + int write_i64(int64_t offset, int64_t val); + int move_data(int64_t dst_offset, int64_t src_offset, int64_t move_len); + +protected: + virtual int get_ptr(int64_t offset, int64_t len, const char *&ptr) const = 0; + virtual int get_ptr_for_write(int64_t offset, int64_t len, char *&ptr) = 0; +}; + +class ObLobInRowCursor : public ObILobCursor +{ +public: + ObLobInRowCursor(): + data_() + {} + ObLobInRowCursor(ObString& data): + data_(data) + {} + ObLobInRowCursor(const char *data, const int64_t length): + data_(length, data) + {} + int init(const ObString &data) { data_ = data; return OB_SUCCESS; } + virtual int get_data(ObString &data) const { data = data_; return OB_SUCCESS; } + virtual int get(int64_t offset, int64_t len, ObString &data) const { data.assign_ptr(data_.ptr() + offset, len); return OB_SUCCESS; } + virtual int64_t get_length() const { return data_.length(); } + virtual bool is_full_mode() const { return true; } + virtual int set(int64_t offset, const char *buf, int64_t buf_len, bool use_memmove) { return OB_NOT_SUPPORTED; } + virtual int append(const char* buf, int64_t buf_len) { return OB_NOT_SUPPORTED; } + virtual int append(const ObString& data) { return OB_NOT_SUPPORTED; } + virtual int reset_data(const ObString &data) { return init(data); } + virtual int reset() { data_.reset(); return OB_SUCCESS; } + ObString& data() { return data_; } + const ObString& data() const { return data_; } + TO_STRING_KV(K(data_)); + +protected: + virtual int get_ptr(int64_t offset, int64_t len, const char *&ptr) const { ptr = data_.ptr() + offset; return OB_SUCCESS; } + virtual int get_ptr_for_write(int64_t offset, int64_t len, char *&ptr) { ptr = data_.ptr() + offset; return OB_SUCCESS; } +private: + ObString data_; +}; + +class ObLobInRowUpdateCursor : public ObILobCursor +{ +public: + ObLobInRowUpdateCursor(ObIAllocator *allocator): + data_(allocator) + {} + + ObLobInRowUpdateCursor(): + data_() + {} + virtual ~ObLobInRowUpdateCursor(){} + + int init(const 
ObILobCursor *data); + virtual int get_data(ObString &data) const { data = data_.string(); return OB_SUCCESS; } + + virtual int set(int64_t offset, const char *buf, int64_t buf_len, bool use_memmove=false); + virtual int get(int64_t offset, int64_t len, ObString &data) const { data.assign_ptr(data_.ptr() + offset, len); return OB_SUCCESS; } + virtual int64_t get_length() const { return data_.length(); } + + virtual bool is_full_mode() const { return true; } + virtual int append(const char* buf, int64_t buf_len) { return data_.append(buf, buf_len); } + virtual int append(const ObString& data) { return append(data.ptr(), data.length()); } + virtual int reset_data(const ObString &data) { data_.reuse(); return data_.append(data); } + virtual int reset() { data_.reset(); return OB_SUCCESS; } + TO_STRING_KV(K(data_)); + +protected: + virtual int get_ptr(int64_t offset, int64_t len, const char *&ptr) const { ptr = data_.ptr() + offset; return OB_SUCCESS; } + virtual int get_ptr_for_write(int64_t offset, int64_t len, char *&ptr) { ptr = data_.ptr() + offset; return OB_SUCCESS; } + +private: + ObStringBuffer data_; +}; + + +} // common +} // oceanbase +#endif // OCEANBASE_SQL_OB_LOB_BASE \ No newline at end of file diff --git a/deps/oblib/src/lib/ob_define.h b/deps/oblib/src/lib/ob_define.h index 52020e0bd3..aa012ab746 100644 --- a/deps/oblib/src/lib/ob_define.h +++ b/deps/oblib/src/lib/ob_define.h @@ -1967,6 +1967,17 @@ const int64_t OB_MAX_CONTEXT_VALUE_LENGTH = 4000; const int64_t OB_MAX_CONTEXT_CLIENT_IDENTIFIER_LENGTH = 65; const int64_t OB_MAX_CONTEXT_CLIENT_IDENTIFIER_LENGTH_IN_SESSION = 64; +// log row value options +const char *const OB_LOG_ROW_VALUE_PARTIAL_LOB = "partial_lob"; +const char *const OB_LOG_ROW_VALUE_PARTIAL_JSON = "partial_json"; +const char *const OB_LOG_ROW_VALUE_PARTIAL_ALL = "partial_all"; +// json partial update expr flag +enum ObJsonPartialUpdateFlag +{ + OB_JSON_PARTIAL_UPDATE_ALLOW = 1 << 0, + OB_JSON_PARTIAL_UPDATE_LAST_EXPR = 1 << 1, + 
OB_JSON_PARTIAL_UPDATE_FIRST_EXPR = 1 << 2, +}; enum ObDmlType { diff --git a/deps/oblib/src/lib/ob_name_def.h b/deps/oblib/src/lib/ob_name_def.h index b1cd8344a0..2d2b9cc832 100644 --- a/deps/oblib/src/lib/ob_name_def.h +++ b/deps/oblib/src/lib/ob_name_def.h @@ -977,6 +977,8 @@ #define N_JSON_EXTRACT "json_extract" #define N_JSON_CONTAINS "json_contains" #define N_JSON_CONTAINS_PATH "json_contains_path" +#define N_JSON_SCHEMA_VALID "json_schema_valid" +#define N_JSON_SCHEMA_VALIDATION_REPORT "json_schema_validation_report" #define N_JSON_DEPTH "json_depth" #define N_JSON_KEYS "json_keys" #define N_JSON_ARRAY "json_array" @@ -987,6 +989,7 @@ #define N_JSON_REMOVE "json_remove" #define N_JSON_SEARCH "json_search" #define N_JSON_ARRAY_APPEND "json_array_append" +#define N_JSON_APPEND "json_append" #define N_JSON_ARRAY_INSERT "json_array_insert" #define N_JSON_VALUE "json_value" #define N_JSON_REPLACE "json_replace" @@ -1069,6 +1072,9 @@ #define N_XMLSERIALIZE "xmlserialize" #define N_XMLCAST "xmlcast" #define N_UPDATEXML "updatexml" +#define N_INSERTCHILDXML "insertchildxml" +#define N_XMLSEQUENCE "xmlsequence" +#define N_DELETEXML "deletexml" #define N_NLS_INITCAP "nls_initcap" #define N_PRIV_SQL_UDT_CONSTRUCT "_udt_construct" #define N_PRIV_UDT_ATTR_ACCESS "_udt_attr_access" diff --git a/deps/oblib/src/lib/string/ob_string_buffer.cpp b/deps/oblib/src/lib/string/ob_string_buffer.cpp index fd2af2b17d..498d3cfb29 100644 --- a/deps/oblib/src/lib/string/ob_string_buffer.cpp +++ b/deps/oblib/src/lib/string/ob_string_buffer.cpp @@ -45,12 +45,23 @@ void ObStringBuffer::reuse() len_ = 0; } +int ObStringBuffer::get_result_string(ObString &buffer) +{ + INIT_SUCC(ret); + buffer.assign_buffer(data_, cap_); + buffer.set_length(len_); + data_ = nullptr; + len_ = 0; + cap_ = 0; + return ret; +} + int ObStringBuffer::append(const char *str) { return append(str, NULL == str ? 
0 : strlen(str)); } -int ObStringBuffer::append(const char *str, const uint64_t len) +int ObStringBuffer::append(const char *str, const uint64_t len, int8_t index) { INIT_SUCC(ret); if (OB_ISNULL(allocator_)) { @@ -66,7 +77,7 @@ int ObStringBuffer::append(const char *str, const uint64_t len) if (need_len < len_) { ret = OB_SIZE_OVERFLOW; LOG_WARN("size over flow", K(ret), K(need_len), K(len_)); - } else if (OB_FAIL(reserve(need_len))) { + } else if (OB_FAIL(reserve(index == -1 ? need_len : len))) { LOG_WARN("reserve data failed", K(ret), K(need_len), K(len_)); } else { MEMCPY(data_ + len_, str, len); @@ -152,6 +163,22 @@ int ObStringBuffer::set_length(const uint64_t len) return ret; } +int ObStringBuffer::deep_copy(ObIAllocator *allocator, ObStringBuffer &input) +{ + INIT_SUCC(ret); + char *new_data = NULL; + if (OB_ISNULL(allocator)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("allocator is null.", K(ret)); + } else { + set_allocator(allocator); + len_ = input.length(); + cap_ = input.capacity(); + data_ = input.ptr(); + } + return ret; +} + const ObString ObStringBuffer::string() const { return ObString(0, static_cast(len_), data_); diff --git a/deps/oblib/src/lib/string/ob_string_buffer.h b/deps/oblib/src/lib/string/ob_string_buffer.h index 44583b70f2..d3a6107b21 100644 --- a/deps/oblib/src/lib/string/ob_string_buffer.h +++ b/deps/oblib/src/lib/string/ob_string_buffer.h @@ -33,9 +33,10 @@ public: common::ObIAllocator *get_allocator() const { return allocator_; } void reset(); void reuse(); + int get_result_string(ObString &buffer); int append(const char *str); - int append(const char *str, const uint64_t len); + int append(const char *str, const uint64_t len, int8_t index = -1); int append(const ObString &str); int reserve(const uint64_t len); int extend(const uint64_t len); @@ -52,6 +53,7 @@ public: int set_length(const uint64_t len); char back() { return data_[len_ - 1]; }; const ObString string() const; + int deep_copy(common::ObIAllocator *allocator, 
ObStringBuffer &input); int64_t to_string(char *buf, const int64_t buf_len) const { int64_t pos = 0; diff --git a/deps/oblib/src/lib/xml/ob_binary_aggregate.cpp b/deps/oblib/src/lib/xml/ob_binary_aggregate.cpp new file mode 100644 index 0000000000..2cc8e820ee --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_binary_aggregate.cpp @@ -0,0 +1,940 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation support for the json and xml binary aggregate. + */ + +#define USING_LOG_PREFIX SQL_ENG + +#include "ob_binary_aggregate.h" + +namespace oceanbase { +namespace common { + +struct ObJsonBinAggKeyCompare { + ObStringBuffer *buff_; + + int operator()(const ObAggBinKeyInfo *left, const ObAggBinKeyInfo *right) { + int res = 0; + if (left->key_len_ != right->key_len_) { + res = left->key_len_ < right->key_len_; + } else { + ObString left_str = ObString(left->key_len_, buff_->ptr() + left->offset_); + ObString right_str = ObString(right->key_len_, buff_->ptr() + right->offset_); + res = (left_str.compare(right_str) < 0); + } + return res; + } + + int compare(const ObAggBinKeyInfo *left, const ObAggBinKeyInfo *right) { + int res = 0; + if (left->key_len_ != right->key_len_) { + res = left->key_len_ < right->key_len_; + } else { + ObString left_str = ObString(left->key_len_, buff_->ptr() + left->offset_); + ObString right_str = ObString(right->key_len_, buff_->ptr() + right->offset_); + res = (left_str.compare(right_str) < 0); + } + return res; + } +}; + +struct 
ObXmlBinAggKeyCompare { + ObStringBuffer *buff_; + + int operator()(const ObAggBinKeyInfo *left, const ObAggBinKeyInfo *right) { + ObString left_str = ObString(left->key_len_, buff_->ptr() + left->offset_); + ObString right_str = ObString(right->key_len_, buff_->ptr() + right->offset_); + return (left_str.compare(right_str) < 0); + } + + int compare(const ObAggBinKeyInfo *left, const ObAggBinKeyInfo *right) { + ObString left_str = ObString(left->key_len_, buff_->ptr() + left->offset_); + ObString right_str = ObString(right->key_len_, buff_->ptr() + right->offset_); + return (left_str.compare(right_str) < 0); + } +}; + +ObBinAggSerializer::ObBinAggSerializer(ObIAllocator* allocator, + ObBinAggType type, + uint8_t header_type, + bool need_merge_unparsed, + ObIAllocator* back_allocator, + ObIAllocator* arr_allocator) + : value_(allocator), + key_(allocator), + buff_(allocator), + last_is_unparsed_text_(false), + last_is_text_node_(false), + is_xml_agg_(need_merge_unparsed), + sort_and_unique_(false), + merge_text_(true), + header_type_(header_type), + alloc_flag_(ObBinAggAllocFlag::AGG_ALLOC_A), + type_(type), + key_len_(0), + value_len_(0), + count_(0), + index_start_(0), + index_entry_size_(0), + key_entry_start_(0), + key_entry_size_(0), + value_entry_start_(0), + value_entry_size_(0), + key_start_(0), + allocator_(allocator), + back_allocator_(back_allocator), + arr_allocator_(arr_allocator), + page_allocator_(*(arr_allocator == nullptr ? 
allocator : arr_allocator), common::ObModIds::OB_MODULE_PAGE_ALLOCATOR), + key_info_(OB_MALLOC_NORMAL_BLOCK_SIZE, page_allocator_) +{ + new (&header_) ObMulBinHeaderSerializer(); + if (type_ == AGG_XML) { + new (&doc_header_) ObXmlDocBinHeader(MEMBER_LAZY_SORTED); + } + +} + +// for json +int ObBinAggSerializer::append_key_and_value(ObString key, ObStringBuffer &value, ObJsonBin *json_val) +{ + INIT_SUCC(ret); + value.reuse(); + ObAggBinKeyInfo *key_info = nullptr; + int64_t value_record = value_.length(); + + ObIAllocator * arr_allocator = get_array_allocator(); + if (OB_ISNULL(key_info = static_cast + (arr_allocator->alloc(sizeof(ObAggBinKeyInfo))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate key info struct failed", K(ret)); + } else { + int key_count = key_info_.count(); + key_info->key_len_ = key.length(); + key_info->origin_index_ = 0; + key_info->unparsed_ = false; + key_info->type_ = static_cast(json_val->json_type()); + key_info->value_offset_ = value_record; + key_info->offset_ = key_count == 0 ? 
+      0 : key_info_.at(key_count-1)->offset_ + key_info_.at(key_count-1)->key_len_;
+
+    if (OB_FAIL(json_val->get_total_value(value))) {
+      LOG_WARN("get total value failed", K(ret));
+    } else if (OB_FAIL(key_.append(key.ptr(), key.length()))) {
+      LOG_WARN("failed to append key into key_", K(ret), K(key));
+    } else {
+      uint64_t need_size = value_.length() + value.length() + 8;
+      if (check_three_allocator() || need_size <= value_.capacity() || need_size < REPLACE_MEMORY_SIZE_THRESHOLD) {
+        // append in place into the current value buffer
+        if (OB_FAIL(value_.append(value.ptr(), value.length(), 0))) {
+          LOG_WARN("failed to append value into value_", K(ret), K(value));
+        }
+      } else {
+        // otherwise go through copy_and_reset(), alternating between allocator_ and
+        // back_allocator_ and flipping the alloc flag after each successful switch
+        if (first_alloc_flag()) {
+          if (OB_FAIL(copy_and_reset(back_allocator_, allocator_, value))) {
+            LOG_WARN("failed to copy and reset.", K(ret));
+          } else {
+            set_second_alloc();
+          }
+        } else {
+          if (OB_FAIL(copy_and_reset(allocator_, back_allocator_, value))) {
+            LOG_WARN("failed to copy and reset.", K(ret));
+          } else {
+            set_first_alloc();
+          }
+        }
+      }
+      key_info->value_len_ = value.length();
+      if (OB_SUCC(ret) && OB_FAIL(key_info_.push_back(key_info))) {
+        LOG_WARN("failed to push back key_info.", K(ret));
+      }
+    }
+  }
+
+  return ret;
+}
+
+// for xml
+// Dispatch one xml binary to the matching add_* routine:
+//  - xml-agg mode: unparsed vs parsed input;
+//  - otherwise by node type: element/instruction, document/content, or single leaf.
+int ObBinAggSerializer::append_key_and_value(ObXmlBin *xml_bin)
+{
+  INIT_SUCC(ret);
+  if (is_xml_agg_) {
+    if (xml_bin->meta_.is_unparse_) {
+      if (OB_FAIL(add_unparsed_xml(xml_bin))) {
+        LOG_WARN("add unparsed xml failed", K(ret));
+      }
+    } else {
+      if (OB_FAIL(add_parsed_xml(xml_bin))) {
+        LOG_WARN("add parsed xml failed", K(ret));
+      }
+    }
+  } else {
+    ObMulModeNodeType type = xml_bin->type();
+    if (type == M_ELEMENT || type == M_INSTRUCT) {
+      if (OB_FAIL(add_element_xml(xml_bin))) {
+        LOG_WARN("add element failed", K(ret));
+      }
+    } else if (type == M_DOCUMENT || type == M_CONTENT) {
+      if (OB_FAIL(add_parsed_xml(xml_bin))) {
+        LOG_WARN("add parsed xml failed", K(ret));
+      }
+    } else {
+      if (OB_FAIL(add_single_leaf_xml(xml_bin))) {
+        LOG_WARN("add single leaf xml failed", K(ret));
+      }
+    }
+  }
+
+  return ret;
+}
+
+// for element
+// Append an element's buffer to value_ and record a key entry for it.
+int ObBinAggSerializer::add_element_xml(ObXmlBin *xml_bin)
+{
+  INIT_SUCC(ret);
+  ObAggBinKeyInfo *key_info = nullptr;
+  int64_t key_count = key_info_.count();
+  int64_t value_record = value_.length();
+  ObString key;
+
+  if (last_is_text_node_ && OB_FAIL(deal_last_unparsed())) {
+    LOG_WARN("failed to deal with last unparsed.", K(ret));
+  } else {
+    // re-read the length: it is the offset at which this element's value will start
+    value_record = value_.length();
+  }
+
+  if (OB_FAIL(ret)) {
+  } else if (OB_FAIL(xml_bin->get_key(key))) {
+    LOG_WARN("get key failed.", K(ret));
+  } else if (OB_FAIL(value_.append(xml_bin->get_element_buffer()))) {
+    LOG_WARN("failed to append element buffer into value_", K(ret));
+  } else if (OB_ISNULL(key_info = static_cast<ObAggBinKeyInfo *>
+                        (allocator_->alloc(sizeof(ObAggBinKeyInfo))))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("allocate key info struct failed", K(ret));
+  } else {
+    int64_t current_count = key_count;
+    key_info->key_len_ = key.length();
+    key_info->value_len_ = 0;
+    key_info->type_ = xml_bin->type();
+    key_info->unparsed_ = xml_bin->meta_.is_unparse_;
+    key_info->origin_index_ = key_count;
+    key_info->value_offset_ = value_record;
+    key_info->offset_ = current_count == 0 ?
+ 0 : key_info_.at(current_count - 1)->offset_ + key_info_.at(current_count -1)->key_len_; + + if (OB_FAIL(key_info_.push_back(key_info))) { + LOG_WARN("failed to append key info.", K(ret)); + } else if (OB_FAIL(key_.append(key.ptr(), key.length()))) { + LOG_WARN("failed to append key into key_", K(ret), K(key)); + } + } + + return ret; +} + +// for leaf +int ObBinAggSerializer::add_single_leaf_xml(ObXmlBin *xml_bin) +{ + INIT_SUCC(ret); + ObString value; + ObAggBinKeyInfo *key_info = nullptr; + int32_t count = xml_bin->meta_.count_; + int64_t key_count = key_info_.count(); + + ObMulModeNodeType type = xml_bin->type(); + ObStringBuffer xml_text(allocator_); + if (last_is_text_node_ && !merge_text_ && OB_FAIL(deal_last_unparsed())) { + LOG_WARN("failed to deal with last unparsed.", K(ret)); + } else if (type == M_ATTRIBUTE || type == M_NAMESPACE) { + type = M_TEXT; + last_is_text_node_ = true; + if (OB_FAIL(xml_bin->get_value(value))) { + LOG_WARN("failed to get value.", K(ret)); + } else if (OB_FAIL(xml_text.append(value.ptr(), value.length()))) { + LOG_WARN("failed to append vlaue.", K(ret)); + } + } else if (type == M_TEXT) { + last_is_text_node_ = true; + if (OB_FAIL(xml_bin->get_text_value(value))) { + LOG_WARN("failed to get text value", K(ret)); + } else if (OB_FAIL(xml_text.append(value.ptr(), value.length()))) { + LOG_WARN("failed to append vlaue.", K(ret)); + } + } else { + if (last_is_text_node_ && OB_FAIL(deal_last_unparsed())) { + LOG_WARN("failed to deal with last unparsed.", K(ret)); + } else if (OB_FAIL(xml_text.append(xml_bin->meta_.data_, xml_bin->meta_.len_))) { + LOG_WARN("failed to append xmltext.", K(ret)); + } + } + int64_t value_record = value_.length(); + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(value_.append(xml_text.ptr(), xml_text.length()))) { + LOG_WARN("append failed.", K(xml_text), K(ret)); + } else if (need_to_add_node(key_count, type)) { + if (OB_ISNULL(key_info = static_cast + (allocator_->alloc(sizeof(ObAggBinKeyInfo))))) { + 
ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate key info struct failed", K(ret)); + } else { + key_info->key_len_ = 0; + key_info->value_len_ = 0; + key_info->type_ = type; + key_info->unparsed_ = false; + key_info->origin_index_ = key_count; + key_info->value_offset_ = value_record; + key_info->offset_ = key_.length(); + if (OB_FAIL(key_info_.push_back(key_info))) { + LOG_WARN("failed to push back key info into array", K(ret), K(key_info)); + } + } + } + + return ret; +} + +bool ObBinAggSerializer::need_to_add_node(int64_t key_count, ObMulModeNodeType type) +{ + bool res = false; + if (key_count == 0) { + res = true; + } else if (!merge_text_) { + res = true; + } else if (!(type == M_TEXT && key_info_.at(key_count - 1)->type_ == M_TEXT)) { + res = true; + } else { + res = false; + } + return res; +} + +// for content and document +int ObBinAggSerializer::add_parsed_xml(ObXmlBin *xml_bin) +{ + INIT_SUCC(ret); + + ObString value; + ObAggBinKeyInfo *key_info = nullptr; + int32_t count = xml_bin->meta_.count_; + int64_t key_count = key_info_.count(); + int64_t value_record = value_.length(); + int64_t bin_value_start = 0; + + if (last_is_unparsed_text_ && OB_FAIL(deal_last_unparsed())) { + LOG_WARN("failed to deal with last unparsed.", K(ret)); + } else { + value_record = value_.length(); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(xml_bin->get_value_start(bin_value_start))) { + LOG_WARN("failed to get value start.", K(ret)); + } else if (OB_FAIL(xml_bin->get_total_value(value, bin_value_start))) { + LOG_WARN("failed to get total value.", K(ret)); + } else if (OB_FAIL(value_.append(value.ptr(), value.length(), 0))) { + LOG_WARN("failed to append key into key_", K(ret), K(value)); + } + + for (int32_t i = 0; OB_SUCC(ret) && i < count; i++) { + uint8_t type = 0; + int64_t origin_index = 0; + int64_t bin_value_offset = 0; + + ObString key; + if (OB_FAIL(xml_bin->get_index_key(key, origin_index, bin_value_offset, i))) { + LOG_WARN("get index key failed", K(i)); 
+ } else if (OB_FAIL(xml_bin->get_value_entry_type(type, origin_index))) { + LOG_WARN("get value entry type failed", K(ret)); + } else if (type < 0 || type >= M_MAX_TYPE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get type invalid.", K(ret), K(type)); + } else if (OB_ISNULL(key_info = static_cast + (allocator_->alloc(sizeof(ObAggBinKeyInfo))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate key info struct failed", K(ret)); + } else { + int64_t current_count = key_count + i; + key_info->key_len_ = key.length(); + key_info->value_len_ = 0; + key_info->type_ = type; + key_info->unparsed_ = xml_bin->meta_.is_unparse_; + key_info->origin_index_ = current_count; + key_info->value_offset_ = bin_value_offset - bin_value_start + value_record; + key_info->offset_ = current_count == 0 ? + 0 : key_info_.at(current_count - 1)->offset_ + key_info_.at(current_count -1)->key_len_; + + if (OB_FAIL(key_info_.push_back(key_info))) { + LOG_WARN("failed to push back key info into array", K(ret), K(key_info)); + } else if (OB_FAIL(key_.append(key.ptr(), key.length()))) { + LOG_WARN("failed to append key into key_", K(ret), K(key)); + } + } + } + + return ret; +} + +// for text +int ObBinAggSerializer::add_unparsed_xml(ObXmlBin *xml_bin) +{ + INIT_SUCC(ret); + ObString value; + ObAggBinKeyInfo *key_info = nullptr; + int64_t key_count = key_info_.count(); + int64_t value_record = value_.length(); + ObStringBuffer xml_text(allocator_); + if (xml_bin->type() == M_ATTRIBUTE) { + if (OB_FAIL(xml_bin->get_value(value))) { + LOG_WARN("failed to get value.", K(ret)); + } else if (OB_FAIL(xml_text.append(value.ptr(), value.length()))){ + LOG_WARN("failed to append value.", K(ret), K(value)); + } + } else { + if (OB_FAIL(xml_bin->print_xml(xml_text, 0, 0, 0))) { + LOG_WARN("failed to print xml bin.", K(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(value_.append(xml_text.ptr(), xml_text.length()))) { + LOG_WARN("failed to append value.", K(ret)); + } else if (key_count == 0 
|| !key_info_.at(key_count - 1)->unparsed_) { + if (OB_ISNULL(key_info = static_cast + (allocator_->alloc(sizeof(ObAggBinKeyInfo))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate key info struct failed", K(ret)); + } else { + key_info->key_len_ = 0; + key_info->value_len_ = 0; + key_info->type_ = ObMulModeNodeType::M_ELEMENT; + key_info->unparsed_ = true; + key_info->origin_index_ = key_count; + key_info->value_offset_ = value_record; + key_info->offset_ = key_.length(); + if (OB_FAIL(key_info_.push_back(key_info))) { + LOG_WARN("failed to push back key info into array", K(ret), K(key_info)); + } + } + } + last_is_unparsed_text_ = true; + + return ret; +} + +/* +Estimated total length +@param[in] part length +@return approximate size + +First estimate an estimate_smaller based on the existing length, +then calculate the total size based on this estimate_smaller, +then calculate the estimated block size based on the calculated total, +and then compare the estimated block size with the initial estimate_smaller, +if it is not in the same order of magnitude, then Go up one order of magnitude + + K: key_len V: value_len C: count S(a): sizeof(a) + Roughly think: + S(total) = ceil((log2 total) / 8) + S(total) = [1, 4] + head = 0 + FOR XML: S(C) * C + header_: obj_var_offset_ + obj_var_size_ +K + V + (1 + S(total)) * C + 2 * S(total) * C + S(C) * C + header = total +*/ +int64_t ObBinAggSerializer::estimate_total(int64_t base_length, int64_t count, + int32_t type, int64_t xml_header_size) +{ + int64_t res = 0; + uint8_t estimate_smaller_type = ObMulModeVar::get_var_type(base_length); + uint8_t estimate_smaller = ObMulModeVar::get_var_size(estimate_smaller_type); + uint8_t estimated_size_type = 0; + do { + estimate_smaller = ObMulModeVar::get_var_size(estimate_smaller_type); + uint8_t count_type = ObMulModeVar::get_var_type(count); + uint8_t count_size = ObMulModeVar::get_var_size(count_type); + + // for head_ + uint8_t header_obj_var_size_type = 
ObMulModeVar::get_var_type(res > 0 ? res : base_length); + uint8_t header_obj_var_size = ObMulModeVar::get_var_size(header_obj_var_size_type); + uint8_t header_obj_var_offset = MUL_MODE_BIN_HEADER_LEN + count_size; + uint64_t header_size = header_obj_var_offset + header_obj_var_size; + + // for total + int64_t total = base_length + (sizeof(uint8_t) + estimate_smaller) * count + + 2 * estimate_smaller * count + header_size; + if (type == AGG_XML) { + total += count_size * count + xml_header_size; + } + estimated_size_type = ObMulModeVar::get_var_type(total); + res = total; + } while (estimate_smaller_type < ObMulModeBinLenSize::MBL_UINT64 + && estimate_smaller_type++ < estimated_size_type); + return res; +} + +int ObBinAggSerializer::construct_header() +{ + INIT_SUCC(ret); + ObStringBuffer header_buff(allocator_); + ObStringBuffer doc_header_buff(allocator_); + + uint64_t count = key_info_.count(); + int64_t key_len = key_.length(); + int64_t value_len = value_.length(); + + if (has_unique_flag()) { + for (int64_t i = 0; OB_SUCC(ret) && i < key_info_.count() - 1; i++) { + ObAggBinKeyInfo *key_info = key_info_.at(i); + ObAggBinKeyInfo *next_key_info = key_info_.at(i + 1); + if (key_info->key_len_ == next_key_info->key_len_) { + int64_t this_key_start = i == 0 ? 
0 : key_info_.at(i - 1)->offset_ + key_info_.at(i - 1)->key_len_; + int64_t next_key_start = this_key_start + key_info->key_len_; + ObString this_key(key_info->key_len_, key_.ptr() + key_info->offset_); + ObString next_key(next_key_info->key_len_, key_.ptr() + next_key_info->offset_); + if (this_key.compare(next_key) == 0) { + key_info->unparsed_ = true; + count--; + key_len -= key_info->key_len_; + value_len -= key_info->value_len_; + } + } + } + } + count_ = count; + key_len_ = key_len; + value_len_ = value_len; + + ObString header_str; + int64_t buff_length = buff_.length(); + if (type_ == AGG_XML) { + buff_length = buff_.length(); + doc_header_.serialize(doc_header_buff); + } + + int64_t total_size = ObBinAggSerializer::estimate_total(value_len_ + key_len_, + count_, type_, doc_header_buff.length()); + ObMulBinHeaderSerializer header_serializer(&header_buff, + static_cast(header_type_), + total_size, + count_); + if (OB_FAIL(header_serializer.serialize())) { + LOG_WARN("header serialize failed.", K(ret)); + } else if (OB_FALSE_IT(header_str = header_serializer.buffer()->string())) { + } else if (OB_FAIL(buff_.reserve(total_size))) { + LOG_WARN("buff reserver failed.", K(ret), K(total_size)); + } else if (OB_FAIL(buff_.append(header_str.ptr(), header_str.length(), 0))) { + LOG_WARN("failed to append.", K(header_str)); + } else if (doc_header_buff.length() != 0 && + OB_FAIL(buff_.append(doc_header_buff.ptr(), doc_header_buff.length(), 0))) { + LOG_WARN("failed to append.", K(doc_header_buff)); + } else { + header_ = header_serializer; + } + + return ret; +} + +void ObBinAggSerializer::set_index_entry(int64_t origin_index, int64_t sort_index) +{ + int64_t offset = index_start_ + origin_index * index_entry_size_; + char* write_buf = buff_.ptr() + offset; + ObMulModeVar::set_var(sort_index, header_.get_count_var_size_type(), write_buf); +} + +void ObBinAggSerializer::set_key_entry(int64_t entry_idx, int64_t key_offset, int64_t key_len) +{ + int64_t offset = 
key_entry_start_ + entry_idx * (key_entry_size_ * 2); + char* write_buf = buff_.ptr() + offset; + ObMulModeVar::set_var(key_offset, header_.get_entry_var_size_type(), write_buf); + + write_buf += key_entry_size_; + ObMulModeVar::set_var(key_len, header_.get_entry_var_size_type(), write_buf); +} + +void ObBinAggSerializer::set_value_entry(int64_t entry_idx, uint8_t type, int64_t value_offset) +{ + int64_t offset = value_entry_start_ + entry_idx * (value_entry_size_ + sizeof(uint8_t)); + char* write_buf = buff_.ptr() + offset; + *reinterpret_cast(write_buf) = type; + ObMulModeVar::set_var(value_offset, header_.get_entry_var_size_type(), write_buf + sizeof(uint8_t)); +} + +void ObBinAggSerializer::set_value_entry_for_json(int64_t entry_idx, uint8_t type, int64_t value_offset) +{ + int64_t offset = value_entry_start_ + entry_idx * (value_entry_size_ + sizeof(uint8_t)); + char* write_buf = buff_.ptr() + offset; + ObMulModeVar::set_var(value_offset, header_.get_entry_var_size_type(), write_buf); + write_buf += value_entry_size_; + *reinterpret_cast(write_buf) = type; +} + +void ObBinAggSerializer::set_key(int64_t key_offset, int64_t key_len) +{ + char* write_buf = key_.ptr() + key_offset; + buff_.append(write_buf, key_len); +} + +void ObBinAggSerializer::set_value(int64_t value_offset, int64_t value_len) +{ + char* write_buf = value_.ptr() + value_offset; + buff_.append(write_buf, value_len, 0); +} + +void ObBinAggSerializer::set_xml_decl(ObString version, ObString encoding, uint16_t standalone) +{ + ObXmlDocBinHeader new_doc_header(version, encoding, 0, standalone, 1); + doc_header_ = new_doc_header; +} + +int ObBinAggSerializer::reserve_meta() +{ + INIT_SUCC(ret); + int64_t pos = buff_.length(); + uint32_t reserve_size = key_start_ - index_start_; + if (OB_FAIL(buff_.set_length(pos + reserve_size))) { + LOG_WARN("failed to set length.", K(ret), K(pos + reserve_size)); + } + return ret; +} + +int ObBinAggSerializer::construct_meta() +{ + INIT_SUCC(ret); + index_start_ = 
header_.header_size(); + if (type_ == ObBinAggType::AGG_XML) { + index_start_ += doc_header_.header_size(); + } + + index_entry_size_ = type_ == AGG_XML ? header_.get_count_var_size() : 0; + key_entry_start_ = index_start_ + index_entry_size_ * count_; + key_entry_size_ = value_entry_size_ = header_.get_entry_var_size(); + value_entry_start_ = (type_ == ObBinAggType::AGG_JSON && + header_type_ == static_cast(ObJsonNodeType::J_ARRAY)) ? + key_entry_start_ : + key_entry_start_ + (key_entry_size_ * 2) * count_; + key_start_ = value_entry_start_ + (sizeof(uint8_t) + value_entry_size_) * count_; + int64_t value_start = key_start_ + key_len_; + + if (key_start_ > header_.total_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("key start unexpected.", K(ret), K(key_start_)); + } else if (OB_FAIL(reserve_meta())) { + LOG_WARN("failed to reserve meta.", K(ret), K(buff_.length())); + } else { + int64_t key_offset = 0; + int64_t i_offset = 0; + int64_t value_offset = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < key_info_.count(); i++) { + ObAggBinKeyInfo *key_info = key_info_.at(i); + if (type_ == ObBinAggType::AGG_XML) { + set_index_entry(key_info->origin_index_, i); + set_key_entry(i, key_start_ + key_offset, key_info->key_len_); + set_value_entry(i, key_info->type_, value_start + key_info->value_offset_); + key_offset += key_info->key_len_; + } else if (!has_unique_flag()) { + if (header_type_ == static_cast(ObJsonNodeType::J_OBJECT)) { + set_key_entry(i, key_start_ + key_offset, key_info->key_len_); + } + set_value_entry_for_json(i, key_info->type_, value_start + key_info->value_offset_); + key_offset += key_info->key_len_; + } else if (!key_info->unparsed_) { + if (header_type_ == static_cast(ObJsonNodeType::J_OBJECT)) { + set_key_entry(i_offset, key_start_ + key_offset, key_info->key_len_); + } + set_value_entry_for_json(i_offset, key_info->type_, value_offset + value_start); + key_offset += key_info->key_len_; + value_offset += key_info->value_len_; + i_offset++; + } + + } + } 
+ + return ret; +} + +int ObBinAggSerializer::text_serialize(ObString value, ObStringBuffer &res) +{ + INIT_SUCC(ret); + + int64_t header_size = sizeof(uint8_t); + int64_t ser_len = serialization::encoded_length_vi64(value.length()); + + if (OB_FAIL(res.reserve(ser_len + header_size + value.length()))) { + LOG_WARN("failed to resoerve serialize size for text.", K(ret), K(ser_len)); + } else if (OB_FAIL(ObMulModeVar::set_var(ObMulModeNodeType::M_TEXT, + ObMulModeBinLenSize::MBL_UINT8, + res.ptr() + res.length()))) { + LOG_WARN("failed to set var.", K(ret)); + } else { + res.set_length(res.length() + header_size); + } + + int64_t pos = res.length(); + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(serialization::encode_vi64(res.ptr(), res.capacity(), pos, value.length()))) { + LOG_WARN("failed to encode str.", K(ret), K(pos)); + } else if (OB_FAIL(res.set_length(pos))) { + LOG_WARN("failed to update len for res.", K(ret), K(pos)); + } else if (OB_FAIL(res.append(value.ptr(), value.length()))) { + LOG_WARN("failed to append value.", K(ret), K(value)); + } + + return ret; +} + +int ObBinAggSerializer::text_deserialize(ObString value, ObStringBuffer &res) +{ + INIT_SUCC(ret); + + int64_t pos = sizeof(uint8_t); + int64_t val = 0; + char *data = value.ptr(); + int64_t data_len = value.length(); + + if (OB_FAIL(serialization::decode_vi64(data, data_len, pos, &val))) { + LOG_WARN("failed to decode value", K(ret), K(value), K(pos)); + } else if (data_len < pos + val) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get val unexpected.", K(ret), K(val), K(value)); + } else if (OB_FAIL(res.append(data + pos, val))) { + LOG_WARN("res append failed", K(ret), K(val)); + } + + return ret; +} + +int ObBinAggSerializer::element_serialize(ObIAllocator* allocator, ObString value, ObStringBuffer &res) +{ + INIT_SUCC(ret); + ObStringBuffer header_buff(allocator); + ObStringBuffer ele_header_buff(allocator); + + ObString header_str; + uint64_t count = 1; + ObString prefix; + int64_t total_size = 0; 
+ ObXmlElementBinHeader element_serializer(true, prefix); + if (OB_FAIL(element_serializer.serialize(ele_header_buff))) { + LOG_WARN("element serialize failed.", K(ret)); + } else { + + total_size = ObBinAggSerializer::estimate_total(value.length(), 1, AGG_XML, ele_header_buff.length()); + ObMulBinHeaderSerializer header_serializer(&header_buff, M_ELEMENT, total_size, count); + if (OB_FAIL(header_serializer.serialize())) { + LOG_WARN("header serialize failed.", K(ret)); + } else if (OB_FALSE_IT(header_str = header_serializer.buffer()->string())) { + } else if (OB_FAIL(res.append(header_str.ptr(), header_str.length()))) { + LOG_WARN("failed to append.", K(header_str)); + } else if (OB_FAIL(res.append(ele_header_buff.ptr(), ele_header_buff.length()))) { + LOG_WARN("failed to append.", K(ele_header_buff)); + } else { + int64_t index_start = res.length(); + int64_t key_entry_start = index_start + header_serializer.get_count_var_size(); + int64_t value_entry_start = key_entry_start + header_serializer.get_entry_var_size() * 2; + int64_t key_start = value_entry_start + header_serializer.get_entry_var_size() + sizeof(uint8_t); + uint32_t reserve_size = key_start - index_start; + if (OB_FAIL(res.reserve(reserve_size))) { + LOG_WARN("failed to reserve buffer.", K(ret), K(reserve_size)); + } else { + res.set_length(index_start + reserve_size); + char* write_buf = res.ptr() + index_start; + ObMulModeVar::set_var(0, header_serializer.get_count_var_size_type(), write_buf); // index + write_buf += header_serializer.get_count_var_size(); + ObMulModeVar::set_var(key_start, header_serializer.get_entry_var_size_type(), write_buf); // key_entry offset + write_buf += header_serializer.get_entry_var_size(); + ObMulModeVar::set_var(0, header_serializer.get_entry_var_size_type(), write_buf); // key_entry length + write_buf += header_serializer.get_entry_var_size(); + *reinterpret_cast(write_buf) = M_TEXT; // value_entry type + ObMulModeVar::set_var(key_start, 
header_serializer.get_entry_var_size_type(), write_buf + sizeof(uint8_t)); // value_entry offset + if (OB_FAIL(res.append(value))) { + LOG_WARN("failed to append value.", K(ret), K(value)); + } + } + } + } + return ret; +} + +int ObBinAggSerializer::deal_last_unparsed() +{ + INIT_SUCC(ret); + ObAggBinKeyInfo *key_info = nullptr; + int64_t key_count = key_info_.count(); + + if (type_ != AGG_XML || key_count <= 0) { + // do nothing + } else { + ObStringBuffer element_buff(allocator_); + ObStringBuffer text_buff(allocator_); + + ObAggBinKeyInfo *last_key_info = key_info_.at(key_count -1); + ObString value(value_.length() - last_key_info->value_offset_, + value_.ptr() + last_key_info->value_offset_); + + if (last_is_unparsed_text_) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObBinAggSerializer::text_serialize(value, text_buff))) { + LOG_WARN("failed to serialize text.", K(ret), K(value)); + } else if (OB_FAIL(ObBinAggSerializer::element_serialize(allocator_, text_buff.string(), element_buff))) { + LOG_WARN("failed to build element serialize.", K(ret), K(value)); + } else if (OB_FAIL(value_.set_length(last_key_info->value_offset_))) { + LOG_WARN("set length failed", K(ret)); + } else if (OB_FAIL(value_.append(element_buff.ptr(), element_buff.length()))) { + LOG_WARN("failed to append key into key_", K(ret), K(element_buff)); + } else { + last_is_unparsed_text_ = false; + } + } else if (last_is_text_node_) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObBinAggSerializer::text_serialize(value, text_buff))) { + LOG_WARN("failed to serialize text.", K(ret), K(value)); + } else if (OB_FAIL(value_.set_length(last_key_info->value_offset_))) { + LOG_WARN("set length failed", K(ret)); + } else if (OB_FAIL(value_.append(text_buff.ptr(), text_buff.length()))) { + LOG_WARN("failed to append key into key_", K(ret), K(text_buff)); + } else { + last_is_text_node_ = false; + } + } + } + + return ret; +} + +void ObBinAggSerializer::construct_key_and_value() +{ + if (!is_json_array()) { 
+ for (int64_t i = 0; i < key_info_.count(); i++) { + ObAggBinKeyInfo *key_info = key_info_.at(i); + if ((has_unique_flag() && key_info->unparsed_)) { + // do nothing + } else { + set_key(key_info->offset_, key_info->key_len_); + } + } + } + + if (!has_unique_flag()) { + buff_.append(value_.ptr(), value_.length(), 0); + } else { + for (int64_t i = 0; i < key_info_.count(); i++) { + ObAggBinKeyInfo *key_info = key_info_.at(i); + if (key_info->unparsed_) { + // do nothing + } else { + set_value(key_info->value_offset_, key_info->value_len_); + } + } + } +} + +int ObBinAggSerializer::copy_and_reset(ObIAllocator* new_allocator, + ObIAllocator* old_allocator, + ObStringBuffer &add_value) +{ + INIT_SUCC(ret); + if (OB_ISNULL(new_allocator)) { + // do nothing + } else { + ObStringBuffer new_key(new_allocator); + ObStringBuffer new_value(new_allocator); + ObAggBinKeyArray new_key_info; + + if (OB_FAIL(new_value.reserve(value_.length() + add_value.length()))) { + LOG_WARN("failed to reserve new value", K(ret), K(value_.length()), K(add_value.length())); + } else if (OB_FAIL(new_value.append(value_.ptr(), value_.length(), 0))) { + LOG_WARN("failed to append value.", K(new_value.length()), K(value_.length())); + } else if (OB_FAIL(new_value.append(add_value.ptr(), add_value.length(), 0))) { + LOG_WARN("failed to append add value.", K(new_value.length()), K(add_value)); + } else if (OB_FAIL(new_key.append(key_.ptr(), key_.length(), 0))) { + LOG_WARN("failed to reserve new key", K(ret), K(new_key.length()), K(key_.length())); + } else { + key_.reset(); + value_.reset(); + old_allocator->reset(); + if (OB_FAIL(key_.deep_copy(new_allocator, new_key))) { + LOG_WARN("failed to copy new key into key", K(key_), K(new_key)); + } else if (OB_FAIL(value_.deep_copy(new_allocator, new_value))) { + LOG_WARN("failed to copy new value into value", K(value_), K(new_value)); + } + } + + } + + return ret; +} + +int ObBinAggSerializer::rewrite_total_size() +{ + INIT_SUCC(ret); + int64_t 
actual_total_size = buff_.length(); + int64_t calculate_total_size = header_.get_obj_size(); + if (calculate_total_size == actual_total_size) { + // do nothing + } else if (ObMulModeVar::get_var_type(calculate_total_size) < + ObMulModeVar::get_var_type(actual_total_size)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("header size invalided", K(ret)); + } else { + if (header_.obj_var_size_ == 1) { + *reinterpret_cast(buff_.ptr() + header_.begin_ + header_.obj_var_offset_) = static_cast(actual_total_size); + } else if (header_.obj_var_size_ == 2) { + *reinterpret_cast(buff_.ptr() + header_.begin_ + header_.obj_var_offset_) = static_cast(actual_total_size); + } else if (header_.obj_var_size_ == 4) { + *reinterpret_cast(buff_.ptr() + header_.begin_ + header_.obj_var_offset_) = static_cast(actual_total_size); + } else { + *reinterpret_cast(buff_.ptr() + header_.begin_ + header_.obj_var_offset_) = actual_total_size; + } + } + return ret; +} + +int ObBinAggSerializer::serialize() +{ + INIT_SUCC(ret); + + if (OB_FAIL(deal_last_unparsed())) { // unparsed + LOG_WARN("failed to deal with last unprased.", K(ret)); + } else if (is_json_type() && !json_not_sort() && OB_FALSE_IT(do_json_sort())) { // do json sort + } else if (is_xml_type() && OB_FALSE_IT(do_xml_sort())) { // do xml sort + } else if (OB_FAIL(construct_header())) { // calculate header + LOG_WARN("failed to construct header.", K(ret)); + } else if (OB_FAIL(construct_meta())) { // construct meta_ + LOG_WARN("failed to construct meta.", K(ret)); + } else if (OB_FALSE_IT(construct_key_and_value())) { // merge key_ and value_ + } else if (OB_FAIL(rewrite_total_size())) { // write total + LOG_WARN("failed to rewrite total size.", K(ret)); + } + + return ret; +} + +void ObBinAggSerializer::do_json_sort() +{ + ObJsonBinAggKeyCompare cmp; + cmp.buff_ = &key_; + std::stable_sort(key_info_.begin(), key_info_.end(), cmp); +} + +void ObBinAggSerializer::do_xml_sort() +{ + ObXmlBinAggKeyCompare cmp; + cmp.buff_ = &key_; + 
std::stable_sort(key_info_.begin(), key_info_.end(), cmp); +} + + +}; // namespace common + +}; // namespace oceanbase \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_binary_aggregate.h b/deps/oblib/src/lib/xml/ob_binary_aggregate.h new file mode 100644 index 0000000000..e6e16eddf0 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_binary_aggregate.h @@ -0,0 +1,177 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation support for the json and xml binary aggregate. + */ + +#ifndef OCEANBASE_SQL_OB_BINARY_AGGREGATE +#define OCEANBASE_SQL_OB_BINARY_AGGREGATE + +#include "lib/container/ob_array_iterator.h" +#include "lib/xml/ob_multi_mode_interface.h" +#include "lib/xml/ob_multi_mode_bin.h" +#include "lib/xml/ob_tree_base.h" +#include "lib/xml/ob_xml_bin.h" +#include "lib/number/ob_number_v2.h" +#include "lib/json_type/ob_json_bin.h" +#include "lib/json_type/ob_json_base.h" +#include "lib/json_type/ob_json_tree.h" + +namespace oceanbase { +namespace common { + +enum ObBinAggType { + AGG_JSON, + AGG_XML, + AGG_MAX +}; + +enum ObBinAggAllocFlag { + AGG_ALLOC_A, + AGG_ALLOC_B, + AGG_ALLOC_MAX +}; + +struct ObAggBinKeyInfo { + uint8_t type_; + bool unparsed_; // special for xml + uint64_t value_offset_; + uint64_t value_len_; + uint64_t offset_; + uint32_t key_len_; + uint32_t origin_index_; // special for xml + TO_STRING_KV(K(type_), + K(unparsed_), + K(value_offset_), + K(offset_), + K(key_len_), + K(origin_index_)); +}; + +typedef common::ObArray 
ObAggBinKeyArray; +class ObBinAggSerializer { +public: + ObBinAggSerializer(ObIAllocator* allocator_, + ObBinAggType type, + uint8_t header_type, + bool need_merge_unparsed = false, + ObIAllocator* tmp_allocator = nullptr, + ObIAllocator* arr_allocator = nullptr); + + // finaly serialize + int serialize(); + int append_key_and_value(ObString key, ObStringBuffer &value, ObJsonBin *json_val); + int append_key_and_value(ObXmlBin *xml_bin); + void set_header_type(uint8_t header_type) { header_type_ = header_type; } + void set_xml_decl(ObString version, ObString encoding, uint16_t standalone); + void set_sort_and_unique() { + if (type_ == AGG_JSON) { + sort_and_unique_ = true; + } + } + ObStringBuffer *get_buffer() { return &buff_; }; + int64_t get_key_info_count() { return key_info_.count(); } + int64_t get_last_count() { return count_; } + void close_merge_text() { merge_text_ = false; } + void open_merge_text() { merge_text_ = true; } + int64_t get_approximate_length() { return key_.length() + value_.length(); } + int64_t get_key_length() { return key_.length(); } + int64_t get_value_length() { return value_.length(); } + +private: + int construct_meta(); + void construct_key_and_value(); + + int rewrite_total_size(); + + int construct_header(); + bool has_unique_flag() { return type_ == AGG_JSON && sort_and_unique_; } + bool is_xml_type() { return type_ == AGG_XML; } + bool is_json_type() { return type_ == AGG_JSON; } + bool json_not_sort() { + return type_ == AGG_JSON + && header_type_ == (static_cast(ObJsonNodeType::J_OBJECT)) + && !sort_and_unique_ ; + } + // stable sort + // The internal sorting rules of json and xml binary are different + void do_json_sort(); + void do_xml_sort(); + int add_unparsed_xml(ObXmlBin *xml_bin); + int add_parsed_xml(ObXmlBin *xml_bin); + int add_attribute_xml(ObXmlBin *xml_bin); + int add_element_xml(ObXmlBin *xml_bin); + int add_single_leaf_xml(ObXmlBin *xml_bin); + int deal_last_unparsed(); + int serialize_value(int idx); + int 
serialize_key(int idx); + int reserve_meta(); + void set_key_entry(int64_t entry_idx, int64_t key_offset, int64_t key_len); + void set_index_entry(int64_t origin_index, int64_t sort_index); + void set_value_entry(int64_t entry_idx, uint8_t type, int64_t value_offset); + bool need_to_add_node(int64_t key_count, ObMulModeNodeType type); + int copy_and_reset(ObIAllocator* new_allocator, + ObIAllocator* old_allocator, + ObStringBuffer &add_value); + bool first_alloc_flag() { return alloc_flag_ == ObBinAggAllocFlag::AGG_ALLOC_A; } + void set_first_alloc() { alloc_flag_ = ObBinAggAllocFlag::AGG_ALLOC_A;} + void set_second_alloc() { alloc_flag_ = ObBinAggAllocFlag::AGG_ALLOC_B;} + bool check_three_allocator() { return back_allocator_ == nullptr || arr_allocator_ == nullptr; } + bool is_json_array() { return header_type_ == static_cast(ObJsonNodeType::J_ARRAY); } + ObIAllocator* get_array_allocator() { return arr_allocator_ == nullptr ? allocator_ : arr_allocator_; } + + void set_value_entry_for_json(int64_t entry_idx, uint8_t type, int64_t value_offset); + void set_key(int64_t key_offset, int64_t key_len); + void set_value(int64_t value_offset, int64_t value_len); + static int64_t estimate_total(int64_t base_length, int64_t count, int32_t type, int64_t xml_header_size = 4); + static int text_serialize(ObString value, ObStringBuffer &res); + static int text_deserialize(ObString value, ObStringBuffer &res); + static int element_serialize(ObIAllocator* allocator_, ObString value, ObStringBuffer &res); + static constexpr int REPLACE_MEMORY_SIZE_THRESHOLD = 8 << 20; // 8M +private: + // At present, there is no encapsulated interface for lob's append. + // Use ObStringBuffer, and replace it with lob after the implementation of subsequent lob placement. 
+ ObStringBuffer value_; // value buffer + ObStringBuffer key_; // key buffer + ObStringBuffer buff_; // finaly buff + ObMulBinHeaderSerializer header_; // header + ObXmlDocBinHeader doc_header_;// special for xml + bool last_is_unparsed_text_; + bool last_is_text_node_; + bool is_xml_agg_; + bool sort_and_unique_; + bool merge_text_; + uint8_t header_type_; + uint8_t alloc_flag_; + + int32_t type_; // ObBinAggType 0:json 1:xml + int64_t key_len_; + int64_t value_len_; + int64_t count_; + int64_t index_start_; + int8_t index_entry_size_; + int64_t key_entry_start_; + int8_t key_entry_size_; + int64_t value_entry_start_; + int8_t value_entry_size_; + int64_t key_start_; + + ObIAllocator* allocator_; + ObIAllocator* back_allocator_; + ObIAllocator* arr_allocator_; + ModulePageAllocator page_allocator_; + ObAggBinKeyArray key_info_; +}; + +}; // namespace common + +}; // namespace oceanbase + +#endif // OCEANBASE_SQL_OB_BINARY_AGGREGATE \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_libxml2_sax_handler.cpp b/deps/oblib/src/lib/xml/ob_libxml2_sax_handler.cpp new file mode 100644 index 0000000000..7de160ef2f --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_libxml2_sax_handler.cpp @@ -0,0 +1,1123 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ +#define USING_LOG_PREFIX LIB + +#include "lib/xml/ob_libxml2_sax_handler.h" +#include "lib/xml/ob_xml_util.h" +#include "libxml2/libxml/parser.h" +#include "libxml2/libxml/parserInternals.h" +#include "lib/ob_define.h" +#include "lib/allocator/ob_malloc.h" +#ifdef OB_BUILD_ORACLE_PL +#include "libxslt/xslt.h" +#include "libxslt/extensions.h" +#endif + +namespace oceanbase { +namespace common { + + +// ObLibXml2SaxHandler +_xmlSAXHandler* get_sax_handler() +{ + static _xmlSAXHandler sax_handler = { + ObLibXml2SaxHandler::internal_subset, // internalSubset + nullptr, // isStandalone + nullptr, // hasInternalSubset + nullptr, // hasExternalSubset + nullptr, // resolveEntity + nullptr, // getEntity + nullptr, // entityDecl + nullptr, // notationDecl + nullptr, // attributeDecl + nullptr, // elementDecl + nullptr, // unparsedEntityDecl + nullptr, // setDocumentLocator + ObLibXml2SaxHandler::start_document, // startDocument + ObLibXml2SaxHandler::end_document, // endDocument + ObLibXml2SaxHandler::start_element, // startElement // ObLibXml2SaxHandler::start_element + ObLibXml2SaxHandler::end_element, // endElement // ObLibXml2SaxHandler::end_element + ObLibXml2SaxHandler::entity_reference, // reference + ObLibXml2SaxHandler::characters, // characters + nullptr, // ignorableWhitespace ObLibXml2SaxHandler::ignorable_whitespace + ObLibXml2SaxHandler::processing_instruction, // processingInstruction + ObLibXml2SaxHandler::comment, // comment + nullptr, // warning + nullptr, // error + nullptr, // fatalError + nullptr, // getParameterEntity + ObLibXml2SaxHandler::cdata_block, // cdataBlock + nullptr, // externalSubset + XML_SAX2_MAGIC, // initialized, use sax + nullptr, // private + nullptr, // startElementNs // ObLibXml2SaxHandler::start_element_ns + nullptr, // endElementNs // ObLibXml2SaxHandler::end_element_ns + ObLibXml2SaxHandler::structured_error, // serror + }; + return &sax_handler; +} + +_xmlSAXHandler* get_synax_handler() +{ + static _xmlSAXHandler 
synax_handler = { + nullptr, // internalSubset + nullptr, // isStandalone + nullptr, // hasInternalSubset + nullptr, // hasExternalSubset + nullptr, // resolveEntity + nullptr, // getEntity + nullptr, // entityDecl + nullptr, // notationDecl + nullptr, // attributeDecl + nullptr, // elementDecl + nullptr, // unparsedEntityDecl + nullptr, // setDocumentLocator + nullptr, // startDocument + nullptr, // endDocument + nullptr, // startElement // ObLibXml2SaxHandler::start_element + nullptr, // endElement // ObLibXml2SaxHandler::end_element + nullptr, // reference + nullptr, // characters + nullptr, // ignorableWhitespace + nullptr, // processingInstruction + nullptr, // comment + nullptr, // warning + nullptr, // error + nullptr, // fatalError + nullptr, // getParameterEntity + nullptr, // cdataBlock + nullptr, // externalSubset + XML_SAX2_MAGIC, // initialized, use sax + nullptr, // private + nullptr, // startElementNs // ObLibXml2SaxHandler::start_element_ns + nullptr, // endElementNs // ObLibXml2SaxHandler::end_element_ns + ObLibXml2SaxHandler::structured_error, // serror + }; + return &synax_handler; +} + +void ObLibXml2SaxHandler::init() +{ + lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(common::OB_SERVER_TENANT_ID, "XmlGlobal")); + xmlInitParser(); +#ifdef OB_BUILD_ORACLE_PL + xsltInitGlobals(); + xsltInit(); +#endif + LOG_INFO("saxhandler init", K(xmlIsMainThread())); +} + +void ObLibXml2SaxHandler::destroy() +{ +#ifdef OB_BUILD_ORACLE_PL + xsltCleanupGlobals(); +#endif + xmlCleanupParser(); +} + +// libxml has pthread variable xmlGlobalState +// this variable is dynamic use malloc and can +// not belong to tenant, so use observer tenant +void ObLibXml2SaxHandler::reset_libxml_last_error() +{ + lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(common::OB_SERVER_TENANT_ID, "XmlGlobal")); + xmlResetLastError(); +} + +int ObLibXml2SaxHandler::get_parser(void* ctx, ObLibXml2SaxParser*& parser) +{ + INIT_SUCC(ret); + xmlParserCtxt* context = nullptr; + 
if (OB_ISNULL(context = static_cast(ctx))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("context is null", K(ret)); + } else if (OB_ISNULL(parser = static_cast(context->_private))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("parser is null", K(ret)); + } else if (OB_UNLIKELY(parser->get_libxml2_ctxt() != context)) { + ret = OB_ERR_PARSER_SYNTAX; + LOG_INFO("parser ctxt changed"); + } + return ret; +} + +void ObLibXml2SaxHandler::start_document(void *ctx) +{ + INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (OB_UNLIKELY(parser->is_stop_parse())) { + LOG_INFO("parser is stopped", K(parser->get_last_errno())); + } else if (OB_FAIL(parser->start_document())) { + LOG_WARN("parser start_document failed", K(ret)); + } + if (OB_FAIL(ret) && OB_NOT_NULL(parser)) { // parser stays null when get_parser() could not resolve it + parser->stop_parse(ret); + } +} + +void ObLibXml2SaxHandler::end_document(void *ctx) +{ + INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + + if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (OB_UNLIKELY(parser->is_stop_parse())) { + LOG_INFO("parser is stopped", K(parser->get_last_errno())); + } else if (OB_FAIL(parser->end_document())) { + LOG_WARN("parser end_document failed", K(ret)); + } + + if (OB_FAIL(ret) && OB_NOT_NULL(parser)) { + parser->stop_parse(ret); + } +} + +// sax1 +void ObLibXml2SaxHandler::start_element(void* ctx, const xmlChar* name, const xmlChar** p) +{ + INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (OB_UNLIKELY(parser->is_stop_parse())) { + LOG_INFO("parser is stopped", K(parser->get_last_errno())); + } else if (OB_FAIL(parser->start_element(reinterpret_cast(name), + reinterpret_cast(p)))) { + LOG_WARN("parser start_element failed", K(ret)); + } + if (OB_FAIL(ret) && OB_NOT_NULL(parser)) { + parser->stop_parse(ret); + } +} + +void ObLibXml2SaxHandler::end_element(void* ctx, const xmlChar* name) +{ + 
INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (OB_UNLIKELY(parser->is_stop_parse())) { + LOG_INFO("parser is stopped", K(parser->get_last_errno())); + } else if (OB_FAIL(parser->end_element())) { + LOG_WARN("parser end_element failed", K(ret)); + } + if (OB_FAIL(ret) && OB_NOT_NULL(parser)) { // parser stays null when get_parser() could not resolve it + parser->stop_parse(ret); + } +} + +void ObLibXml2SaxHandler::characters(void *ctx, const xmlChar *ch, int len) +{ + INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + + if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (OB_UNLIKELY(parser->is_stop_parse())) { + LOG_INFO("parser is stopped", K(parser->get_last_errno())); + } else if (OB_FAIL(parser->characters(reinterpret_cast(ch), + len))) { // OB_FAIL stores the error in ret so the parse is stopped below + LOG_WARN("parser characters failed", K(ret)); + } + + if (OB_FAIL(ret) && OB_NOT_NULL(parser)) { + parser->stop_parse(ret); + } +} + +void ObLibXml2SaxHandler::cdata_block(void* ctx, const xmlChar* value, int len) +{ + INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + + if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (OB_UNLIKELY(parser->is_stop_parse())) { + LOG_INFO("parser is stopped", K(parser->get_last_errno())); + } else if (OB_FAIL(parser->add_text_node(ObMulModeNodeType::M_CDATA, + reinterpret_cast(value), + len))) { // OB_FAIL stores the error in ret so the parse is stopped below + LOG_WARN("parser cdata block failed", K(ret)); + } + + if (OB_FAIL(ret) && OB_NOT_NULL(parser)) { + parser->stop_parse(ret); + } +} + +void ObLibXml2SaxHandler::comment(void* ctx, const xmlChar* value) +{ + INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + const char *src_value = reinterpret_cast(value); + if (OB_ISNULL(src_value)) { + LOG_DEBUG("empty comment ignore"); + } else if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (OB_UNLIKELY(parser->is_stop_parse())) { + LOG_INFO("parser is stopped", K(parser->get_last_errno())); + } else if (OB_FAIL(parser->add_text_node(ObMulModeNodeType::M_COMMENT, + 
src_value, + STRLEN(src_value)))) { // OB_FAIL stores the error in ret so the parse is stopped below + LOG_WARN("parser comment failed", K(ret)); + } + + if (OB_FAIL(ret) && OB_NOT_NULL(parser)) { // parser stays null when get_parser() could not resolve it + parser->stop_parse(ret); + } +} + +void ObLibXml2SaxHandler::processing_instruction(void *ctx, const xmlChar *target, const xmlChar *data) +{ + INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + const char *src_target = reinterpret_cast(target); + const char *src_data = reinterpret_cast(data); + int target_len = src_target == nullptr ? 0 : STRLEN(src_target); + int data_len = src_data == nullptr ? 0 : STRLEN(src_data); + + if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (OB_UNLIKELY(parser->is_stop_parse())) { + LOG_INFO("parser is stopped", ); + } else if (OB_FAIL(parser->processing_instruction(ObString(target_len, src_target), ObString(data_len, src_data)))) { + LOG_WARN("processing_instruction failed", K(ret)); + } + if (OB_FAIL(ret) && OB_NOT_NULL(parser)) { + parser->stop_parse(ret); + } +} + +// internal DTD +void ObLibXml2SaxHandler::internal_subset(void *ctx, + const xmlChar *name, + const xmlChar *external_id, + const xmlChar *system_id) +{ + INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + + if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (OB_UNLIKELY(parser->is_stop_parse())) { + LOG_INFO("parser is stopped", K(parser->get_last_errno())); + } else { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("not supprt dtd"); + } + if (OB_FAIL(ret) && OB_NOT_NULL(parser)) { + parser->stop_parse(ret); + } +} + +void ObLibXml2SaxHandler::entity_reference(void *ctx, const xmlChar *name) +{ + INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + + if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (parser->is_stop_parse()) { + LOG_INFO("parser is stopped", K(parser->get_last_errno())); + } else { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("not supprt custom enity"); + } + if (OB_FAIL(ret) && OB_NOT_NULL(parser)) { + parser->stop_parse(ret); + } +} + +void 
ObLibXml2SaxHandler::structured_error(void *ctx, xmlErrorPtr error) +{ + INIT_SUCC(ret); + ObLibXml2SaxParser* parser = nullptr; + if (OB_FAIL(get_parser(ctx, parser))) { + LOG_WARN("get_parser failed", K(ret)); + } else if (parser->is_stop_parse()) { + LOG_INFO("parser is stopped", K(parser->get_last_errno())); + } else if (OB_ISNULL(error)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("input error_info is null", K(ret)); + } else if (OB_FAIL(parser->on_error(error->code))) { + LOG_WARN("parse error", K(error->code), K(error->line), K(error->int1), K(error->int2), KCSTRING(error->message)); + } +} + +// ObLibXml2SaxHandler end + +// ObLibXml2SaxParser + +static int create_memory_parser_ctxt(const ObString& xml_text, xmlParserCtxt*& ctxt) +{ + INIT_SUCC(ret); + xmlParserInputPtr input = nullptr; + xmlParserInputBufferPtr buf = nullptr; + + if (xml_text.empty()) { + // do nothing + } else if (OB_ISNULL(ctxt = xmlNewParserCtxt())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("create parser ctxt failed", K(ret)); + } else if (OB_ISNULL(buf = xmlParserInputBufferCreateMem(xml_text.ptr(), xml_text.length(), XML_CHAR_ENCODING_NONE))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("create parser input buffer failed", K(ret)); + // free when error + xmlFreeParserCtxt(ctxt); + } else if (OB_ISNULL(input = xmlNewIOInputStream(ctxt, buf, XML_CHAR_ENCODING_NONE))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("create parser input failed", K(ret)); + xmlFreeParserInputBuffer(buf); + xmlFreeParserCtxt(ctxt); + } else if (xmlPushInput(ctxt, input) == -1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("parser push input failed", K(ret)); + // free when error + xmlFreeInputStream(input); // this will free buf, so no need free buf + xmlFreeParserCtxt(ctxt); + } + if (OB_FAIL(ret)) { + ctxt = nullptr; + } + return ret; +} + +ObLibXml2SaxParser::~ObLibXml2SaxParser() +{ + if (OB_NOT_NULL(ctxt_)) { + ctxt_->sax = old_sax_; + ctxt_->_private = nullptr; + xmlFreeParserCtxt(ctxt_); + ctxt_ = 
nullptr; + } +} + +int ObLibXml2SaxParser::init(const ObString& xml_text, bool skip_start_blank) +{ + INIT_SUCC(ret); + + if (OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is null", K(ret)); + } else if (OB_FAIL(init_xml_text(xml_text, skip_start_blank))) { + LOG_WARN("init_xml_text failed", K(ret), K(xml_text)); + } else if (xml_text_.empty()) { + // ignore empty + } else if (OB_FAIL(init_parse_context())){ + LOG_WARN("create parser ctxt failed", K(ret)); + } + return ret; +} + +int ObLibXml2SaxParser::init_parse_context() +{ + INIT_SUCC(ret); + ObLibXml2SaxHandler::reset_libxml_last_error(); + xmlParserCtxt* ctxt = nullptr; + + if (OB_FAIL(create_memory_parser_ctxt(xml_text_, ctxt))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("create parser ctxt failed", K(ret)); + } else { + this->ctxt_ = ctxt; + ctxt->_private = this; + this->old_sax_ = ctxt->sax; + + xmlCtxtUseOptions(ctxt, XML_PARSE_IGNORE_ENC | XML_PARSE_NOENT); + if (options_ & OB_XML_PARSE_SYNTAX_CHECK) { + ctxt->sax = get_synax_handler(); + } else { + ctxt->sax = get_sax_handler(); + } + } + return ret; +} + +int ObLibXml2SaxParser::init_xml_text(const ObString& xml_text, bool skip_start_blank) +{ + INIT_SUCC(ret); + const char* src_ptr = xml_text.ptr(); + int64_t src_len = xml_text.length(); + int64_t pos = 0; + int64_t len = 0; + + // libxml2 will report error if with start whitespace + // so skip start whitespace as need + // isspace() is only defined for unsigned char values (or EOF), so cast before the call + while(skip_start_blank && pos < src_len && isspace(static_cast<unsigned char>(src_ptr[pos]))) { + ++pos; + } + len = src_len - pos; + if(len > 0) { + xml_text_.assign_ptr(src_ptr + pos, len); + } + return ret; +} + +void ObLibXml2SaxParser::stop_parse(int code) +{ + if (OB_NOT_NULL(ctxt_)) { + ctxt_->instate = XML_PARSER_EOF; + ctxt_->disableSAX = 1; + } + set_errno(code); + set_stop_parse(true); +} + +int ObLibXml2SaxParser::parse_document(const ObString& xml_text) +{ + INIT_SUCC(ret); + if (OB_FAIL(init(xml_text, true))) { + LOG_WARN("init failed", K(ret)); + } else if 
(xml_text_.empty()) { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("xml_text_ is empty or blank", K(ret)); + } else { + xmlParseDocument(ctxt_); + if (OB_FAIL(this->get_last_errno())) { + LOG_WARN("parse failed", K(ret), K(xml_text)); + } + ObLibXml2SaxHandler::reset_libxml_last_error(); + } + return ret; +} + +int ObLibXml2SaxParser::parse_content(const ObString& xml_text) +{ + INIT_SUCC(ret); + // In the content, there is no need to delete the leading null character. details as following: + // 1. Contains only text text, and there are blank characters at the beginning that need to be reserved + // 2. Including the element node, the empty characters at the beginning and in the middle should not be reserved + if (OB_FAIL(init(xml_text, false))) { + LOG_WARN("init failed", K(ret)); + } else if (OB_ISNULL(document_ = OB_NEWx(ObXmlDocument, allocator_, ObMulModeNodeType::M_CONTENT, ctx_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else if (OB_NOT_NULL(ctxt_)) { + this->set_cur_node(document_); + document_->set_flags(MEMBER_LAZY_SORTED); + ctxt_->instate = XML_PARSER_CONTENT; + ctxt_->str_xml = reinterpret_cast(ObXmlConstants::XML_STRING); + ctxt_->str_xmlns = reinterpret_cast(ObXmlConstants::XMLNS_STRING); + ctxt_->str_xml_ns = reinterpret_cast(ObXmlConstants::XML_NAMESPACE_SPECIFICATION_URI); + xmlParseContent(ctxt_); + if (OB_FAIL(this->get_last_errno())) { + LOG_WARN("parse failed", K(ret), K(xml_text)); + } else if (OB_UNLIKELY(! 
is_parsed_all_input())) { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("input not parsed fullly", K(ret), K(xml_text.length()), K(get_parse_byte_num())); + } else if (OB_FAIL(remove_prev_empty_text())) { + LOG_WARN("remove_prev_empty_text fail", K(ret)); + } + ObLibXml2SaxHandler::reset_libxml_last_error(); + } + return ret; +} + +int ObLibXml2SaxParser::check() +{ + INIT_SUCC(ret); + if (OB_ISNULL(ctxt_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctxt is null", K(ret)); + } else if (OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is null", K(ret)); + } + return ret; +} + +int ObLibXml2SaxParser::to_ob_xml_errno(int code) +{ + INIT_SUCC(ret); + switch (code) + { + case XML_ERR_UNSUPPORTED_ENCODING: + case XML_ERR_ENCODING_NAME: + break; + case XML_ERR_RESERVED_XML_NAME: + if (! is_content_allow_xml_decl()) { + ret = OB_ERR_PARSER_SYNTAX; + } + break; + default: + ret = OB_ERR_PARSER_SYNTAX; + break; + } + return ret; +} + +int ObLibXml2SaxParser::on_error(int code) +{ + INIT_SUCC(ret); + if (is_recover_mode()) { + } else if (OB_FAIL(to_ob_xml_errno(code))) { + this->stop_parse(ret); + } else { + // full recover mode + if (OB_NOT_NULL(ctxt_)) { + ctxt_->recovery = 1; + } + } + return ret; +} + +int ObLibXml2SaxParser::push_namespace(ObXmlAttribute* ns) +{ + INIT_SUCC(ret); + if (ns_cnt_stack_.size() <= 0 || OB_ISNULL(ns)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ns_cnt_stack_ not push init", K(ret), KP(ns)); + } else { + ns_cnt_stack_[ns_cnt_stack_.size()-1]++; + if (OB_FAIL(ns_stack_.push_back(ns))) { + LOG_WARN("failed to push back ns", K(ret), KP(ns)); + } + } + return ret; +} + +int ObLibXml2SaxParser::pop_namespace() +{ + INIT_SUCC(ret); + int cur_ns_cnt = 0; + if (ns_cnt_stack_.size() > 0) { + if (OB_FAIL(ns_cnt_stack_.pop_back(cur_ns_cnt))) { + LOG_WARN("failed to pop back.", K(ret), K(cur_ns_cnt)); + } + for (int i = 0; OB_SUCC(ret) && i < cur_ns_cnt; ++i) { + ns_stack_.pop_back(); + } + } + return ret; +} + +int 
ObLibXml2SaxParser::get_namespace(const ObString& name, bool use_default_ns, ObXmlAttribute*& ns) +{ + INIT_SUCC(ret); + for (int i = ns_stack_.size() - 1; i >= 0; --i) { + ObXmlAttribute* cur_ns = ns_stack_[i]; + if (cur_ns->get_key().compare(name) == 0 + || (use_default_ns && name.empty() && cur_ns->get_key().compare(ObXmlConstants::XMLNS_STRING) == 0)) { + ns = cur_ns; + break; + } + } + if(nullptr == ns && !name.empty() && name.compare(ObXmlConstants::XML_STRING) != 0) { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("non-empty prefix can not find namespace", K(ret), K(name)); + } + return ret; +} + +static int get_xml_decl_str(xmlParserCtxt* context, const ObString& xml_text, ObString& xml_decl) +{ + INIT_SUCC(ret); + const char* ptr = xml_text.ptr(); + int32_t length = xml_text.length(); + int32_t end_pos = 0; + if (OB_NOT_NULL(context)) { + end_pos = context->input->cur - context->input->base + context->input->consumed; + if (end_pos >= 0 && end_pos <= length) { + xml_decl.assign_ptr(ptr, end_pos); + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("end_pos invalid", K(ret), KP(ptr), K(length), K(end_pos)); + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("context is null", K(ret), KP(ptr), K(length)); + } + return ret; +} + +int ObLibXml2SaxParser::set_xml_decl(const ObString& xml_decl_str) +{ + INIT_SUCC(ret); + ObXmlDocument* document = document_; + ObIAllocator* allocator = allocator_; + ObString src_version_str; + ObString src_encoding_str; + ObString src_standalone_str; + char* version_str = nullptr; + char* encoding_str = nullptr; + bool has_xml_decl = false; + bool has_version_value = false; + bool has_encoding_value = false; + bool has_standalone_value = false; + + if (OB_NOT_NULL(document) && OB_NOT_NULL(allocator)) { + if ((has_xml_decl = ObXmlParserUtils::has_xml_decl(xml_decl_str))) { + document->set_has_xml_decl(has_xml_decl); + if (OB_FAIL(ObXmlParserUtils::parse_xml_decl(xml_decl_str, + src_version_str, + has_version_value, + 
src_encoding_str, + has_encoding_value, + src_standalone_str, + has_standalone_value))) { + LOG_WARN("parse_xml_decl failed", K(ret), K(has_xml_decl), K(xml_decl_str)); + } else { + int version_length = src_version_str.length(); + int encoding_length = src_encoding_str.length(); + if (version_length > 0 && OB_ISNULL(version_str = static_cast(allocator->alloc(version_length)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret), K(version_length)); + } else if (encoding_length > 0 && OB_ISNULL(encoding_str = static_cast(allocator->alloc(encoding_length)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret), K(encoding_length)); + } else { + if (version_length > 0) { + MEMCPY(version_str, src_version_str.ptr(), version_length); + document->set_version(ObString(version_length, version_str)); + } + if (encoding_length > 0) { + MEMCPY(encoding_str, src_encoding_str.ptr(), encoding_length); + document->set_encoding(ObString(encoding_length, encoding_str)); + } + document->set_encoding_flag(0 == encoding_length && has_encoding_value); + document->set_standalone(ObXmlParserUtils::get_standalone_type(src_standalone_str)); + } + } + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("allocator or document is null", K(ret), KP(allocator), KP(document)); + } + return ret; +} + +int ObLibXml2SaxParser::start_document() +{ + INIT_SUCC(ret); + ObString xml_decl_str; + if (OB_FAIL(this->check())) { + LOG_WARN("check failed", K(ret)); + } else if (OB_ISNULL(document_ = OB_NEWx(ObXmlDocument, allocator_, ObMulModeNodeType::M_DOCUMENT, ctx_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else if (OB_FAIL(document_->init())) { + LOG_WARN("document init failed", K(ret)); + } else { + document_->set_flags(MEMBER_LAZY_SORTED); + if (OB_FAIL(ObXmlParserBase::start_document(document_))) { + LOG_WARN("parser start_document failed", K(ret)); + } else if (OB_FAIL(get_xml_decl_str(ctxt_, xml_text_, xml_decl_str))) { + 
LOG_WARN("get xml decl string failed", K(ret)); + } else if (OB_FAIL(this->set_xml_decl(xml_decl_str))) { + LOG_WARN("set_xml_decl failed", K(ret)); + } + } + return ret; +} + +int ObLibXml2SaxParser::set_element_name(ObXmlElement& element, const char* src_name) +{ + INIT_SUCC(ret); + ObIAllocator* allocator = allocator_; + char* elem_name = nullptr; + int32_t elem_name_length = 0; + ObString qname; + ObString prefix; + ObString localname; + + if (OB_ISNULL(allocator)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("allocator is null", K(ret)); + } else if (OB_ISNULL(src_name)) { + // do nothin ignore + } else if ((elem_name_length = STRLEN(src_name)) > 0) { + if (src_name[0] == ':') { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("element-start tag is not well formed", K(ret), K(elem_name_length)); + } else if (OB_ISNULL(elem_name = static_cast(allocator->alloc(elem_name_length)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret), K(elem_name_length)); + } else { + MEMCPY(elem_name, src_name, elem_name_length); + qname.assign_ptr(elem_name, elem_name_length); + if (OB_FAIL(ObXmlParserUtils::get_prefix_and_localname(qname, prefix, localname))) { + LOG_WARN("get_prefix_and_localname failed", K(ret), K(elem_name_length)); + } else { + element.set_prefix(prefix); + element.set_xml_key(localname); + } + } + } + return ret; +} + +int ObLibXml2SaxParser::escape_xml_text(const ObString& src_attr_value, ObString &dst_attr_value) +{ + INIT_SUCC(ret); + const char *src_value_ptr = src_attr_value.ptr(); + int src_len = src_attr_value.length(); + int dst_len = ObXmlParserUtils::get_xml_escape_str_length(src_attr_value); + ObString attr_value; + char *attr_value_ptr = nullptr; + if (OB_ISNULL(attr_value_ptr = static_cast(allocator_->alloc(dst_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret), K(dst_len)); + } else if (OB_FALSE_IT(attr_value.assign_buffer(attr_value_ptr, dst_len))) { + } else if 
(OB_FAIL(ObXmlParserUtils::escape_xml_text(src_attr_value, attr_value))) { + LOG_WARN("escape_xml_text failed", K(ret), K(src_attr_value)); + } else { + dst_attr_value.assign_ptr(attr_value_ptr, dst_len); + } + return ret; +} + +int ObLibXml2SaxParser::construct_text_value(const ObString &src_attr_value, ObString &attr_value) +{ + INIT_SUCC(ret); + char *attr_value_ptr = nullptr; + int attr_value_len = 0; + if (! is_entity_replace()) { + if (OB_FAIL(escape_xml_text(src_attr_value, attr_value))) { + LOG_WARN("escape_attr_value failed", K(ret), K(src_attr_value)); + } + } else { + attr_value_len = src_attr_value.length(); + if (OB_ISNULL(attr_value_ptr = static_cast(allocator_->alloc(attr_value_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret), K(attr_value_len)); + } else { + MEMCPY(attr_value_ptr, src_attr_value.ptr(), attr_value_len); + attr_value.assign_ptr(attr_value_ptr, attr_value_len); + } + } + return ret; +} + +int ObLibXml2SaxParser::add_element_attr(ObXmlElement& element, const char* src_attr_name, const char* src_attr_value) +{ + INIT_SUCC(ret); + ObIAllocator* allocator = allocator_; + ObXmlAttribute* attr = nullptr; + char* attr_name = nullptr; + int64_t attr_name_length = 0; + int64_t attr_value_length = 0; + ObString src_attr_value_str; + ObString attr_value; + ObString qname; + ObString prefix; + ObString localname; + + if (OB_ISNULL(allocator)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("allocator is null", K(ret)); + } else if (OB_ISNULL(attr = OB_NEWx(ObXmlAttribute, allocator, ObMulModeNodeType::M_ATTRIBUTE, ctx_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else { + attr_name_length = src_attr_name == nullptr ? 0 : STRLEN(src_attr_name); + attr_value_length = src_attr_value == nullptr ? 
0 : STRLEN(src_attr_value); + src_attr_value_str.assign_ptr(src_attr_value, attr_value_length); + + if (OB_SUCC(ret) && attr_name_length > 0) { + if (src_attr_name[0] == ':') { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("element-start tag is not well formed", K(ret), KCSTRING(src_attr_name)); + } else if (OB_ISNULL(attr_name = static_cast(allocator->alloc(attr_name_length)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret), K(attr_name_length)); + } else { + MEMCPY(attr_name, src_attr_name, attr_name_length); + qname.assign_ptr(attr_name, attr_name_length); + if (OB_FAIL(ObXmlParserUtils::get_prefix_and_localname(qname, prefix, localname))) { + LOG_WARN("get_prefix_and_localname failed", K(ret), K(attr_name_length), KCSTRING(src_attr_name)); + } else { + attr->set_prefix(prefix); + attr->set_xml_key(localname); + } + } + } + + if (OB_SUCC(ret) && attr_value_length > 0) { + if (OB_FAIL(construct_text_value(src_attr_value_str, attr_value))) { + LOG_WARN("construct_text_value failed", K(ret), K(src_attr_value_str)); + } else { + attr->set_value(attr_value); + } + } + + if (OB_SUCC(ret)) { + if (qname.compare("xmlns:") == 0) { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("ns is invalid", K(ret), KPC(attr), K(attr->get_prefix())); + } else if (ObXmlParserUtils::is_namespace_attribute(attr)) { + attr->set_xml_type(ObMulModeNodeType::M_NAMESPACE); + if (this->is_document_parse() && attr_value.empty() && !prefix.empty()) { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("attr_value is empty", K(ret), K(attr_value)); + } else if (!prefix.empty() && (localname.compare("xml") == 0 || (localname.compare("xmlns") == 0))) { + // "xml" and "xmlns" are reserved words and their use is prohibited + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("ns is invalid", K(ret), KPC(attr), K(localname)); + } else if (OB_FAIL(element.add_attribute(attr))) { + LOG_WARN("add_attribute failed", K(ret)); + } else if (OB_FAIL(this->push_namespace(attr))) { + LOG_WARN("push_namespace failed", 
K(ret)); + } + } else if (OB_FAIL(element.add_attribute(attr))) { + LOG_WARN("add_attribute failed", K(ret)); + } + } + } + return ret; +} + +int ObLibXml2SaxParser::set_element_namespace(ObXmlElement& element) { + INIT_SUCC(ret); + ObXmlAttribute* elem_ns = nullptr; + ObXmlAttribute* attr_ns = nullptr; + ObLibContainerNode* attributes = nullptr; + + if (OB_FAIL(this->get_namespace(element.get_prefix(), true, elem_ns))) { + LOG_WARN("get element namespace failed", K(ret), K(element.get_prefix())); + } else { + element.set_ns(elem_ns); + int attr_size = element.attribute_size(); + for (int i = 0; OB_SUCC(ret) && i < attr_size; ++i) { + ObXmlAttribute* attr = NULL; + if (OB_FAIL(element.get_attribute(attr, i))) { + LOG_WARN("get attribute failed", K(ret), K(i)); + } else if (OB_ISNULL(attr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get attr", K(ret)); + } else if (attr->type() == ObMulModeNodeType::M_ATTRIBUTE) { + if (OB_FAIL(this->get_namespace(attr->get_prefix(), false, attr_ns))) { + LOG_WARN("get attribute namespace failed", K(ret), K(i)); + } else { + attr->set_ns(attr_ns); + attr_ns = nullptr; + } + } + } + } + return ret; +} + +int ObLibXml2SaxParser::start_element(const char* name, const char** attrs) +{ + INIT_SUCC(ret); + ObXmlElement* element = nullptr; + if (OB_FAIL(this->check())) { + LOG_WARN("check failed", K(ret)); + } else if (OB_ISNULL(element = OB_NEWx(ObXmlElement, allocator_, ObMulModeNodeType::M_ELEMENT, ctx_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else if (OB_FAIL(element->init())) { + LOG_WARN("element init failed", K(ret)); + } else if (FALSE_IT(element->set_flags(MEMBER_LAZY_SORTED))) { + } else if(OB_FAIL(ns_cnt_stack_.push_back(0))) { + LOG_WARN("ns_cnt_stack_ current ns cnt init failed", K(ret)); + } else if (OB_FAIL(this->set_element_name(*element, name))) { + LOG_WARN("set_element_name failed", K(ret)); + } + + if (OB_SUCC(ret) && OB_NOT_NULL(attrs)) { + for (const char** cur = attrs; 
OB_SUCC(ret) && cur && *cur; cur += 2) { + if (OB_FAIL(this->add_element_attr(*element, *cur, *(cur + 1)))) { + LOG_WARN("add_element_attr failed", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(this->set_element_namespace(*element))) { + LOG_WARN("parser set_element_namespace failed", K(ret)); + } else if (OB_FAIL(ObXmlParserBase::start_element(element))) { + LOG_WARN("parser start_element failed", K(ret)); + } + } + + return ret; +} + +static bool is_empty_element_tag(xmlParserCtxt* ctxt) +{ + bool res = false; + if (OB_NOT_NULL(ctxt->input->cur) && ctxt->input->cur - ctxt->input->base > 2) { + const xmlChar* c1 = ctxt->input->cur - 1; + const xmlChar* c2 = ctxt->input->cur - 2; + if (*c1 == '>' && *c2 == '/') { + res = true; + } + } + return res; +} + +int ObLibXml2SaxParser::end_element() +{ + INIT_SUCC(ret); + ObXmlElement* element = nullptr; + if (OB_FAIL(this->check())) { + LOG_WARN("check failed", K(ret)); + } else if (OB_ISNULL(element = + ObXmlUtil::xml_node_cast(cur_node_, ObMulModeNodeType::M_ELEMENT))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("end element current node not element", K(ret), K(cur_node_->type())); + } else { + element->set_empty(is_empty_element_tag(ctxt_)); + if (OB_FAIL(ObXmlParserBase::end_element())) { + LOG_WARN("parser end_element failed", K(ret)); + } else if (OB_FAIL(this->pop_namespace())) { + LOG_WARN("pop_namespace failed", K(ret)); + } + } + return ret; +} + +int ObLibXml2SaxParser::alloc_text_node(ObMulModeNodeType type, + const char* src_value, + int value_len, + ObXmlText*& node) +{ + INIT_SUCC(ret); + char* str = nullptr; + if (OB_FAIL(this->check())) { + LOG_WARN("check failed", K(ret)); + } else if (OB_ISNULL(node = OB_NEWx(ObXmlText, allocator_, type, ctx_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else if (value_len > 0) { + if (OB_ISNULL(str = static_cast(allocator_->alloc(value_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret), 
K(value_len)); + } else { + MEMCPY(str, src_value, value_len); + node->set_value(ObString(value_len, str)); + } + } + return ret; +} + +int ObLibXml2SaxParser::add_text_node(ObMulModeNodeType type, const char* value, int len) +{ + INIT_SUCC(ret); + ObXmlText* cdata = nullptr; + if (OB_FAIL(this->check())) { + LOG_WARN("check failed", K(ret)); + } else if (OB_FAIL(this->alloc_text_node(type, + value, + len, + cdata))) { + LOG_WARN("alloc_text_node failed", K(ret)); + } else if (OB_FAIL(ObXmlParserBase::add_text_node(cdata))) { + LOG_WARN("cdata_block failed", K(ret)); + } + return ret; +} + +static bool is_char_entity_ref(xmlParserCtxt* ctxt, ObString& ref) +{ + bool res = false; + const xmlChar* cur = ctxt->input->cur - 1; + const xmlChar* base = ctxt->input->base; + // char entity ref max length + const int MAX_CHAR_REF_LENGTH = 20; + int len = 0; + if (cur > base && *cur == ';') { + --cur; + ++len; + while (cur > base && *cur != '&' && *cur !=';' && len < MAX_CHAR_REF_LENGTH) { + --cur; + ++len; + } + if (cur >= base && *cur == '&') { + ref.assign_ptr(reinterpret_cast(cur), len+1); + res = true; + } + } + return res; +} + +int ObLibXml2SaxParser::characters(const char *ch, int len) +{ + INIT_SUCC(ret); + ObString data; + if (OB_FAIL(this->check())) { + LOG_WARN("check failed", K(ret)); + } else { + // libxml2 will replace entity ref, so if need replace, just copy. + // when parse characters in libxml2, if is plain text, ctxt_->input->cur points same with ch. + // if is predefined entity ref or char ref, ctxt_->input->cur points the end of ref + if (! 
is_entity_replace() + && reinterpret_cast(ctxt_->input->cur) != ch + && is_char_entity_ref(ctxt_, data)) { + LOG_DEBUG("replace character with entity", K(ObString(len, ch)), K(data)); + } else { + data.assign_ptr(ch, len); + } + if (OB_FAIL(this->add_or_merge_text(data))) { + LOG_WARN("parser add or merge text failed", K(ret), K(data)); + } + } + return ret; +} + +int ObLibXml2SaxParser::processing_instruction(const ObString& target, const ObString& data) +{ + INIT_SUCC(ret); + ObXmlAttribute* pi = nullptr; + const char *src_target = target.ptr(); + const char *src_data = data.ptr(); + int name_len = target.length(); + int value_len = data.length(); + char* name = nullptr; + char* value = nullptr; + + if (OB_FAIL(this->check())) { + LOG_WARN("check failed", K(ret)); + } else if (OB_ISNULL(pi = OB_NEWx(ObXmlAttribute, allocator_, ObMulModeNodeType::M_INSTRUCT, ctx_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else if (name_len > 0 && OB_ISNULL(name = static_cast(allocator_->alloc(name_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret), K(name_len)); + } else if (value_len > 0 && OB_ISNULL(value = static_cast(allocator_->alloc(value_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret), K(value_len)); + } else { + if (name_len > 0)MEMCPY(name, src_target, name_len); + if (value_len > 0)MEMCPY(value, src_data, value_len); + pi->set_xml_key(ObString(name_len, name)); + pi->set_value(ObString(value_len, value)); + if (OB_FAIL(ObXmlParserBase::processing_instruction(pi))) { + LOG_WARN("processing_instruction failed", K(ret)); + } + } + return ret; +} + +int ObLibXml2SaxParser::get_parse_byte_num() +{ + int num = 0; + if (OB_ISNULL(ctxt_)) { + } else { + num = ctxt_->input->cur - ctxt_->input->base + ctxt_->input->consumed; + } + return num; +} + +bool ObLibXml2SaxParser::is_parsed_all_input() +{ + return get_parse_byte_num() == xml_text_.length(); +} + +// ObLibXml2SaxParser end + +} 
// namespace common +} // namespace oceanbase diff --git a/deps/oblib/src/lib/xml/ob_libxml2_sax_handler.h b/deps/oblib/src/lib/xml/ob_libxml2_sax_handler.h new file mode 100644 index 0000000000..31623403c9 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_libxml2_sax_handler.h @@ -0,0 +1,47 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ +#ifndef OCEANBASE_LIBXML2_SAX_HANDLER_H_ +#define OCEANBASE_LIBXML2_SAX_HANDLER_H_ +#include "lib/xml/ob_xml_parser.h" +#include "libxml2/libxml/parser.h" +namespace oceanbase { +namespace common { +struct ObLibXml2SaxHandler +{ +public: + // libxml2 sax callback start + static void start_document(void* ctx); + static void end_document(void* ctx); + // used for sax1 + static void start_element(void* ctx, const xmlChar* name, const xmlChar** p); + static void end_element(void* ctx, const xmlChar* name); + static void characters(void* ctx, const xmlChar* ch, int len); + static void cdata_block(void* ctx, const xmlChar* value, int len); + static void comment(void* ctx, const xmlChar* value); + static void processing_instruction(void *ctx, const xmlChar *target, const xmlChar *data); + static void internal_subset(void *ctx, + const xmlChar *name, + const xmlChar *external_id, + const xmlChar *system_id); + static void entity_reference(void *ctx, const xmlChar *name); + // for error msg + static void structured_error(void *ctx, xmlErrorPtr error); + // libxml2 sax callback end + // helper method + static int get_parser(void* ctx, ObLibXml2SaxParser*& parser); 
+ static void init(); + static void destroy(); + static void reset_libxml_last_error(); +}; +} // namespace common +} // namespace oceanbase +#endif //OCEANBASE_LIBXML2_SAX_HANDLER_H_ \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_mul_mode_reader.cpp b/deps/oblib/src/lib/xml/ob_mul_mode_reader.cpp new file mode 100644 index 0000000000..2868cd8cb5 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_mul_mode_reader.cpp @@ -0,0 +1,269 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface implement for multi mode reader abstraction. 
+ */ +#define USING_LOG_PREFIX SQL_ENG +#include "lib/ob_errno.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/ob_define.h" +#include "lib/container/ob_array_iterator.h" +#include "lib/xml/ob_mul_mode_reader.h" +#include "lib/xml/ob_xml_util.h" + +namespace oceanbase { +namespace common { + + +int ObMulModeReader::check_if_match(bool& is_match, bool& filtered, ObIMulModeBase* base) +{ + INIT_SUCC(ret); + is_match = false; + filtered = false; + if (seek_info_.type_ == KEY_TYPE) { + ObString tmp_key; + if (OB_FAIL(base->get_key(tmp_key))) { + LOG_WARN("fail to get key match children xnode.", K(ret)); + } else { + is_match = (tmp_key.compare(seek_info_.key_) == 0); + } + } else if (seek_info_.type_ == ALL_ARR_TYPE || seek_info_.type_ == ALL_KEY_TYPE) { + is_match = true; + } + if (!is_match) { + } else if (OB_ISNULL(seek_info_.filter_)) { + filtered = true; + } else if (OB_FAIL((*seek_info_.filter_)(base, filtered))) { + LOG_WARN("fail to filter xnode", K(ret)); + } + return ret; +} + +void ObMulModeReader::reuse() +{ + is_filtered_ = true; + init(); +} + +void ObMulModeReader::init() +{ + if (OB_NOT_NULL(cur_)) { + bool is_simple_scan = false; + bool is_ordered_scan = false; + bool is_attr_scan = false; + + if (!(flags_ & SEEK_FLAG)) { + is_simple_scan = true; + } else if (seek_info_.type_ == ALL_ARR_TYPE || seek_info_.type_ == ALL_KEY_TYPE) { + is_simple_scan = true; + } else if (seek_info_.type_ == KEY_TYPE) { + is_ordered_scan = true; + } else if (seek_info_.type_ == ATTR_KEY) { + is_attr_scan = true; + } + + if (is_simple_scan) { + if (cur_->is_binary()) { + ObXmlBin* tmp = static_cast(cur_); + new (&bin_iter_) ObXmlBinIterator(tmp); + bin_iter_.set_range(tmp->get_child_start(), tmp->get_child_start() + tmp->count()); + } else { + new (&tree_iter_) ObXmlNode::iterator(((ObXmlNode*)cur_)->begin()); + } + } else if (is_ordered_scan) { + if (cur_->is_binary()) { + ObXmlBin* bin = static_cast(cur_); + + if (!ObXmlUtil::is_container_tc(cur_->type())) { + 
// bin_iter_ is a union member: construct it before use, exactly as every other
+          // branch of init() does, instead of calling set_range() on an iterator
+          // that was never constructed. The range stays empty for non-containers.
+          new (&bin_iter_) ObXmlBinIterator(bin, true);
+          bin_iter_.set_range(0, 0);
+        } else {
+          int64_t low = bin->low_bound(seek_info_.key_);
+          int64_t upper = bin->up_bound(seek_info_.key_);
+
+          new (&bin_iter_) ObXmlBinIterator(static_cast(cur_), true);
+          bin_iter_.set_range(low, upper);
+        }
+      } else {
+        IterRange range;
+        ObXmlNode* node = static_cast(cur_);
+        node->ObLibContainerNode::get_children(seek_info_.key_, range);
+        new (&tree_iter_) ObXmlNode::iterator(node->sorted_begin());
+
+        tree_iter_.set_range(range.first - tree_iter_, range.second - tree_iter_ + 1);
+      }
+    } else if (is_attr_scan) {
+      if (cur_->is_binary()) {
+        new (&bin_iter_) ObXmlBinIterator(static_cast(cur_));
+        ObXmlBin* bin = static_cast(cur_);
+        // Iterate the entries located before the first child.
+        bin_iter_.set_range(0, bin->get_child_start());
+      } else {
+        ObXmlNode* handle = nullptr;
+        if (!(cur_->type() == M_ELEMENT || cur_->type() == M_DOCUMENT)) {
+          // Only elements and documents are scanned for attributes: empty range.
+          new (&tree_iter_) ObXmlNode::iterator((static_cast(cur_))->sorted_begin());
+          tree_iter_.set_range(0, 0);
+        } else if (OB_ISNULL(handle = static_cast(cur_->get_attribute_handle()))) {
+          // No attribute handle on this node: empty range.
+          new (&tree_iter_) ObXmlNode::iterator((static_cast(cur_))->sorted_begin());
+          tree_iter_.set_range(0, 0);
+        } else {
+          new (&tree_iter_) ObXmlNode::iterator(handle->begin());
+        }
+      }
+    }
+  }
+}
+
+// Returns the reader's entry node (cur_) through `node`; always OB_SUCCESS.
+int ObMulModeReader::eval_entry(ObIMulModeBase*& node)
+{
+  node = cur_;
+  return OB_SUCCESS;
+}
+
+// Replaces the seek parameters. The iterators are not re-initialized here.
+void ObMulModeReader::alter_seek_param(const ObPathSeekInfo& info)
+{
+  seek_info_ = info;
+}
+
+// Replaces only the filter of the current seek parameters.
+void ObMulModeReader::alter_filter(ObMulModeFilter* filter)
+{
+  seek_info_.filter_ = filter;
+}
+
+
+// Advances the iterator prepared for an ATTR_KEY seek and returns, through
+// `node`, the next entry accepted by seek_info_.filter_ (every entry is
+// accepted when no filter is set).
+// Returns OB_ITER_END when the iterator is exhausted, and OB_ERR_UNEXPECTED
+// when cur_ is null, is not OB_XML_TYPE, or the seek type is not ATTR_KEY.
+// `filter_type` is not used by this function.
+int ObMulModeReader::attr_next(ObIMulModeBase*& node, ObMulModeNodeType filter_type)
+{
+  INIT_SUCC(ret);
+
+  if (OB_ISNULL(cur_) || cur_->data_type() != OB_XML_TYPE || seek_info_.type_ != ATTR_KEY) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("cur_ is null or data is not xml type not supported yet.", K(ret), KP(cur_), K(seek_info_.type_));
+  } else {
+    bool is_found = false;
+
+    for (; OB_SUCC(ret) && !is_found; ) {
+      if (cur_->is_binary()) {
+        if (!bin_iter_.is_valid()) {
+          ret =
OB_ERR_UNEXPECTED; + LOG_WARN("bin iter is invalid.", K(ret), KP(cur_)); + } else if (bin_iter_.end()) { + ret = OB_ITER_END; + } else { + node = *bin_iter_; + ++bin_iter_; + } + } else { + if (tree_iter_.end()) { + ret = OB_ITER_END; + } else { + node = *tree_iter_; + ++tree_iter_; + } + } + + is_found = false; + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(seek_info_.filter_)) { + is_found = true; + } else if (OB_FAIL((*seek_info_.filter_)(node, is_found))) { + LOG_WARN("failed to filter node.", K(ret)); + } + } + } + + return ret; +} + +int ObMulModeReader::scan_next(ObIMulModeBase*& node) +{ + INIT_SUCC(ret); + bool is_found = false; + ObXmlNode::iterator save_iterator; + ObXmlBin::iterator save_bin; + + while (OB_SUCC(ret) && !is_found) { + if (cur_->is_binary()) { + if (!bin_iter_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("bin iter is invalid.", K(ret), KP(cur_)); + } else if (bin_iter_.end()) { + ret = OB_ITER_END; + } else { + save_bin = bin_iter_; + node = *bin_iter_; + ++bin_iter_; + } + } else { + if (tree_iter_.end()) { + ret = OB_ITER_END; + } else { + save_iterator = tree_iter_; + node = *tree_iter_; + ++tree_iter_; + } + } + + if (OB_SUCC(ret)) { + if (OB_ISNULL(seek_info_.filter_)) { + is_found = true; + } else if (OB_FAIL((*seek_info_.filter_)(node, is_found))) { + LOG_WARN("failed to filter node.", K(ret)); + } + } + } + + return ret; +} + +int ObMulModeReader::next(ObIMulModeBase*& node) +{ + INIT_SUCC(ret); + + if (OB_ISNULL(cur_) || cur_->data_type() != OB_XML_TYPE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur_ is null or data is not xml type not supported yet.", K(ret), KP(cur_)); + } else { + if (!(flags_ & SEEK_FLAG)) { + if (OB_FAIL(scan_next(node))) { + LOG_WARN("fail to filter next node.", K(ret)); + } + } else if (seek_info_.type_ == KEY_TYPE) { + if (get_mul_mode_tc(cur_->type()) != MulModeContainer) { + ret = OB_ITER_END; + } else if (OB_FAIL(scan_next(node))) { + LOG_WARN("fail to get key match children xnode.", K(ret)); + } + 
} else if (seek_info_.type_ == INDEX_TYPE) { + node = cur_->at(seek_info_.index_); + if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get node.", K(ret), K(seek_info_.index_)); + } + } else if (seek_info_.type_ == ALL_ARR_TYPE || seek_info_.type_ == ALL_KEY_TYPE) { + if (get_mul_mode_tc(cur_->type()) != MulModeContainer) { + ret = OB_ITER_END; + } else if (OB_FAIL(scan_next(node))) { + LOG_WARN("fail to filter next node.", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to all children xnode.", K(ret), K(seek_info_.type_), K(cur_->data_type())); + } + } + + return ret; +} + +int ObMulModeReader::get_children_nodes(ObIArray& nodes) +{ + INIT_SUCC(ret); + return ret; +} + +} // namespace common +} // namespace oceanbase diff --git a/deps/oblib/src/lib/xml/ob_mul_mode_reader.h b/deps/oblib/src/lib/xml/ob_mul_mode_reader.h new file mode 100644 index 0000000000..4392d3e52d --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_mul_mode_reader.h @@ -0,0 +1,185 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface define for multi mode reader abstraction. 
+ */ + +#ifndef OCEANBASE_SQL_OB_MULTI_MODE_READER +#define OCEANBASE_SQL_OB_MULTI_MODE_READER + +#include "lib/xml/ob_multi_mode_interface.h" +#include "ob_tree_base.h" +#include "lib/xml/ob_xml_tree.h" +#include "ob_multi_mode_bin.h" +#include "lib/xml/ob_xml_bin.h" +#include "lib/container/ob_array_iterator.h" +#include "lib/number/ob_number_v2.h" // for number::ObNumber + +namespace oceanbase { +namespace common { + + +enum SimpleSeekType { + ATTR_KEY, + KEY_TYPE, // .KEY + INDEX_TYPE, // [IDX] + ALL_ARR_TYPE, // all element + ALL_KEY_TYPE, // element + POST_SCAN_TYPE, + PRE_SCAN_TYPE +}; + +class ObPathLocationNode; + +typedef struct ObPathSeekInfo { + SimpleSeekType type_; + ObMulModeFilter* filter_; + ObString key_; + int64_t index_; + + ObPathSeekInfo() + : type_(ALL_ARR_TYPE), + filter_(nullptr), + key_(), + index_(-1) {} + + ObPathSeekInfo(SimpleSeekType seek_type) + : ObPathSeekInfo() + { + type_ = seek_type; + } + + ObPathSeekInfo(ObMulModeFilter* filter) + : ObPathSeekInfo() + { + filter_ = filter; + } + + ObPathSeekInfo(const ObPathSeekInfo& from) + : type_(from.type_), + filter_(from.filter_), + key_(from.key_), + index_(from.index_) + { + } + + ObPathSeekInfo& operator=(const ObPathSeekInfo& from) + { + type_ = from.type_; + filter_ = from.filter_; + key_ = from.key_; + index_= from.index_; + return *this; + } +} ObPathSeekInfo; + +class ObIMulModeBase; + +struct ObMulModeReader { + friend class ObSeekIterator; + friend class ObSeekComplexIterator; + enum MulModeIterFlag { + DEFAULT_FLAG, + SEEK_FLAG = 0x01 + }; + + ~ObMulModeReader() {} + + // construct + ObMulModeReader(ObIMulModeBase* node, MulModeIterFlag flag = DEFAULT_FLAG) + : cur_(node), + flags_(flag), + is_eval_cur_(false), + is_filtered_(false), + seek_info_() + { + init(); + } + + ObMulModeReader(const ObMulModeReader& from) + { + cur_ = from.cur_; + flags_ = from.flags_; + seek_info_ = from.seek_info_; + is_eval_cur_ = from.is_eval_cur_; + is_filtered_= from.is_filtered_; + init(); + } 
+ + ObMulModeReader(ObIMulModeBase* node, const ObPathSeekInfo& info) + { + seek_info_ = info; + is_eval_cur_ = false; + is_filtered_ = false; + cur_ = node; + flags_ = SEEK_FLAG; + init(); + } + void construct(ObIMulModeBase* node, const ObPathSeekInfo& info) + { + cur_ = node; + seek_info_ = info; + is_eval_cur_ = false; + is_filtered_ = false; + flags_ = SEEK_FLAG; + init(); + } + int next(ObIMulModeBase*& node); + + int attr_next(ObIMulModeBase*& node, ObMulModeNodeType filter_type); + + int eval_entry(ObIMulModeBase*& node); + + void set_entry(ObIMulModeBase* node) { + cur_ = node; + init(); + } + void reuse(); + int check_if_match(bool& is_match, bool& filtered, ObIMulModeBase* base); + + void alter_seek_param(const ObPathSeekInfo& info); + + void alter_filter(ObMulModeFilter* filter); + + + // for compile + + int get_parent_node(ObIMulModeBase*& nodes) { + return 0; + } + + int get_upward_nodes(ObIArray& nodes) { + return 0; + } + + int get_children_nodes(ObIArray& nodes); + +protected: + void init(); + int scan_next(ObIMulModeBase*& node); + +protected: + ObIMulModeBase* cur_; + + uint32_t flags_; + bool is_eval_cur_; + bool is_filtered_; + ObPathSeekInfo seek_info_; + union { + ObXmlBin::iterator bin_iter_; + ObXmlNode::iterator tree_iter_; + }; +}; + + +} // namespace common +} // namespace oceanbase + +#endif // OCEANBASE_SQL_OB_MULTI_MODE_READER diff --git a/deps/oblib/src/lib/xml/ob_multi_mode_bin.cpp b/deps/oblib/src/lib/xml/ob_multi_mode_bin.cpp new file mode 100644 index 0000000000..ae0b914e89 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_multi_mode_bin.cpp @@ -0,0 +1,812 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implement for the xml & json type data basic interface abstraction. + */ +#define USING_LOG_PREFIX LIB + +#include "lib/string/ob_string_buffer.h" +#include "lib/xml/ob_multi_mode_interface.h" +#include "lib/xml/ob_xml_tree.h" +#include "lib/xml/ob_multi_mode_bin.h" + +namespace oceanbase { +namespace common { + +class ObIMulModeBase; + + + +ObMulBinHeaderSerializer::ObMulBinHeaderSerializer( + ObStringBuffer* buffer, + ObMulModeNodeType type, + uint64_t total_size, + uint64_t count) + : buffer_(buffer), + begin_(buffer->length()), + total_(total_size), + count_(count) +{ + type_ = type; + + obj_var_size_type_ = ObMulModeVar::get_var_type(total_); + entry_var_size_type_ = ObMulModeVar::get_var_type(total_); + count_var_size_type_ = ObMulModeVar::get_var_type(count_); + + obj_var_size_ = ObMulModeVar::get_var_size(obj_var_size_type_); + entry_var_size_ = obj_var_size_; + count_var_size_ = ObMulModeVar::get_var_size(count_var_size_type_); + + count_var_offset_ = MUL_MODE_BIN_HEADER_LEN; + if (is_extend_type(type)) { + count_var_offset_++; + } + + obj_var_offset_ = count_var_offset_ + count_var_size_; +} + +void ObMulBinHeaderSerializer::set_var_value(uint8_t var_size, uint8_t offset, uint64_t value) +{ + if (var_size == 1) { + *reinterpret_cast(buffer_->ptr() + begin_ + offset) = static_cast(value); + } else if (var_size == 2) { + *reinterpret_cast(buffer_->ptr() + begin_ + offset) = static_cast(value); + } else if (var_size == 4) { + *reinterpret_cast(buffer_->ptr() + begin_ + offset) = static_cast(value); + } else { + *reinterpret_cast(buffer_->ptr() + begin_ + offset) = value; + } +} + +void 
ObMulBinHeaderSerializer::set_obj_size(uint64_t size)
+{
+  // Store the object size in the header slot at obj_var_offset_ (obj_var_size_ bytes wide).
+  set_var_value(obj_var_size_, obj_var_offset_, size);
+}
+
+// Stores the element count in the header slot at count_var_offset_
+// (count_var_size_ bytes wide).
+void ObMulBinHeaderSerializer::set_count(uint64_t size)
+{
+  set_var_value(count_var_size_, count_var_offset_, size);
+}
+
+// Read-side constructor: only records the input bytes. Call deserialize() to
+// decode them.
+ObMulBinHeaderSerializer::ObMulBinHeaderSerializer(const char* data, uint64_t length)
+  : data_(data),
+    data_len_(length)
+{
+}
+
+// Writes the header for type_ into buffer_.
+// - Scalar types: appends only the type tag (two bytes for extended types).
+// - Other types: places an ObMulModeBinHeader at start(), writes the two
+//   extended-type bytes when type_ is an extended type, then fills in the
+//   object size and the element count.
+// Returns the error of the failing reserve()/append() call, if any.
+int ObMulBinHeaderSerializer::serialize()
+{
+  INIT_SUCC(ret);
+  if (OB_FAIL(buffer_->reserve(MUL_MODE_BIN_HEADER_LEN))) {
+    LOG_WARN("failed to reserve", K(ret), K(buffer_->length()));
+  } else if (is_scalar_data_type(type_)) {
+    if (is_extend_type(type_)) {
+      ObMulModeExtendStorageType tmp = get_extend_storage_type(type_);
+      if (OB_FAIL(buffer_->append(reinterpret_cast(&tmp.first), sizeof(uint8_t)))
+          || OB_FAIL(buffer_->append(reinterpret_cast(&tmp.second), sizeof(uint8_t)))) {
+        LOG_WARN("failed to append", K(ret), K(buffer_->length()));
+      }
+    } else if (OB_FAIL(buffer_->append(reinterpret_cast(&type_), sizeof(uint8_t)))) {
+      LOG_WARN("failed to append", K(ret), K(buffer_->length()));
+    }
+  } else if (OB_FAIL(buffer_->reserve(header_size()))) {
+    LOG_WARN("failed to reserve", K(ret), K(buffer_->length()));
+  } else {
+    buffer_->set_length(start() + header_size());
+    new (buffer_->ptr() + start())ObMulModeBinHeader(static_cast(type_),
+                                                     ObMulModeVar::get_var_type(total_),
+                                                     ObMulModeVar::get_var_type(count_),
+                                                     ObMulModeVar::get_var_type(total_),
+                                                     static_cast(1));
+
+    if (is_extend_type(type_)) {
+      ObMulModeExtendStorageType tmp = get_extend_storage_type(type_);
+      *reinterpret_cast(buffer_->ptr() + start()) = static_cast(tmp.first);
+      // The byte right after the fixed header carries the second half of the
+      // extended type, matching the scalar path above. Presumably this is the
+      // byte deserialize() combines with the first one via eval_data_type();
+      // confirm MUL_MODE_BIN_HEADER_LEN == 2.
+      *reinterpret_cast(buffer_->ptr() + start() + MUL_MODE_BIN_HEADER_LEN) = static_cast(tmp.second);
+    }
+    set_obj_size(total_);
+    set_count(count_);
+  }
+
+  return ret;
+}
+
+// Decodes the header from data_/data_len_: the type tag and, for non-scalar
+// types, the widths, offsets and values of the size and count fields.
+// Returns OB_INVALID_ARGUMENT when the input is too short for what it announces.
+int ObMulBinHeaderSerializer::deserialize()
+{
+  INIT_SUCC(ret);
+  if (data_len_ < 1) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("failed to deserialize, data len is 0", K(ret));
+  } else {
+    type_ =
static_cast(*data_); + if (is_scalar_data_type(type_) && is_extend_type(type_)) { + if (data_len_ <= 2) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to deserialize, data len is 2", K(ret), K(type_), K(data_len_)); + } else { + type_ = eval_data_type(type_, static_cast(data_[1])); + } + } else if (is_scalar_data_type(type_)) { + } else if (data_len_ <= 2) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to deserialize, data len less than 2", K(ret), K(type_), K(data_len_)); + } else { + const ObMulModeBinHeader* header = reinterpret_cast(data_); + obj_var_size_ = ObMulModeVar::get_var_size(header->obj_size_type_); + entry_var_size_ = ObMulModeVar::get_var_size(header->kv_entry_size_type_); + count_var_size_ = ObMulModeVar::get_var_size(header->count_size_type_); + count_var_offset_ = MUL_MODE_BIN_HEADER_LEN; + + obj_var_size_type_ = header->obj_size_type_; + entry_var_size_type_ = header->kv_entry_size_type_; + count_var_size_type_ = header->count_size_type_; + + if (is_extend_type(type_)) { + count_var_offset_++; + } + obj_var_offset_ = count_var_offset_ + count_var_size_; + + if (obj_var_offset_ + obj_var_size_ > data_len_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to deserialize, data len less than 2", K(ret), K(type_), + K(data_len_), K(entry_var_size_), K(count_var_size_), K(obj_var_size_)); + } else { + if (is_extend_type(type_)) { + type_ = eval_data_type(type_, data_[2]); + } + ObMulModeVar::read_size_var(data_ + obj_var_offset_, obj_var_size_, &total_); + ObMulModeVar::read_size_var(data_ + count_var_offset_, count_var_size_, &count_); + } + } + } + + return ret; +} + +int ObMulModeScalarSerializer::serialize_scalar_header(ObMulModeNodeType type, ObStringBuffer& buffer) +{ + INIT_SUCC(ret); + + bool is_extend = is_extend_type(type); + ObMulModeExtendStorageType extend_type; + + if (is_extend) { + extend_type = get_extend_storage_type(type); + } + + uint8_t reserve_size = sizeof(uint8_t) + (is_extend ? 
sizeof(uint8_t) : 0); + uint64_t pos = buffer.length(); + if (OB_FAIL(buffer.reserve(reserve_size))) { + LOG_WARN("failed to reserve size for type header", K(ret), K(buffer.length())); + } else { + char* data = buffer.ptr(); + if (is_extend) { + data[pos++] = extend_type.first; + data[pos++] = extend_type.second; + } else { + data[pos++] = static_cast(type); + } + buffer.set_length(pos); + } + + return ret; +} + +int ObMulModeScalarSerializer::serialize_integer(ObIMulModeBase* node, int32_t depth) +{ + INIT_SUCC(ret); + int64_t value = is_int_type(node->type()) ? node->get_int() : node->get_uint(); + int64_t ser_len = serialization::encoded_length_vi64(value); + int64_t pos = buffer_->length(); + if (OB_FAIL(buffer_->reserve(ser_len))) { + LOG_WARN("failed to reserver serialize size for int", K(ret), K(ser_len)); + } else if (OB_FAIL(serialization::encode_vi64(buffer_->ptr(), buffer_->capacity(), pos, value))) { + LOG_WARN("failed to serialize for int", K(ret), K(ser_len)); + } else if (OB_FAIL(buffer_->set_length(pos))) { + LOG_WARN("failed to update len for int", K(ret), K(pos)); + } + return ret; +} + +int ObMulModeScalarSerializer::serialize_decimal(ObIMulModeBase* node, int32_t depth) +{ + INIT_SUCC(ret); + ObPrecision prec = node->get_decimal_precision(); + ObScale scale = node->get_decimal_scale(); + int64_t ser_len = node->get_serialize_size(); + int64_t pos = buffer_->length(); + if (OB_FAIL(buffer_->reserve(ser_len))) { + LOG_WARN("failed to reserver serialize size for decimal obj", K(ret), K(pos), K(ser_len)); + } else if (OB_FAIL(serialization::encode_i16(buffer_->ptr(), buffer_->capacity(), pos, prec))) { + LOG_WARN("failed to serialize for decimal precision", K(ret), K(pos), K(prec)); + } else if (OB_FAIL(buffer_->set_length(pos))) { + LOG_WARN("failed to set length for decimal precision", K(ret), K(pos), K(prec)); + } else if (OB_FAIL(serialization::encode_i16(buffer_->ptr(), buffer_->capacity(), pos, scale))) { + LOG_WARN("failed to serialize for 
decimal precision", K(ret), K(pos), K(scale)); + } else if (OB_FAIL(buffer_->set_length(pos))) { + LOG_WARN("failed to set length for decimal scale", K(ret), K(pos), K(scale)); + } else if (OB_FAIL(node->get_decimal_data().serialize(buffer_->ptr(), buffer_->capacity(), pos))) { + LOG_WARN("failed to serialize for decimal value", K(ret), K(pos)); + } else if (OB_FAIL(buffer_->set_length(pos))){ + LOG_WARN("failed to update len for decimal obj", K(ret), K(pos)); + } + return ret; +} + +int ObMulModeScalarSerializer::serialize_string(ObIMulModeBase* node, int32_t depth) +{ + INIT_SUCC(ret); + + ObString value; + if (OB_FAIL(node->get_value(value))) { + LOG_WARN("failed to get string value for obj", K(ret)); + } else if (OB_FAIL(buffer_->reserve(value.length() + 2))) { + LOG_WARN("failed to reserver serialize size obj", K(ret), K(value.length())); + } else { + int64_t ser_len = serialization::encoded_length_vi64(value.length()); + int64_t pos = buffer_->length() + sizeof(uint8_t); + ObMulModeNodeType type = node->type(); + + if (OB_FAIL(serialize_scalar_header(type, *buffer_))) { + LOG_WARN("failed to serialize type for str obj", K(ret), K(ser_len)); + } else if (OB_FAIL(buffer_->reserve(ser_len))) { + LOG_WARN("failed to reserver serialize size for str obj", K(ret), K(ser_len)); + } else if (OB_FAIL(serialization::encode_vi64(buffer_->ptr(), buffer_->capacity(), pos, value.length()))) { + LOG_WARN("failed to serialize for str obj", K(ret), K(ser_len)); + } else if (OB_FAIL(buffer_->set_length(pos))) { + LOG_WARN("failed to update len for str obj", K(ret), K(pos)); + } else if (OB_FAIL(buffer_->append(value.ptr(), value.length()))) { + LOG_WARN("failed to append string value", K(ret)); + } + } + + return ret; +} + +int ObMulModeScalarSerializer::serialize_null(ObIMulModeBase* node, int32_t depth) +{ + INIT_SUCC(ret); + + if (OB_FAIL(buffer_->append("\0", sizeof(uint8_t)))) { + LOG_WARN("failed to append null obj", K(ret)); + } + + return ret; +} + +int 
ObMulModeScalarSerializer::serialize_boolean(ObIMulModeBase* node, int32_t depth) +{ + INIT_SUCC(ret); + char value = static_cast(node->get_boolean()); + if (OB_FAIL(buffer_->append(reinterpret_cast(&value), sizeof(char)))) { + LOG_WARN("failed to append bool obj", K(ret)); + } + + return ret; +} + +int ObMulModeScalarSerializer::serialize_time(ObIMulModeBase* node, int32_t depth) +{ + INIT_SUCC(ret); + + ObTime ob_time = node->get_time(); + // todo: switch case for ObMulModeNodeType + int64_t value = ObTimeConverter::ob_time_to_time(ob_time); + + if (OB_FAIL(buffer_->append(reinterpret_cast(&value), sizeof(int64_t)))) { + LOG_WARN("failed to append timestamp obj value", K(ret)); + } + return ret; +} + +int ObMulModeScalarSerializer::serialize_double(ObIMulModeBase* node, int32_t depth) +{ + INIT_SUCC(ret); + + double value = node->get_double(); + if (isnan(value) || isinf(value)) { + ret = OB_INVALID_NUMERIC; + LOG_WARN("invalid double value", K(ret), K(value)); + } else if (OB_FAIL(buffer_->append(reinterpret_cast(&value), sizeof(double)))) { + LOG_WARN("failed to append double obj", K(ret)); + } + + return ret; +} + +ObMulModeContainerSerializer::ObMulModeContainerSerializer(ObIMulModeBase* root, ObStringBuffer* buffer, int64_t children_count) + : header_(buffer, root->type(), root->get_serialize_size(), children_count) +{ + root_ = root; + type_ = root->type(); +} + +ObMulModeContainerSerializer::ObMulModeContainerSerializer(ObIMulModeBase* root, ObStringBuffer* buffer) + : header_(buffer, root->type(), root->get_serialize_size(), root->size()) +{ + root_ = root; + type_ = root->type(); +} + +ObMulModeContainerSerializer::ObMulModeContainerSerializer(const char* data, int64_t length) + : header_(data, length), + data_(data), + length_(length) +{ +} + +/* var size */ +int ObMulModeVar::read_var(const char *data, uint8_t type, uint64_t *var) +{ + INIT_SUCC(ret); + if (OB_ISNULL(data)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("input data null val.", K(ret)); + } 
else { + ObMulModeBinLenSize size = static_cast(type); + switch (size) { + case MBL_UINT8: { + *var = static_cast(*reinterpret_cast(data)); + break; + } + case MBL_UINT16: { + *var = static_cast(*reinterpret_cast(data)); + break; + } + case MBL_UINT32: { + *var = static_cast(*reinterpret_cast(data)); + break; + } + case MBL_UINT64: { + *var = static_cast(*reinterpret_cast(data)); + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid var type.", K(ret), K(type)); + break; + } + } + } + return ret; +} + +int ObMulModeVar::read_size_var(const char *data, uint8_t var_size, int64_t *var) +{ + INIT_SUCC(ret); + if (var_size == 1) { + *var = *reinterpret_cast (data); + } else if (var_size == 2) { + *var = *reinterpret_cast(data); + } else if (var_size == 4) { + *var = *reinterpret_cast(data); + } else if (var_size == 8) { + *var = *reinterpret_cast(data); + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid var type.", K(ret), K(var_size)); + } + return ret; +} + +int ObMulModeVar::append_var(uint64_t var, uint8_t type, ObStringBuffer &result) +{ + INIT_SUCC(ret); + ObMulModeBinLenSize size = static_cast(type); + switch (size) { + case MBL_UINT8: { + uint8_t var_trans = static_cast(var); + ret = result.append(reinterpret_cast(&var_trans), sizeof(uint8_t)); + break; + } + case MBL_UINT16: { + uint16_t var_trans = static_cast(var); + ret = result.append(reinterpret_cast(&var_trans), sizeof(uint16_t)); + break; + } + case MBL_UINT32: { + uint32_t var_trans = static_cast(var); + ret = result.append(reinterpret_cast(&var_trans), sizeof(uint32_t)); + break; + } + case MBL_UINT64: { + uint64_t var_trans = static_cast(var); + ret = result.append(reinterpret_cast(&var_trans), sizeof(uint64_t)); + break; + } + default: { + ret = OB_NOT_SUPPORTED; + break; + } + } + if (OB_FAIL(ret)) { + LOG_WARN("fail to do append var.", K(ret), K(size), K(var)); + } + return ret; +} + +int ObMulModeVar::reserve_var(uint8_t type, ObStringBuffer &result) +{ + INIT_SUCC(ret); + 
ObMulModeBinLenSize size = static_cast(type); + switch (size) { + case MBL_UINT8: { + uint8_t var = 0; + ret = result.append(reinterpret_cast(&var), sizeof(uint8_t)); + break; + } + case MBL_UINT16: { + uint16_t var = 0; + ret = result.append(reinterpret_cast(&var), sizeof(uint16_t)); + break; + } + case MBL_UINT32: { + uint32_t var = 0; + ret = result.append(reinterpret_cast(&var), sizeof(uint32_t)); + break; + } + case MBL_UINT64: { + uint64_t var = 0; + ret = result.append(reinterpret_cast(&var), sizeof(uint64_t)); + break; + } + default: { + ret = OB_NOT_SUPPORTED; + break; + } + } + if (OB_FAIL(ret)) { + LOG_WARN("fail to do reserve var.", K(ret), K(size)); + } + return ret; +} + +int ObMulModeVar::set_var(uint64_t var, uint8_t type, char *pos) +{ + INIT_SUCC(ret); + if (OB_ISNULL(pos)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("output pos is null.", K(ret)); + } else { + ObMulModeBinLenSize size = static_cast(type); + switch (size) { + case MBL_UINT8: { + uint8_t *val_pos = reinterpret_cast(pos); + *val_pos = static_cast(var); + break; + } + case MBL_UINT16: { + uint16_t *val_pos = reinterpret_cast(pos); + *val_pos = static_cast(var); + break; + } + case MBL_UINT32: { + uint32_t *val_pos = reinterpret_cast(pos); + *val_pos = static_cast(var); + break; + } + case MBL_UINT64: { + uint64_t *val_pos = reinterpret_cast(pos); + *val_pos = static_cast(var); + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid var type.", K(ret), K(size)); + break; + } + } + } + return ret; +} + +uint64_t ObMulModeVar::get_var_size(uint8_t type) +{ + uint64_t var_size = MBL_MAX; + ObMulModeBinLenSize size = static_cast(type); + switch (size) { + case MBL_UINT8: { + var_size = sizeof(uint8_t); + break; + } + case MBL_UINT16: { + var_size = sizeof(uint16_t); + break; + } + case MBL_UINT32: { + var_size = sizeof(uint32_t); + break; + } + case MBL_UINT64: { + var_size = sizeof(uint64_t); + break; + } + default: { + LOG_WARN_RET(OB_ERR_UNEXPECTED, "invalid var type.", 
K(OB_NOT_SUPPORTED), K(size)); + break; + } + } + return var_size; +} + +uint8_t ObMulModeVar::get_var_type(uint64_t var) +{ + ObMulModeBinLenSize lsize = MBL_UINT64; + if ((var & 0xFFFFFFFFFFFFFF00ULL) == 0) { + lsize = MBL_UINT8; + } else if ((var & 0xFFFFFFFFFFFF0000ULL) == 0) { + lsize = MBL_UINT16; + } else if ((var & 0xFFFFFFFF00000000ULL) == 0) { + lsize = MBL_UINT32; + } + return static_cast(lsize); +} + +int ObMulModeVar::read_var(const char *data, uint8_t type, int64_t *var) +{ + INIT_SUCC(ret); + if (OB_ISNULL(data)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("input data is null.", K(ret)); + } else { + ObMulModeBinLenSize size = static_cast(type); + switch (size) { + case MBL_UINT8: { + *var = static_cast(*reinterpret_cast(data)); + break; + } + case MBL_UINT16: { + *var = static_cast(*reinterpret_cast(data)); + break; + } + case MBL_UINT32: { + *var = static_cast(*reinterpret_cast(data)); + break; + } + case MBL_UINT64: { + *var = static_cast(*reinterpret_cast(data)); + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid var type.", K(ret), K(type)); + break; + } + } + } + return ret; +} + +uint64_t ObMulModeVar::var_int2uint(int64_t var) +{ + ObMulModeBinLenSize size = static_cast(ObMulModeVar::get_var_type(var)); + uint64 val = 0; + switch (size) { + case MBL_UINT8: { + val = static_cast(static_cast(var)); + break; + } + case MBL_UINT16: { + val = static_cast(static_cast(var)); + break; + } + case MBL_UINT32: { + val = static_cast(static_cast(var)); + break; + } + case MBL_UINT64: { + val = static_cast(static_cast(var)); + break; + } + default: { + LOG_WARN_RET(OB_ERR_UNEXPECTED, "invalid var type.", K(size)); + break; + } + } + return val; +} + +int64_t ObMulModeVar::var_uint2int(uint64_t var, uint8_t entry_size) +{ + ObMulModeBinLenSize size = static_cast(entry_size); + int64_t val = 0; + switch (size) { + case MBL_UINT8: { + if (var > INT8_MAX) { + val = static_cast(static_cast(static_cast(var))); + } else { + val = 
static_cast(static_cast(static_cast(var))); + } + break; + } + case MBL_UINT16: { + if (var > INT16_MAX) { + val = static_cast(static_cast(static_cast(var))); + } else { + val = static_cast(static_cast(static_cast(var))); + } + break; + } + case MBL_UINT32: { + val = static_cast(static_cast(static_cast(var))); + break; + } + case MBL_UINT64: { + val = static_cast(var); + break; + } + default: { + LOG_WARN_RET(OB_ERR_UNEXPECTED, "invalid var type.", K(size)); + break; + } + } + return val; +} + +uint8_t ObMulModeVar::get_var_type(int64_t var) +{ + ObMulModeBinLenSize lsize = MBL_UINT64; + if (var <= INT8_MAX && var >= INT8_MIN) { + lsize = MBL_UINT8; + } else if (var <= INT16_MAX && var >= INT16_MIN) { + lsize = MBL_UINT16; + } else if (var <= INT32_MAX && var >= INT32_MIN) { + lsize = MBL_UINT32; + } + return static_cast(lsize); +} + +bool ObBinMergeCtx::is_all_deleted() +{ + bool ret_bool = true; + for (int i = 0; ret_bool && i < del_map_.size(); ++i) { + ret_bool = del_map_.at(i); + } + return ret_bool; +} +int ObBinMergeCtx::get_valid_key_count() +{ + int count = 0; + for (int i = 0; i < del_map_.size(); ++i) { + if (!(del_map_.at(i))) { + ++count; + } + } + return count; +} +uint64_t ObMulModeBinMerge::estimated_count(bool retry, ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch) +{ + return retry ? 
ctx.retry_count_ : origin.attribute_count() + origin.size() + ctx.get_valid_key_count(); +} +int ObMulModeBinMerge::merge(ObIMulModeBase& origin, ObIMulModeBase& patch, ObIMulModeBase& res) +{ + INIT_SUCC(ret); + ObBinMergeCtx ctx(origin.get_allocator()); + // init ctx, and estimating buffer size + if (origin.is_tree() || patch.is_tree()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("must be binary.", K(patch.is_tree()), K(origin.is_tree()), K(ret)); + } else if (OB_FAIL(init_merge_info(ctx, origin, patch, res))) { + LOG_WARN("fail to init ctx", K(ret)); + } else if (OB_FAIL(inner_merge(ctx, origin, patch, res))) { + LOG_WARN("fail to merge", K(ret)); + } + return ret; +} +int ObMulModeBinMerge::inner_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObIMulModeBase& res, bool retry) +{ + INIT_SUCC(ret); + // duplicate ns that defined in this element should be delete + // because namespaces with the same key are subject to the latest + // but this definition is only valid in this element and its descendant + // so, restore ns vec when finish merging this element, in case its sibling loses ns definition + ObStack origin_del_map(ctx.allocator_); + int64_t start = 0; + bool need_merge = false; + if (OB_ISNULL(ctx.buffer_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + start = ctx.buffer_->length(); + } + if (ctx.reuse_del_map_ ) { + origin_del_map = ctx.del_map_; + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(if_need_merge(ctx, origin, patch, res, need_merge))) { + LOG_WARN("fail to check if need to merge", K(ret)); + } else if (!need_merge) { + if (OB_FAIL(append_res_without_merge(ctx, origin, patch, res))) { + LOG_WARN("fail to merge", K(ret)); + } + } else { + // init common_header, the total_size and count is not precise + // check after all value is merged + ObMulBinHeaderSerializer cur_header(ctx.buffer_, get_res_type(origin.type(), patch.type()), + estimated_length(retry, ctx, origin, patch), + 
estimated_count(retry, ctx, origin, patch)); + if (OB_FAIL(append_header_to_res(ctx, origin, patch, cur_header, res))) { + LOG_WARN("fail to append header", K(ret)); + } else if (!if_need_append_key(ctx, origin, patch, res)) { + if (OB_FAIL(append_key_without_merge(ctx, origin, cur_header, res))) { + LOG_WARN("fail to copy key", K(ret)); + } + } else if (OB_FAIL(append_merge_key(ctx, origin, patch, cur_header, res))) { + LOG_WARN("fail to merge key", K(ret)); + } + int64_t merged_len = 0; + int append_key_count = ctx.defined_ns_idx_.size(); + for (int i = 0; OB_SUCC(ret) && i < cur_header.count_; ++i) { + uint64_t origin_len = ctx.buffer_->length(); + bool is_origin = true; + int idx = i; + if (i < append_key_count) { + is_origin = false; + } else { + idx -= append_key_count; + } + if (OB_FAIL(append_value_by_idx(is_origin, idx, ctx, origin, patch, cur_header, res))) { + LOG_WARN("fail to append value", K(ret)); + } else if (OB_FALSE_IT(merged_len = ctx.buffer_->length())) { + } else if (merged_len < origin_len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("value length must > 0", K(ret)); + } else if (OB_FAIL(set_value_offset(i, origin_len - start, ctx, res))) { + LOG_WARN("fail to set value offset", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (merged_len < start) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error length", K(ret)); + } else if (ObMulModeVar::get_var_type(merged_len - start) > cur_header.get_obj_var_size_type() + || ObMulModeVar::get_var_type(cur_header.count_) > cur_header.get_count_var_size_type()) { + if (retry) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to serialize as meta info not match.", K(ret)); + } else { + int obj_type_diff = ObMulModeVar::get_var_type(merged_len - start) - cur_header.get_obj_var_size_type(); + int count_type_diff = ObMulModeVar::get_var_type(cur_header.count_) - cur_header.get_count_var_size_type(); + ctx.retry_count_ = cur_header.count_; + ctx.retry_len_ = (merged_len - start) + cur_header.count_ * count_type_diff + 
4 * cur_header.count_ * obj_type_diff; + new (&cur_header) ObMulBinHeaderSerializer(ctx.buffer_, get_res_type(origin.type(), patch.type()), merged_len, cur_header.count_); + if (OB_FAIL(inner_merge(ctx, origin, patch, res, true))) { + LOG_WARN("fail to retry", K(ret)); + } else { + merged_len = ctx.buffer_->length(); + } + } + } else { + cur_header.set_obj_size(merged_len - start); + cur_header.set_count(cur_header.count_); + } + } + if (OB_FAIL(ret)) { + } else if (ctx.reuse_del_map_) { + ctx.del_map_ = origin_del_map; + } + return ret; +} +// serialize common header +int ObMulModeBinMerge::append_header_to_res(ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch, + ObMulBinHeaderSerializer& header, ObIMulModeBase& res) +{ + INIT_SUCC(ret); + if (OB_ISNULL(header.buffer_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_FAIL(header.serialize())) { + LOG_WARN("fail to serialize common header", K(ret)); + } + return ret; +} + +} // namespace common +} // namespace oceanbase \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_multi_mode_bin.h b/deps/oblib/src/lib/xml/ob_multi_mode_bin.h new file mode 100644 index 0000000000..9cf0ff4cd0 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_multi_mode_bin.h @@ -0,0 +1,395 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface support for the xml & json basic interface abstraction. 
+ */ + +#ifndef OCEANBASE_MUL_MODE_BIN_BASE +#define OCEANBASE_MUL_MODE_BIN_BASE + +#include "lib/string/ob_string_buffer.h" +#include "lib/xml/ob_multi_mode_interface.h" + +namespace oceanbase { +namespace common { + +class ObIMulModeBase; + +enum ObMulModeBinType { + MulModeNull = 0, + MulModeBoolean, + MulModeDecimal, + MulModeUint, + MulModeInt, + MulModeTime, + MulModeDouble, + MulModeString, + /** + * container is not a basic type, while in multi mode binary is a basic data structure + */ + MulModePair, + MulModeContainer, + MulModeMaxType +}; + +static ObMulModeBinType g_mul_mode_tc[] = { + MulModeNull, // M_NULL, // 0 oracle & mysql + MulModeDecimal, // M_DECIMAL, + MulModeInt, // M_INT, + MulModeInt, // M_UINT, + MulModeDouble, // M_DOUBLE, + MulModeString, // M_STRING, // 5 oracle & mysql + MulModeContainer, // M_OBJECT, // oracle & mysql + MulModeContainer, // M_ARRAY, // oracle & mysql + MulModeBoolean, // M_BOOLEAN, + MulModeTime, // M_DATE, + MulModeTime, // M_TIME, // 10 + MulModeTime, // M_DATETIME, + MulModeTime, // M_TIMESTAMP, + MulModeString, // M_OPAQUE, 13 + MulModeContainer, // M_UNPARSED, 14 + MulModeContainer, // M_UNPARESED_DOC, 15 + MulModeContainer, // M_DOCUMENT, 16 + MulModeContainer, // M_CONTENT, 17 + MulModeContainer, // M_ELEMENT, 18 + MulModePair, // M_ATTRIBUTE, 19 + MulModePair, // M_NAMESPACE, 20 + MulModePair, // M_PI, 21 + MulModeString, // M_TEXT, 22 + MulModeString, // M_COMMENT, 23 + MulModeString, // M_CDATA, 24 + MulModePair, // M_ENTITY, 25 + MulModePair, // M_ENTITY_REF, 26 + MulModeContainer, // M_DTD 27 +}; + +#define OB_MUL_MODE_INLINE_REV_MASK (0x7F) +#define OB_MUL_MODE_INLINE_MASK (0x80) +#define OB_MUL_MODE_IS_INLINE(origin_type) ((OB_MUL_MODE_INLINE_MASK & (origin_type)) != 0) +#define OB_MUL_MODE_GET_INLINE(origin_type) (OB_MUL_MODE_INLINE_REV_MASK & (origin_type)) + +#define OB_MUL_MODE_BIN_MAX_SERIALIZE_TIME 2 +enum ObMulModeBinLenSize:uint8_t { + MBL_UINT8 = 0, + MBL_UINT16 = 1, + MBL_UINT32 = 2, + 
MBL_UINT64 = 3, + MBL_MAX = 4, +}; + +static const uint8_t MUL_MODE_BIN_HEADER_LEN = 2; +static const uint8_t MUL_MODE_BIN_BASE_HEADER_LEN = 1; + +typedef struct ObMulModeBinHeader { + ObMulModeBinHeader() { + (&type_)[1] = 0; + } + ObMulModeBinHeader(uint8_t type, + uint8_t kv_entry_type, + uint8_t count_type, + uint8_t obj_type, + uint8_t is_continous) + : type_(type), + kv_entry_size_type_(kv_entry_type), + count_size_type_(count_type), + obj_size_type_(obj_type), + is_continuous_(is_continous), + reserved_(0) {} + + uint8_t type_; // node type for current node + uint8_t kv_entry_size_type_ : 2; // the size describe var size of key_entry,val_entry + uint8_t count_size_type_ : 2; // the size describe var size of element count + uint8_t obj_size_type_ : 2; // the size describe var size of key_entry,val_entry + uint8_t is_continuous_ : 1; // memory of current node and subtree is continous + uint8_t reserved_ : 1; // reserved bit + char used_size_[]; // var size + +} ObMulModeBinHeader; + +typedef ObMulModeBinHeader ObXmlBinHeader; + +typedef std::pair ObMulModeExtendStorageType; + +class ObMulBinHeaderSerializer { +public: + ObMulBinHeaderSerializer(ObStringBuffer* buffer, + ObMulModeNodeType type, + uint64_t total_size, + uint64_t count); + + ObMulBinHeaderSerializer(const char* data, uint64_t length); + ObMulBinHeaderSerializer() {}; + + int serialize(); + int deserialize(); + uint8_t get_obj_var_size() { return obj_var_size_; } + uint8_t get_entry_var_size() { return entry_var_size_; } + uint8_t get_count_var_size() { return count_var_size_; } + + void set_obj_size(uint64_t size); + void set_count(uint64_t size); + + uint64_t get_obj_size() { return total_; } + uint64_t count() { return count_; } + + ObStringBuffer* buffer() { return buffer_; } + + uint64_t start() { return begin_; } + uint64_t finish() { return begin_ + obj_var_offset_ + obj_var_size_; } + uint64_t header_size() { return obj_var_offset_ + obj_var_size_; } + uint8_t get_obj_var_size_type() { 
return obj_var_size_type_; } + uint8_t get_entry_var_size_type() { return entry_var_size_type_; } + uint8_t get_count_var_size_type() { return count_var_size_type_; } + ObMulModeNodeType type() { return type_; } + + + TO_STRING_KV(K_(obj_var_size_type), + K_(entry_var_size_type), + K_(count_var_size_type), + K_(obj_var_size), + K_(entry_var_size), + K_(count_var_size), + K_(obj_var_offset), + K_(count_var_offset), + K_(type), + K_(total), + K_(count)); + + void set_var_value(uint8_t var_size, uint8_t offset, uint64_t value); + + // meta info + uint8_t obj_var_size_type_; + uint8_t entry_var_size_type_; + uint8_t count_var_size_type_; + + // meta info + uint8_t obj_var_size_; + uint8_t entry_var_size_; + uint8_t count_var_size_; + + uint8_t obj_var_offset_; + uint8_t count_var_offset_; + + ObMulModeNodeType type_; + + // input for serialize + ObStringBuffer* buffer_; + uint64_t begin_; + + // serialize & deserialize both use + int64_t total_; + int64_t count_; + + // input for deserialize + const char* data_; + uint64_t data_len_; +}; + +class ObMulModeContainerSerializer { +public: + ObMulModeContainerSerializer(ObIMulModeBase* root, ObStringBuffer* buffer); + ObMulModeContainerSerializer(const char* data, int64_t length); + ObMulModeContainerSerializer(ObIMulModeBase* root, ObStringBuffer* buffer, int64_t children_count); + + bool need_serialize_key() { + return root_->data_type() == OB_XML_TYPE || root_->type() == M_OBJECT; + } + + bool is_kv_seperate() { return root_->data_type() == OB_XML_TYPE; } + +protected: + + ObIMulModeBase* root_; + ObMulModeNodeType type_; + + + int64_t value_entry_start_; + int64_t value_entry_size_; + + + ObMulBinHeaderSerializer header_; + // for deseialize + const char* data_; + int64_t length_; + +}; + + +inline ObMulModeBinType get_mul_mode_tc(ObMulModeNodeType type) +{ + ObMulModeBinType res = MulModeMaxType; + if (type >= M_MAX_TYPE) { + // do nothing,current not used + } else if (type >= M_NULL && type < M_MAX_TYPE) { + res = 
g_mul_mode_tc[type]; + } + return res; +} + +inline bool is_valid_xml_type(uint8_t type) +{ + return (type >= M_UNPARESED_DOC && type <= M_DTD); +} + +inline bool is_extend_type(ObMulModeNodeType type) +{ + return (type >= M_EXTENT_LEVEL2 && type <= M_EXTENT_LEVEL0); +} + +inline ObMulModeNodeType eval_data_type(ObMulModeNodeType part1, uint8_t part2) +{ + return static_cast(M_EXTENT_BEGIN0 + 256 * (M_EXTENT_LEVEL0 - part1)); +} + + +inline bool is_scalar_data_type(ObMulModeNodeType type) +{ + ObMulModeBinType tc_type = get_mul_mode_tc(type); + return (tc_type == MulModeNull + || tc_type == MulModeBoolean + || tc_type == MulModeDecimal + || tc_type == MulModeInt + || tc_type == MulModeUint + || tc_type == MulModeTime + || tc_type == MulModeDouble + || tc_type == MulModeString); +} + +inline bool is_complex_data_type(ObMulModeNodeType type) +{ + ObMulModeBinType tc_type = get_mul_mode_tc(type); + return (tc_type == MulModePair + || tc_type == MulModeContainer); +} + +inline bool is_int_type(ObMulModeNodeType type) +{ + return get_mul_mode_tc(type) == MulModeInt; +} + +inline bool is_uint_type(ObMulModeNodeType type) +{ + return get_mul_mode_tc(type) == MulModeUint; +} + +inline ObMulModeExtendStorageType get_extend_storage_type(ObMulModeNodeType type) +{ + ObMulModeExtendStorageType res; + if (is_scalar_data_type(type)) { + res.first = ObMulModeNodeType::M_EXTENT_LEVEL0 - ((type - 0x7f) >> 8); + } else { + res.first = ObMulModeNodeType::M_EXTENT_LEVEL0 - ((type - 0x7f) >> 8); + } + + res.second = (type & 0xff) - 0x7f; + return res; +} + +inline ObMulModeNodeType get_extend_data_type(ObMulModeExtendStorageType& type) +{ + ObMulModeNodeType res; + res = static_cast(type.second + (ObMulModeNodeType::M_EXTENT_LEVEL0 - type.first) * 256); + return res; +} + +inline bool is_xml_type(ObIMulModeBase* node) { return node->data_type() == ObNodeDataType::OB_XML_TYPE; } +/* Returns true when node's data_type() is OB_JSON_TYPE; node must be non-null (it is dereferenced unchecked). */ inline bool is_json_type(ObIMulModeBase* node) { return node->data_type() == ObNodeDataType::OB_JSON_TYPE; } 
+ +class ObMulModeScalarSerializer { +public: + // serialize use + ObMulModeScalarSerializer(ObStringBuffer* buffer) + : buffer_(buffer) {} + + int serialize_integer(ObIMulModeBase* node, int32_t depth); + int serialize_string(ObIMulModeBase* node, int32_t depth); + int serialize_decimal(ObIMulModeBase* node, int32_t depth); + int serialize_null(ObIMulModeBase* node, int32_t depth); + int serialize_boolean(ObIMulModeBase* node, int32_t depth); + int serialize_time(ObIMulModeBase* node, int32_t depth); + int serialize_double(ObIMulModeBase* node, int32_t depth); + + static int serialize_scalar_header(ObMulModeNodeType type, ObStringBuffer& buffer); +protected: + ObStringBuffer* buffer_; +}; + + +class ObMulModeVar { +public: + static int read_size_var(const char *data, uint8_t var_size, int64_t *var); + static int read_var(const char *data, uint8_t type, uint64_t *var); + static int append_var(uint64_t var, uint8_t type, ObStringBuffer &result); + static int reserve_var(uint8_t type, ObStringBuffer &result); + static int set_var(uint64_t var, uint8_t type, char *pos); // fill var at pos + static uint64_t get_var_size(uint8_t type); + static uint8_t get_var_type(uint64_t var); + static int read_var(const char *data, uint8_t type, int64_t *var); + static uint64_t var_int2uint(int64_t var); + static int64_t var_uint2int(uint64_t var, uint8_t entry_size); + static uint8_t get_var_type(int64_t var); +}; + +class ObBinMergeCtx { +public: + ObBinMergeCtx(ObIAllocator* allocator) + : allocator_(allocator), + del_map_(allocator), + defined_ns_idx_(allocator) {} + ~ObBinMergeCtx() {} + bool is_all_deleted(); + int get_valid_key_count(); + common::ObIAllocator *allocator_; + ObStringBuffer* buffer_; + uint64_t retry_len_; + uint64_t retry_count_; + uint8_t reuse_del_map_ : 1; + uint8_t only_merge_ns_ : 1; + uint8_t reserve_ : 6; + // for xml, defined ns or duplicate ns should be delete + // for json, dulipcate key should be delete + // deleted key do not need merge + ObStack 
del_map_; + ObStack defined_ns_idx_; +}; +class ObMulModeBinMerge { +public: +// use for merge binary, make sure base_node is binary + virtual int merge(ObIMulModeBase& origin, ObIMulModeBase& patch, ObIMulModeBase& res); +protected: + virtual int inner_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObIMulModeBase& res, bool retry = false); + virtual int init_merge_info(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObIMulModeBase& res) = 0; + virtual int if_need_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObIMulModeBase& res, bool& need_merge) = 0; + virtual bool if_need_append_key(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObIMulModeBase& res) = 0; + virtual int append_res_without_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObIMulModeBase& res) = 0; + virtual int append_value_without_merge(ObBinMergeCtx& ctx, ObIMulModeBase& value, ObIMulModeBase& res) = 0; + virtual int append_key_without_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObMulBinHeaderSerializer& header, ObIMulModeBase& res) = 0; + virtual int append_header_to_res(ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch, + ObMulBinHeaderSerializer& header, ObIMulModeBase& res); + virtual int append_merge_key(ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch, + ObMulBinHeaderSerializer& header, ObIMulModeBase& res) = 0; + virtual int append_value_by_idx(bool is_origin, int idx, ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObMulBinHeaderSerializer& header, ObIMulModeBase& res) = 0; + virtual int set_value_offset(int idx, uint64_t offset, ObBinMergeCtx& ctx, ObIMulModeBase& res) = 0; + virtual uint64_t estimated_length(bool retry, ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch) = 0; + virtual uint64_t estimated_count(bool retry, ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& 
patch); + virtual ObMulModeNodeType get_res_type(const ObMulModeNodeType &origin_type, const ObMulModeNodeType &res_type) = 0; +}; + +} // namespace common +} // namespace oceanbase + +#endif // OCEANBASE_MUL_MODE_BIN_BASE \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_multi_mode_interface.cpp b/deps/oblib/src/lib/xml/ob_multi_mode_interface.cpp new file mode 100644 index 0000000000..2cf555fc83 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_multi_mode_interface.cpp @@ -0,0 +1,818 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains parts interface implement for the multi mode type data abstraction. 
+ */ + +#define USING_LOG_PREFIX LIB +#include "lib/xml/ob_multi_mode_interface.h" +#include "lib/xml/ob_xml_parser.h" +#include "lib/xml/ob_xml_tree.h" +#include "lib/xml/ob_xml_bin.h" +#include "lib/xml/ob_xml_util.h" + +namespace oceanbase { +namespace common { + +int ObPathPool::init(int64_t obj_size, ObIAllocator *alloc) +{ + INIT_SUCC(ret); + if (OB_UNLIKELY(obj_size < static_cast(sizeof(FreeNode)))) { + LIB_LOG_RET(ERROR, common::OB_ERR_UNEXPECTED, "obj_size_ < size of FreeNode"); + } else if (OB_ISNULL(alloc)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + // must use tmp_allocator to init + obj_size_ = obj_size; + alloc_ = alloc; + is_inited_ = true; + } + return ret; +} + +void *ObPathPool::alloc() +{ + void *ptr_ret = NULL; + if (!is_inited_) { + } else if (NULL == (ptr_ret = freelist_pop())) { + alloc_new_node(); + ptr_ret = freelist_pop(); + } + return ptr_ret; +} + +void ObPathPool::free(void *obj) +{ + if (!is_inited_) { + } else if (NULL != obj) { + --in_use_count_; + } + freelist_push(obj); +} + +void ObPathPool::reset() +{ + is_inited_ = false; + freelist_ = nullptr; +} + +void *ObPathPool::freelist_pop() +{ + void *ptr_ret = NULL; + if (!is_inited_) { + } else if (NULL != freelist_) { + ptr_ret = freelist_; + freelist_ = freelist_->next_; + --free_count_; + ++in_use_count_; + } + return ptr_ret; +} + +void ObPathPool::freelist_push(void *obj) +{ + if (!is_inited_) { + } else if (NULL != obj) { + FreeNode *node = static_cast(obj); + if (OB_ISNULL(node)) { + LIB_LOG_RET(ERROR, common::OB_ERR_UNEXPECTED, "node is NULL"); + } else { + node->next_ = freelist_; + freelist_ = node; + ++free_count_; + } + } +} + +void ObPathPool::alloc_new_node() +{ + if (!is_inited_) { + } else if (OB_ISNULL(alloc_)) { + } else { + ++total_count_; + freelist_push(alloc_->alloc(obj_size_)); + } +} + +// need overwrite +bool ObIMulModeBase::get_boolean() +{ + return false; +} + +// need overwrite +double ObIMulModeBase::get_double() +{ 
+ return 0; +} + +// need overwrite +float ObIMulModeBase::get_float() +{ + return 0; +} + +// need overwrite +int64_t ObIMulModeBase::get_int() +{ + return 0; +} + +// need overwrite +uint64_t ObIMulModeBase::get_uint() +{ + return 0; +} + +// need overwrite +const char* ObIMulModeBase::get_data() +{ + return nullptr; +} + +// need overwrite +uint64_t ObIMulModeBase::get_data_length() +{ + return 0; +} + +// need overwrite +number::ObNumber ObIMulModeBase::get_decimal_data() +{ + return number::ObNumber(); +} + +// need overwrite +ObPrecision ObIMulModeBase::get_decimal_precision() +{ + return -1; +} + +// need overwrite +ObScale ObIMulModeBase::get_decimal_scale() +{ + return -1; +} + +// need overwrite +ObTime ObIMulModeBase::get_time() +{ + return ObTime(); +} + +// need overwrite +int64_t ObIMulModeBase::get_serialize_size() +{ + return 0; +} + +int ObIMulModeBase::print(ObStringBuffer& x_buf, uint32_t format_flag, uint64_t depth, uint64_t size, ObCollationType charset) +{ + INIT_SUCC(ret); + + if (meta_.data_type_ == OB_XML_TYPE) { + if (!check_extend()) { + ret = print_xml(x_buf, format_flag, depth, size, nullptr, charset); + } else { + ObNsSortedVector ns_vec; + if (OB_FAIL(ObXmlUtil::init_extend_ns_vec(allocator_, this, ns_vec))) { + LOG_WARN("fail to init ns vector by extend area", K(ret)); + } else if (OB_FAIL(print_xml(x_buf, format_flag, depth, size, &ns_vec, charset))) { + LOG_WARN("fail to print xml", K(ret)); + } + ns_vec.clear(); + } + } else { + ret = OB_NOT_SUPPORTED; + } + + return ret; +} + +int ObIMulModeBase::print_xml(ObStringBuffer& x_buf, uint32_t format_flag, uint64_t depth, uint64_t size, ObNsSortedVector* ns_vec, ObCollationType charset) +{ + INIT_SUCC(ret); + ObMulModeNodeType xml_type = type(); + + switch(xml_type) { + case ObMulModeNodeType::M_DOCUMENT : { + if (OB_FAIL(print_document(x_buf, CS_TYPE_INVALID, format_flag, size, ns_vec))) { + LOG_WARN("fail to print element to string", K(ret), K(depth), K(xml_type)); + } + break; + } + 
case ObMulModeNodeType::M_CONTENT : { + ParamPrint param_list; + if (OB_FAIL(print_content(x_buf, false, false, format_flag, param_list, ns_vec))) { + LOG_WARN("fail to print element to string", K(ret), K(depth), K(xml_type)); + } + break; + } + case ObMulModeNodeType::M_UNPARESED_DOC: + case ObMulModeNodeType::M_UNPARSED : { + if (OB_FAIL(print_unparsed(x_buf, CS_TYPE_INVALID, format_flag, size))) { + LOG_WARN("fail to print element to string", K(ret), K(depth), K(xml_type)); + } + break; + } + case ObMulModeNodeType::M_ELEMENT : { + if (ObXmlUtil::is_xml_doc_over_depth((depth + 1))) { + ret = OB_ERR_JSON_OUT_OF_DEPTH; + LOG_WARN("current xml over depth", K(ret), K(depth), K(xml_type)); + } else if (OB_FAIL(print_element(x_buf, depth, format_flag, size, ns_vec))) { + LOG_WARN("fail to print element to string", K(ret), K(depth), K(xml_type)); + } + break; + } + case ObMulModeNodeType::M_ATTRIBUTE : { + if (OB_FAIL(print_attr(x_buf, format_flag))) { + LOG_WARN("fail to print attribute to string", K(ret)); + } + break; + } + case ObMulModeNodeType::M_NAMESPACE : { + if (OB_FAIL(print_ns(x_buf, format_flag))) { + LOG_WARN("fail to print namespace to string", K(ret), K(depth), K(xml_type)); + } + break; + } + case ObMulModeNodeType::M_TEXT : { + if (OB_FAIL(print_text(x_buf, format_flag))) { + LOG_WARN("fail to print text to string", K(ret), K(depth), K(xml_type)); + } + break; + } + case ObMulModeNodeType::M_CDATA : { + if (OB_FAIL(print_cdata(x_buf, format_flag))) { + LOG_WARN("fail to print cdata to string", K(ret), K(depth), K(xml_type)); + } + break; + } + case ObMulModeNodeType::M_INSTRUCT : { + if (OB_FAIL(print_pi(x_buf, format_flag))) { + LOG_WARN("fail to print pi to string", K(ret), K(depth), K(xml_type)); + } + break; + } + case ObMulModeNodeType::M_COMMENT : { + if (OB_FAIL(print_comment(x_buf, format_flag))) { + LOG_WARN("fail to print comment to string", K(ret), K(depth), K(xml_type)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("undefined xml node type", K(ret), K(xml_type)); + break; + } + } + return ret; +} + +int ObIMulModeBase::print_attr(ObStringBuffer& x_buf, uint32_t format_flag) +{ + INIT_SUCC(ret); + ObString key; + ObString value; + ObXmlAttribute *att = NULL; + bool is_mysql_key_only = false; + + if (OB_FAIL(ret)) { + } else if (lib::is_oracle_mode()) { + // do nothing + } else if (type() != M_ATTRIBUTE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("print attributes but type not attribute.", K(ret), K(type())); + } else if (OB_ISNULL(att = static_cast(this))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get attribute node null", K(ret)); + } else if (att->get_only_key()) { + is_mysql_key_only = true; + } + + if (OB_FAIL(get_key(key))) { + LOG_WARN("fail to print =\" in attr", K(ret)); + } else if (OB_FAIL(get_value(value))) { + LOG_WARN("fail to print =\" in attr", K(ret)); + } else if (OB_FAIL(ObXmlUtil::append_qname(x_buf, get_prefix(), key))) { + LOG_WARN("fail to print prefix in attr", K(ret), K(get_prefix()), K(key)); + } else if (is_mysql_key_only) { + // do nothing + } else if (OB_FAIL(x_buf.append("=\""))) { + LOG_WARN("fail to print =\" in attr", K(ret)); + } else if (!(format_flag & NO_ENTITY_ESCAPE)) { + if (OB_FAIL(ObXmlParserUtils::escape_xml_text(value, x_buf))) { + LOG_WARN("fail to print text with escape char", K(ret), K(value)); + } + } else if (OB_FAIL(x_buf.append(value))) { + LOG_WARN("fail to print value in attr", K(ret), K(value)); + } + + if (OB_SUCC(ret) && !is_mysql_key_only && OB_FAIL(x_buf.append("\""))) { + LOG_WARN("fail to print \" in attr", K(ret)); + } + return ret; +} + +int ObIMulModeBase::print_ns(ObStringBuffer& x_buf, uint32_t format_flag) +{ + INIT_SUCC(ret); + + ObString xmlns = "xmlns"; + ObString key; + ObString value; + + if (OB_FAIL(get_key(key))) { + LOG_WARN("fail to print =\" in attr", K(ret)); + } else if (OB_FAIL(get_value(value))) { + LOG_WARN("fail to print =\" in attr", K(ret)); + } else if (xmlns.compare(key) == 0) { + xmlns = 
ObString(); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObXmlUtil::append_qname(x_buf, xmlns, key))) { + LOG_WARN("fail to print prefix in ns", K(ret), K(xmlns), K(key)); + } else if (OB_FAIL(x_buf.append("=\""))) { + LOG_WARN("fail to print =\" in ns", K(ret)); + } else if (!(format_flag & NO_ENTITY_ESCAPE)) { + if (OB_FAIL(ObXmlParserUtils::escape_xml_text(value, x_buf))) { + LOG_WARN("fail to print text with escape char", K(ret), K(value)); + } + } else if (OB_FAIL(x_buf.append(value))) { + LOG_WARN("fail to print value in ns", K(ret), K(value)); + } + + if (OB_SUCC(ret) && OB_FAIL(x_buf.append("\""))) { + LOG_WARN("fail to print \" in ns", K(ret)); + } + + return ret; +} +int ObIMulModeBase::print_pi(ObStringBuffer& x_buf, uint32_t format_flag) +{ + INIT_SUCC(ret); + ObString key; + ObString value; + if (format_flag & ObXmlFormatType::HIDE_PI) { + // do nothing + } else { + if (OB_FAIL(get_key(key))) { + LOG_WARN("fail to print =\" in attr", K(ret)); + } else if (OB_FAIL(get_value(value))) { + LOG_WARN("fail to print =\" in attr", K(ret)); + } else if (OB_FAIL(x_buf.append(""))) { + LOG_WARN("fail to print ?> in attr", K(ret)); + } + } + return ret; +} + +int ObIMulModeBase::print_unparsed(ObStringBuffer& x_buf, ObCollationType charset, uint32_t format_flag, uint64_t size) +{ + INIT_SUCC(ret); + ObString version = get_version(); + ObString encoding = get_encoding(); + uint16_t standalone = get_standalone(); + if (!(format_flag & ObXmlFormatType::HIDE_PROLOG) && has_flags(XML_DECL_FLAG)) { + if (OB_FAIL(x_buf.append("\n"))) { + LOG_WARN("failed to print ?>", K(ret)); + } + } + } + + int64_t num_children = count(); + ObXmlBin tmp_bin; + + for (int64_t i = 0; OB_SUCC(ret) && i < num_children; i ++) { + ObIMulModeBase* cur = at(i, &tmp_bin); + if (OB_ISNULL(cur)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get child from element", K(ret), K(i)); + } else if (i > 0 && OB_FAIL(x_buf.append(" "))) { + LOG_WARN("failed to append space", K(ret)); + } else if 
(cur->type() == M_TEXT || (cur->type() == M_ELEMENT && cur->get_unparse())) { + // unparsed element skip print newline and indent + } else if ((format_flag & ObXmlFormatType::NEWLINE_AND_INDENT) && i > 0 + && OB_FAIL(ObXmlUtil::append_newline_and_indent(x_buf, 0, size))) { + LOG_WARN("failed to add is_pretty", K(ret), K(size)); + } + + if (OB_SUCC(ret) && OB_FAIL(cur->print_xml(x_buf, format_flag, 0, size))) { + LOG_WARN("failed to print child in element", K(ret), K(i)); + } + } + + return ret; +} + +int ObIMulModeBase::print_document(ObStringBuffer& x_buf, ObCollationType charset, uint32_t format_flag, uint64_t size, ObNsSortedVector* ns_vec) +{ + INIT_SUCC(ret); + ObString version = get_version(); + ObString encoding = get_encoding(); + uint16_t standalone = get_standalone(); + bool need_newline_end = true; + + if (!(format_flag & ObXmlFormatType::HIDE_PROLOG) && has_flags(XML_DECL_FLAG)) { + if (OB_FAIL(x_buf.append("\n"))) { + LOG_WARN("failed to print ?>", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + int64_t num_children = attribute_count(); + ObXmlBin tmp_bin; + + for (int64_t i = 0; OB_SUCC(ret) && i < num_children; i++) { + ObIMulModeBase* cur = attribute_at(i, &tmp_bin); + if (OB_ISNULL(cur)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get child from element", K(ret), K(i)); + } else if ((format_flag & ObXmlFormatType::NEWLINE_AND_INDENT) && i > 0 + && OB_FAIL(ObXmlUtil::append_newline_and_indent(x_buf, 0, size))) { + LOG_WARN("failed to add is_pretty", K(ret), K(size)); + } else if (OB_FAIL(cur->print_xml(x_buf, format_flag, 0, size))) { + LOG_WARN("failed to print child in element", K(ret), K(i)); + } + } + } + + if (OB_SUCC(ret)) { + int64_t num_children = count(); + ObXmlBin tmp_bin; + + for (int64_t i = 0; OB_SUCC(ret) && i < num_children; i++) { + ObIMulModeBase* cur = at(i, &tmp_bin); + if (OB_ISNULL(cur)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get child from element", K(ret), K(i)); + } else if (cur->type() == M_TEXT || 
(cur->type() == M_ELEMENT && cur->get_unparse())) { + // unparsed element skip print newline and indent + } else if ((format_flag & ObXmlFormatType::NEWLINE_AND_INDENT) && i > 0 + && OB_FAIL(ObXmlUtil::append_newline_and_indent(x_buf, 0, size))) { + LOG_WARN("failed to add is_pretty", K(ret), K(size)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(cur->print_xml(x_buf, format_flag, 0, size, ns_vec))) { + LOG_WARN("failed to print child in element", K(ret), K(i)); + } else if (num_children - 1 == i) { + need_newline_end = !(cur->type() == M_TEXT || (cur->type() == M_ELEMENT && cur->get_unparse())); + } + } + } + + if (OB_SUCC(ret) && need_newline_end && (format_flag & ObXmlFormatType::NEWLINE) && OB_FAIL(x_buf.append("\n"))) { + LOG_WARN("failed to print \n", K(ret)); + } + return ret; +} +/* Prints this content node into x_buf: an optional prolog when with_encoding or with_version is set, then every entry returned by attribute_at(), then every child returned by at(). param_list is taken by reference and param_list.indent is forwarded as the indent width. */ +int ObIMulModeBase::print_content(ObStringBuffer& x_buf, bool with_encoding, bool with_version, uint32_t format_flag, ParamPrint& param_list, ObNsSortedVector* ns_vec) +{ + INIT_SUCC(ret); + bool need_newline_end = true; + + if (with_encoding || with_version) { + if (OB_FAIL(x_buf.append(""))) { + LOG_WARN("failed to print ?>", K(ret)); + } else if ((format_flag & ObXmlFormatType::NEWLINE_AND_INDENT) && + OB_FAIL(ObXmlUtil::append_newline_and_indent(x_buf, 0, param_list.indent))) { + LOG_WARN("fail to add newline and indent", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + int64_t num_children = attribute_count(); + ObXmlBin tmp_bin; + + for (int64_t i = 0; OB_SUCC(ret) && i < num_children; i++) { + ObIMulModeBase* cur = attribute_at(i, &tmp_bin); + if (OB_ISNULL(cur)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get child from element", K(ret), K(i)); + } else if ((format_flag & ObXmlFormatType::NEWLINE_AND_INDENT) && i > 0 + && OB_FAIL(ObXmlUtil::append_newline_and_indent(x_buf, 0, param_list.indent))) { + LOG_WARN("failed to add is_pretty", K(ret)); + } else if (OB_FAIL(cur->print_xml(x_buf, format_flag, 0, param_list.indent))) { + LOG_WARN("failed to print child in
element", K(ret), K(i)); + } + } + } + + if (OB_SUCC(ret)) { + ObIMulModeBase* cur = nullptr; + int64_t num_children = count(); + ObXmlBin tmp_bin; + + for (int64_t i = 0; OB_SUCC(ret) && i < num_children; i ++) { + cur = at(i, &tmp_bin); + if (OB_ISNULL(cur)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get child from element", K(ret), K(i)); + } else if (cur->type() == M_TEXT || (cur->type() == M_ELEMENT && cur->get_unparse())) { + // unparsed element skip print newline and indent + } else if ((format_flag & ObXmlFormatType::NEWLINE_AND_INDENT) + && i > 0 + && OB_FAIL(ObXmlUtil::append_newline_and_indent(x_buf, 0, param_list.indent))) { + LOG_WARN("failed to add is_pretty", K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(cur->print_xml(x_buf, format_flag, 0, param_list.indent, ns_vec))) { + LOG_WARN("failed to print child in element", K(ret), K(i)); + } else if (num_children - 1 == i) { + need_newline_end = !(cur->type() == M_TEXT || (cur->type() == M_ELEMENT && cur->get_unparse())); + } + } + } + + if (OB_SUCC(ret) && need_newline_end && (format_flag & ObXmlFormatType::NEWLINE) && OB_FAIL(x_buf.append("\n"))) { + LOG_WARN("failed to print \n", K(ret)); + } + return ret; +} +/* Appends this CDATA node to x_buf. With PRINT_CDATA_AS_TEXT the value goes through escape_xml_text(); that call must be wrapped in OB_FAIL so a failure is stored in ret and returned, instead of being logged with a success code and then dropped. */ +int ObIMulModeBase::print_cdata(ObStringBuffer& x_buf, uint32_t format_flag) +{ + INIT_SUCC(ret); + ObString value; + if (OB_FAIL(get_value(value))) { + LOG_WARN("fail to get value.", K(ret)); + } else if (format_flag & ObXmlFormatType::PRINT_CDATA_AS_TEXT) { + if (OB_FAIL(ObXmlParserUtils::escape_xml_text(value, x_buf))) { + LOG_WARN("fail to print escape text", K(ret)); + } + } else if (OB_FAIL(x_buf.append(""))) { + LOG_WARN("fail to print ]]> in attr", K(ret)); + } + return ret; +} + +int ObIMulModeBase::print_comment(ObStringBuffer& x_buf, uint32_t format_flag) +{ + UNUSED(format_flag); + INIT_SUCC(ret); + ObString value; + + if (OB_FAIL(get_value(value))) { + LOG_WARN("fail to get value.", K(ret)); + } else if (OB_FAIL(x_buf.append(""))) { + LOG_WARN("fail to print --> in
attr", K(ret)); + } + return ret; +} + +int ObIMulModeBase::print_text(ObStringBuffer& x_buf, uint32_t format_flag) +{ + INIT_SUCC(ret); + ObString value; + if (OB_FAIL(get_value(value))) { + LOG_WARN("fail to get value.", K(ret)); + } else if (!(format_flag & NO_ENTITY_ESCAPE) && !lib::is_mysql_mode()) { + if (OB_FAIL(ObXmlParserUtils::escape_xml_text(value, x_buf))) { + LOG_WARN("fail to print text with escape char", K(ret), K(value)); + } + } else if (OB_FAIL(x_buf.append(value))) { + LOG_WARN("fail to print text", K(ret), K(value)); + } + return ret; +} + +int ObIMulModeBase::print_element(ObStringBuffer& x_buf, uint64_t depth, uint32_t format_flag, uint64_t size, ObNsSortedVector* ns_vec) +{ + INIT_SUCC(ret); + bool is_unparse = get_unparse(); + int64_t num_children = this->size(); + ObIMulModeBase* cur = nullptr; + ObString key; + ObXmlBin tmp_bin; + int attributes_count = 0; + format_flag = is_unparse ? (format_flag | NO_ENTITY_ESCAPE) : (format_flag & ~NO_ENTITY_ESCAPE); + // duplicate ns that defined in this element should be delete + // because namespaces with the same key are subject to the latest + // but this definition is only valid in this element and its descendant + // so, restore ns vec when finish printing this element, in case its sibling loses ns definition + ObVector deleted_ns_vec; + if (OB_FAIL(get_key(key))) { + LOG_WARN("fail get key of element", K(ret)); + } else if (is_unparse && key.empty() && OB_FAIL(get_node_count(ObMulModeNodeType::M_ATTRIBUTE, attributes_count))) { + LOG_WARN("get attributes count failed", K(ret)); + } else if (is_unparse && key.empty() && attributes_count == 0) { + + for (int64_t i = 0; OB_SUCC(ret) && i < num_children; i++) { + cur = at(i, &tmp_bin); + if (OB_ISNULL(cur)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get child from element", K(ret), K(i)); + } else if (OB_FAIL(cur->print_xml(x_buf, format_flag, (depth + 1), size, ns_vec))) { + LOG_WARN("failed to print child in element", K(ret), K(i)); + } + 
} + } else if (OB_NOT_NULL(ns_vec) && OB_FAIL(ObXmlUtil::delete_dup_ns_definition(this, *ns_vec, deleted_ns_vec))) { + LOG_WARN("fail to delete dup ns definition", K(ret)); + } else { + ObString prefix = get_prefix(); + if (OB_FAIL(x_buf.append("<"))) { + LOG_WARN("fail to print < in element", K(ret)); + } else if (OB_FAIL(ObXmlUtil::append_qname(x_buf, prefix, key))) { + LOG_WARN("fail to print tag in element", K(ret), K(prefix), K(key)); + } else if (OB_NOT_NULL(ns_vec) + && (OB_FAIL(ObXmlUtil::add_ns_def_if_necessary(format_flag, x_buf, prefix, ns_vec, deleted_ns_vec)) + || OB_FAIL(ObXmlUtil::add_attr_ns_def(this, format_flag, x_buf, ns_vec, deleted_ns_vec)))) { + LOG_WARN("fail to add ns definition of prefix", K(ret)); + } + + if (OB_SUCC(ret)) { + ObIMulModeBase* cur = nullptr; + int64_t num_children = attribute_size(); + + for (int64_t i = 0; OB_SUCC(ret) && i < num_children; i ++) { + cur = attribute_at(i, &tmp_bin); + if (OB_ISNULL(cur)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get child from element", K(ret), K(i)); + } else if (OB_FAIL(x_buf.append(" "))) { + LOG_WARN("failed to append space", K(ret)); + } else if (OB_FAIL(cur->print_xml(x_buf, format_flag, 0, size))) { + LOG_WARN("failed to print child in element", K(ret), K(i)); + } + } + } + + bool is_empty = get_is_empty(); + int64_t num_children = this->size(); + /* The child loop is the only writer of last_node_type, but the close-tag branch also reads it for a childless element that is not merged into an empty tag; start from a sentinel that is neither M_TEXT nor M_CDATA so that read is defined. NOTE(review): confirm that a newline+indent before the close tag is the wanted layout for that case. */ + ObMulModeNodeType last_node_type = ObMulModeNodeType::M_MAX_TYPE; + + if (OB_FAIL(ret)) { + } else if ((format_flag & ObXmlFormatType::MERGE_EMPTY_TAG) && (is_empty || num_children == 0)) { + if (OB_FAIL(x_buf.append("/>"))) { + LOG_WARN("fail to print />", K(ret)); + } + } else if (OB_FAIL(x_buf.append(">"))) { + LOG_WARN("fail to print >", K(ret)); + } else { + ObIMulModeBase* cur = nullptr; + ObMulModeNodeType prev_node_type, cur_node_type; + + for (int64_t i = 0; OB_SUCC(ret) && i < num_children; i++) { + cur = at(i, &tmp_bin); + if (OB_ISNULL(cur)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get child from
element", K(ret), K(i)); + } else if (FALSE_IT(cur_node_type = cur->type())) { + } else if ((format_flag & ObXmlFormatType::NEWLINE_AND_INDENT) + && !((cur_node_type == ObMulModeNodeType::M_TEXT + || cur_node_type == ObMulModeNodeType::M_CDATA) + || ((i > 0) && (prev_node_type == ObMulModeNodeType::M_TEXT + || prev_node_type == ObMulModeNodeType::M_CDATA))) + && OB_FAIL(ObXmlUtil::append_newline_and_indent(x_buf, depth + 1, size))) { + LOG_WARN("failed to add is_pretty", K(ret), K(depth), K(size)); + } else if (OB_FAIL(cur->print_xml(x_buf, format_flag, depth + 1, size, ns_vec))) { + LOG_WARN("failed to print child in element", K(ret), K(i)); + } else { + prev_node_type = cur->type(); + last_node_type = prev_node_type; + } + } + } + + if (OB_FAIL(ret)) { + } else if ((format_flag & ObXmlFormatType::MERGE_EMPTY_TAG) && (is_empty || num_children == 0)) { // error or empty do nothing + } else if ((format_flag & ObXmlFormatType::NEWLINE_AND_INDENT) && last_node_type != ObMulModeNodeType::M_TEXT + && last_node_type != ObMulModeNodeType::M_CDATA + && OB_FAIL(ObXmlUtil::append_newline_and_indent(x_buf, depth, size))) { + LOG_WARN("failed to add is_pretty", K(ret), K(depth), K(size)); + } else if (OB_FAIL(x_buf.append(""))) { + LOG_WARN("fail to print > in element", K(ret)); + } + } + int tmp_ret = OB_SUCCESS; /* the restore below also runs after a failed print; using OB_FAIL() for it would overwrite that earlier error in ret with the restore result */ + if (deleted_ns_vec.size() > 0 && OB_NOT_NULL(ns_vec) && OB_SUCCESS != (tmp_ret = ObXmlUtil::restore_ns_vec(ns_vec, deleted_ns_vec))) { + LOG_WARN("fail to restore ns vec", K(ret), K(tmp_ret)); + ret = (OB_SUCCESS == ret) ? tmp_ret : ret; + } + deleted_ns_vec.clear(); + return ret; +} + + +} // namespace common +} // namespace oceanbase diff --git a/deps/oblib/src/lib/xml/ob_multi_mode_interface.h b/deps/oblib/src/lib/xml/ob_multi_mode_interface.h new file mode 100644 index 0000000000..021d89a5bd --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_multi_mode_interface.h @@ -0,0 +1,614 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface define for the multi mode type data abstraction. + */ + +#ifndef OCEANBASE_SQL_OB_MULTI_MODE_INTERFACE +#define OCEANBASE_SQL_OB_MULTI_MODE_INTERFACE + +#include "lib/json_type/ob_json_common.h" +#include "lib/number/ob_number_v2.h" // for number::ObNumber +#include "lib/allocator/page_arena.h" +#include "lib/container/ob_vector.h" + +namespace oceanbase { +namespace common { + +const uint16_t MUL_MODE_SINGLE_LELVEL_COUNT = 256; + +enum ObMulModeNodeType +{ + /** + * attention !!! + * inline data type must added at(0 ~ 127) !!! + * often used data type added at(0 ~ 127) + * rarely used data new type added at(129,unlimited) + * At the same time increase the type in g_mul_mode_tc + * */ + + // the following M_NULL ~ M_OPAQUE is for json type + M_NULL, + M_DECIMAL, + M_INT, + M_UINT, + M_DOUBLE, + M_STRING, // 5 + M_OBJECT, // + M_ARRAY, // + M_BOOLEAN, + M_DATE, + M_TIME, // 10 + M_DATETIME, + M_TIMESTAMP, + M_OPAQUE, + // reserve some for json + // the following is for xml type + M_UNPARESED_DOC, //14 + M_UNPARSED, // 15 + M_DOCUMENT, // + M_CONTENT, // + M_ELEMENT, // 18 + M_ATTRIBUTE, // 19 + M_NAMESPACE, + M_INSTRUCT, + M_TEXT, // 22 + M_COMMENT, + M_CDATA, // 24 + M_ENTITY, + M_ENTITY_REF, // 26 + M_DTD, // 27 + M_MAX_TYPE, + + /** + * attention !!! + * often used data type added at(0 ~ 127) + */ + + M_EXTENT_LEVEL2 = 125, + M_EXTENT_LEVEL1 = 126, + M_EXTENT_LEVEL0 = 127, + + M_EXTENT_BEGIN0 = 128, + // attention !!! + // rarely used data new type add here !!! 
+ + M_EXTENT_BEGIN1 = M_EXTENT_BEGIN0 + MUL_MODE_SINGLE_LELVEL_COUNT, + M_EXTENT_BEGIN2 = M_EXTENT_BEGIN1 + MUL_MODE_SINGLE_LELVEL_COUNT, +}; + +enum scan_type { + PRE_ORDER, + POST_ORDER +}; + +static constexpr int OB_XML_PARSER_MAX_DEPTH_ = 1000; + +enum ObMulModeNodeFlag: uint64_t { + XML_DECL_FLAG = 0x1, + XML_ENCODING_EMPTY_FLAG = 0x2 +}; + +typedef struct ObParameterPrint +{ + ObString encode; + ObString version; + int indent; + ObParameterPrint() : encode(), version(), indent(0) {} +} ParamPrint; + +struct ObXmlConstants { + static constexpr const char* XML_STRING = "xml"; + static constexpr const char* XMLNS_STRING = "xmlns"; + static constexpr const char* XML_NAMESPACE_SPECIFICATION_URI = "http://www.w3.org/XML/1998/namespace"; +}; + +enum ObXmlFormatType: uint32_t { + NO_FORMAT = 0, + MERGE_EMPTY_TAG = 1 << 0, + NEWLINE = 1 << 1, + INDENT = 1 << 2, + HIDE_PROLOG = 1 << 3, + HIDE_PI = 1 << 4, + PRINT_CDATA_AS_TEXT = 1 << 5, + NO_ENTITY_ESCAPE = 1 << 6, + NEWLINE_AND_INDENT = NEWLINE | INDENT, + WITH_FORMAT = MERGE_EMPTY_TAG | NEWLINE_AND_INDENT, +}; + +enum ObNodeMemType : int8_t { + BINARY_TYPE = 1, + TREE_TYPE, +}; + +enum ObNodeDataType: int8_t { + OB_XML_TYPE = 0, + OB_JSON_TYPE, + OB_PATH_TYPE +}; + +class ObIMulModeBase; +struct ObNodeMetaData { + ObNodeMetaData(ObNodeMemType m_type, ObNodeDataType data_type) + : m_type_(m_type), + data_type_(data_type) + {} + + ObNodeMetaData(const ObNodeMetaData& meta) + : m_type_(meta.m_type_), + data_type_(meta.data_type_) + {} + + ObNodeMemType m_type_; + ObNodeDataType data_type_; +} ; + +struct ObLibTreeNodeBase; +class ObNsPair; +typedef PageArena LibTreeModuleArena; +typedef common::ObVector ObLibTreeNodeVector; +typedef common::ObSortedVector ObNsSortedVector; + +struct ObMulModeMemCtx { + ObIAllocator *allocator_; + ModulePageAllocator page_allocator_; + LibTreeModuleArena mode_arena_; +}; + +class ObMulModeFilter { +public: + ObMulModeFilter() {} + ~ObMulModeFilter() {} + virtual int 
operator()(ObIMulModeBase* cur, bool& filtered) = 0; +}; + +/* Stack that stores its T elements by value inside an ObStringBuffer: room for stack_size elements is reserved up front and extend() adds extend_step more when push() finds it full. */ +template +class ObStack { +public: + ObStack(common::ObIAllocator *allocator, int stack_size = 2, int extend_step = 2) + : buffer_(allocator), + total_(0), + pos_(0), + init_size_(stack_size), + extend_step_(extend_step) + { + if (OB_NOT_NULL(allocator)) { + int64_t length = stack_size * sizeof(T); + int ret = buffer_.reserve(length); + if (OB_SUCC(ret)) { + total_ = stack_size; + buffer_.set_length(length); + } else { + OB_LOG(WARN, "failed to construct ObStack", K(ret), K(length)); + } + } + } + + ObStack(const ObStack& from) + : buffer_(from.buffer_.get_allocator()), + total_(0), + pos_(0), + init_size_(from.init_size_), + extend_step_(from.extend_step_) + { + int64_t length = from.total_ * sizeof(T); + int ret = buffer_.reserve(length); + if (OB_SUCC(ret)) { + buffer_.set_length(length); + total_ = from.total_; + pos_ = from.pos_; + for (int32_t i = 0; i < pos_; ++i) { /* index has the same width as pos_, so it cannot wrap before reaching it */ + char* src_buf = const_cast (from.buffer_.ptr()) + i * sizeof(T); + char* dst_buf = const_cast (buffer_.ptr()) + i * sizeof(T); + new (dst_buf) T(*reinterpret_cast(src_buf)); + } + } else { + OB_LOG(WARN, "failed to construct ObStack", K(ret), K(length)); + } + } + + ObStack& operator=(const ObStack& from) + { + if (this != &from) { new (this) ObStack(from); } /* on self-assignment the copy constructor would zero total_/pos_ before reading them back from `from`, leaving the stack empty */ + return *this; + } + + int push(const T& iter) + { + INIT_SUCC(ret); + int32_t node_size = sizeof(T); + + if (total_ <= pos_) { + if (OB_FAIL(extend())) { + OB_LOG(WARN, "failed to extend", K(ret)); + } + } + + if (OB_SUCC(ret)) { + char* write_buf = buffer_.ptr() + pos_* node_size; + new (write_buf) T(iter); + pos_++; + } + + return ret; + } + + int extend() + { + INIT_SUCC(ret); + int64_t length = buffer_.length(); + int64_t extend_size = extend_step_ * sizeof(T); + if (OB_FAIL(buffer_.reserve(extend_size))) { + OB_LOG(WARN, "failed to reserve.", K(ret), K(extend_size)); + } else { + total_ += extend_step_; + length += extend_size; + buffer_.set_length(length); + } + return ret; + } + + T& top() {
+ int64_t size = count(); + int64_t offset = (size - 1) * sizeof(T); + return *reinterpret_cast(buffer_.ptr() + offset); + } + + void pop() + { + if (!empty()) { + int64_t size = count(); + int64_t offset = (size - 1) * sizeof(T); + T* tmp = reinterpret_cast(buffer_.ptr() + offset); + tmp->~T(); + + --pos_; + } + } + + T& at(int64_t pos) + { + int32_t size = pos_; + OB_ASSERT(pos < size); + + int64_t offset = pos * sizeof(T); + T* tmp = reinterpret_cast(buffer_.ptr() + offset); + return *tmp; + } + + int set(int64_t pos, const T& iter) + { + INIT_SUCC(ret); + int32_t node_size = sizeof(T); + int32_t size = pos_; + OB_ASSERT(pos < size); + int64_t offset = pos * node_size; + + if (OB_SUCC(ret)) { + char* write_buf = buffer_.ptr() + offset; + new (write_buf) T(iter); + } + + return ret; + } + + bool empty() { return pos_ == 0; } + int64_t size() { return pos_; } + int64_t count() { return size(); } + void reset() { pos_ = 0; } +private: + ObStringBuffer buffer_; + int32_t total_; + int32_t pos_; + int32_t init_size_; + int32_t extend_step_; +}; +/* Object pool whose init/alloc/free/reset are defined out of line. The constructor gives every member a defined value, so the count getters and the reset() issued by the destructor never read indeterminate data when init() was not called. */ +class ObPathPool { +public: + ObPathPool() : obj_size_(0), in_use_count_(0), free_count_(0), total_count_(0), alloc_(nullptr), freelist_(nullptr), is_inited_(false) {} + ~ObPathPool() {reset();} + int init(int64_t obj_size, ObIAllocator *alloc); + void *alloc(); + void free(void *obj); + void reset(); + uint64_t get_free_count() {return free_count_;} + uint64_t get_in_use_count() {return in_use_count_;} + uint64_t get_total_count() {return total_count_;} +private: + void *freelist_pop(); + void freelist_push(void *obj); + void alloc_new_node(); +private: + struct FreeNode + { + FreeNode *next_; + }; +private: + // data members + int64_t obj_size_; + uint64_t in_use_count_; + uint64_t free_count_; + uint64_t total_count_; + ObIAllocator *alloc_; + FreeNode *freelist_; + bool is_inited_; +}; + +class ObIMulModeBase { +public: + ObIMulModeBase(ObNodeMemType mem_type, + ObNodeDataType data_type, + ObIAllocator *allocator) + : meta_(mem_type, data_type), + allocator_(allocator) + {} + +
ObIMulModeBase(ObNodeMemType mem_type, + ObNodeDataType data_type) + : meta_(mem_type, data_type), + allocator_(nullptr) + {} + + virtual ~ObIMulModeBase() + {} + + ObIAllocator *get_allocator() { return allocator_; } + + virtual void set_allocator(ObIAllocator* allocator) { allocator_ = allocator; } + + ObNodeDataType data_type() { return meta_.data_type_; } + + /** + * node type, json xml together enum + */ + virtual ObMulModeNodeType type() const = 0; + + bool is_binary() const { return meta_.m_type_ == BINARY_TYPE; } + + bool is_tree() const { return meta_.m_type_ == TREE_TYPE; } + + /** + * children number under current node + */ + virtual int64_t size() = 0; + + /** + * children number under current node + * the same as size() + */ + virtual int64_t count() = 0; + + /** + * children number under current node + */ + virtual int64_t attribute_size() = 0; + + /** + * children number under current node + * the same as size() + */ + virtual int64_t attribute_count() = 0; + + /** + * curent is node is tree, or is storage type for example json type , xml type + */ + ObNodeMemType get_internal_type() const { return meta_.m_type_; }; + + /** + * for search + * current node key name is consistent with input key string + */ + virtual int compare(const ObString& key, int& res) = 0; + + /** + * under current node + * get specified element's key string + */ + virtual int get_key(ObString& res, int64_t index = -1) = 0; + + /** + * under current node + * get specified element's value string + */ + virtual int get_value(ObString& value, int64_t index = -1) = 0; + + /** + * get speicified position element pointer under current node + */ + virtual int get_value(ObIMulModeBase*& value, int64_t index = -1) = 0; + + /** + * get namespace for element + */ + virtual int get_ns_value(ObStack& stk, ObString &ns_value, ObIMulModeBase* extend) = 0; + + virtual int get_ns_value(const ObString& prefix, ObString& ns_value, int& ans_idx) = 0; + + /** + * get speicified element pointer under 
current node + */ + virtual ObIMulModeBase* at(int64_t pos, ObIMulModeBase* buffer = nullptr) = 0; + + virtual ObIMulModeBase* attribute_at(int64_t pos, ObIMulModeBase* buffer = nullptr) = 0; + + /** + * get all children member under current node + */ + virtual int get_children(ObIArray &res, ObMulModeFilter* filter = nullptr) = 0; + + /** + * get all childrent member under current node, whose key string is equal with input key stirng + */ + virtual int get_children(const ObString& key, ObIArray& res, ObMulModeFilter* filter = nullptr) = 0; + + + /** + * get ObNodeMemType=node_type count + */ + virtual int get_node_count(ObMulModeNodeType node_type, int &count) = 0; + + virtual int get_attribute(ObIArray& res, ObMulModeNodeType filter_type, int32_t flags = 0) = 0; + + virtual int get_attribute(ObIMulModeBase*& res, ObMulModeNodeType filter_type, const ObString& key1, const ObString &key2 = ObString()) = 0; + + virtual int get_raw_binary(common::ObString &out, ObIAllocator *allocator) = 0; + /** + * just append new member children under current node + * the same as insert at last + */ + virtual int append(ObIMulModeBase* node) = 0; + + /** + * add new member at specified postition element + * if pos is larger than size, do append at last + * if smaller than 0, insert at 0 position + */ + virtual int insert(int64_t pos, ObIMulModeBase* node) = 0; + + /** + * remove specified member at specified postion, under current node + * iff pos larger than size(), or smaller than 0, return error code + */ + virtual int remove(int64_t pos) = 0; + + /** + * remove speicfied node from current node + * return error code if node not exists + */ + virtual int remove(ObIMulModeBase* node) = 0; + + /** + * update specified postion member under current node + * if pos is too large or smaller, return error + */ + virtual int update(int64_t pos, ObIMulModeBase* new_node) = 0; + + /** + * update specified postion member under current node + * return error code iff old_node not found + */ + 
virtual int update(ObIMulModeBase* old_node, ObIMulModeBase* new_node) = 0; + + // serialize this node as a text string; needed by both json and xml + // format_flag: use 32 bit to indicate the print format, the specific meaning of each bit decided by each data type + virtual int print(ObStringBuffer& x_buf, uint32_t format_flag, uint64_t depth = 0, uint64_t size = 0, ObCollationType charset = CS_TYPE_INVALID); + + /** + * for node compare + */ + // virtual int compare(const ObIMulModeBase &other, int &res) = 0; + + int64_t to_string(char *buf, const int64_t buf_len) const { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "data_type = %d, m_type_=%d", meta_.data_type_, meta_.m_type_); + return pos; + } + + virtual ObMulModeMemCtx* get_mem_ctx() { return nullptr; } + + virtual int print_xml(ObStringBuffer& x_buf, uint32_t format_flag, uint64_t depth, uint64_t size, ObNsSortedVector* ns_vec = nullptr, ObCollationType charset = CS_TYPE_INVALID); + virtual int print_attr(ObStringBuffer& x_buf, uint32_t format_flag); + virtual int print_ns(ObStringBuffer& x_buf, uint32_t format_flag); + virtual int print_pi(ObStringBuffer& x_buf, uint32_t format_flag); + virtual int print_cdata(ObStringBuffer& x_buf, uint32_t format_flag); + virtual int print_comment(ObStringBuffer& x_buf, uint32_t format_flag); + virtual int print_text(ObStringBuffer& x_buf, uint32_t format_flag); + virtual int print_document(ObStringBuffer& x_buf, ObCollationType charset, uint32_t format_flag, uint64_t size = 2, ObNsSortedVector* ns_vec = nullptr); + virtual int print_unparsed(ObStringBuffer& x_buf, ObCollationType charset, uint32_t format_flag, uint64_t size = 2); + virtual int print_content(ObStringBuffer& x_buf, bool with_encoding, bool with_version, uint32_t format_flag, ParamPrint& param_list, ObNsSortedVector* ns_vec = nullptr); /* param_list is a reference parameter */ + virtual int print_element(ObStringBuffer& x_buf, uint64_t depth, uint32_t format_flag, uint64_t size = 2, ObNsSortedVector* ns_vec = nullptr); + + virtual ObString get_version() = 0;
+ virtual ObString get_encoding() = 0; + virtual ObString get_prefix() = 0; + virtual uint16_t get_standalone() = 0; + virtual void set_standalone(uint16_t) = 0; + virtual uint16_t get_encoding_flag() = 0; + virtual uint16_t has_xml_decl() = 0; + virtual uint16_t is_unparse() = 0; + virtual ObIMulModeBase* get_attribute_handle() = 0; + + virtual bool get_is_empty() = 0; + virtual bool has_flags(ObMulModeNodeFlag flag) = 0; + virtual bool get_unparse() = 0; + virtual bool is_equal_node(const ObIMulModeBase* other) = 0; + virtual bool is_node_before(const ObIMulModeBase* other) = 0; + virtual bool check_extend() = 0; + virtual bool check_if_defined_ns() = 0; + + virtual int64_t get_serialize_size(); + // eval interface + /** + * get boolean + */ + virtual bool get_boolean(); + /** + * get double + */ + virtual double get_double(); + /** + * get float + */ + virtual float get_float(); + /** + * get int + */ + virtual int64_t get_int(); + /** + * get uint + */ + virtual uint64_t get_uint(); + /** + * get data + */ + virtual const char *get_data(); + /*** + * get data length + */ + virtual uint64_t get_data_length(); + /** + * get number + */ + virtual number::ObNumber get_decimal_data(); + /** + * get decimal precision + */ + virtual ObPrecision get_decimal_precision(); + /** + * get decimal scale + */ + virtual ObScale get_decimal_scale(); + + virtual ObTime get_time(); + + /* // for cast + * virtual int to_int(int64_t &value, bool check_range = false, bool force_convert = false) const = 0; + * virtual int to_uint(uint64_t &value, bool fail_on_negative = false, bool check_range = false) const = 0; + * virtual int to_double(double &value) const = 0; + * virtual int to_number(ObIAllocator *allocator, number::ObNumber &number) const = 0; + * virtual int to_datetime(int64_t &value, ObTimeConvertCtx *cvrt_ctx_t = nullptr) const = 0; + * virtual int to_date(int32_t &value) const = 0; + * virtual int to_otimestamp(common::ObOTimestampData &value, ObTimeConvertCtx *cvrt_ctx = 
nullptr) const = 0; + * virtual int to_time(int64_t &value) const = 0; + * virtual int to_bit(uint64_t &value) const = 0; + **/ + +protected: + ObNodeMetaData meta_; + ObIAllocator *allocator_; + +}; + +} // namespace common +} // namespace oceanbase + +#endif // OCEANBASE_SQL_OB_MULTI_MODE_INTERFACE \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_path_parser.cpp b/deps/oblib/src/lib/xml/ob_path_parser.cpp new file mode 100644 index 0000000000..65424972e3 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_path_parser.cpp @@ -0,0 +1,2329 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation support for the XML path abstraction. 
+ */ + +#define USING_LOG_PREFIX SQL_RESV +#include "lib/xml/ob_xpath.h" +#include "lib/xml/ob_path_parser.h" +#include "lib/xml/ob_xml_util.h" +#include "lib/string/ob_sql_string.h" +#include "lib/ob_errno.h" +#include "lib/string/ob_string.h" +#include "rpc/obmysql/ob_mysql_global.h" // DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE +#include "common/data_buffer.h" +#include +#include + +namespace oceanbase { +namespace common { + +bool ObPathParserUtil::is_xml_name_start_char(const char ch) { + int ret_bool = false; + if (isalpha(ch) || (ch == ObPathItem::UNDERLINE)) { + ret_bool = true; + } + return ret_bool; +} + +bool ObPathParserUtil::is_end_of_xpathkeyword(const char ch) { + int ret_bool = false; + if (ObXPathUtil::is_whitespace(ch) + || ch == ObPathItem::COLON /* namespace or axis */ + || ch == ObPathItem::BRACE_START /* node(), text() or function */ + || ch == ObPathItem::BEGIN_ARRAY /*filter*/ ) { + ret_bool = true; + } + return ret_bool; +} + +bool ObPathParserUtil::is_xpath_ident_terminator(const char ch) { + int ret_bool = false; + if (ObXPathUtil::is_whitespace(ch) + || ch == ObPathItem::SLASH + || ch == ObPathItem::BRACE_START + || ch == ObPathItem::BEGIN_ARRAY) { + ret_bool = true; + } + return ret_bool; +} + +//use for xmltable transfrom xpath special in resolve +bool ObPathParserUtil::is_xpath_transform_terminator(const char ch) { + int ret_bool = false; + if (ObXPathUtil::is_whitespace(ch) + || ch == ObPathItem::SLASH + || ch == ObPathItem::BEGIN_ARRAY) { + ret_bool = true; + } + return ret_bool; +} + +bool ObPathParserUtil::is_boolean_ans(ObFilterType type) +{ + bool ret_bool = false; + if (type >= ObFilterType::PN_OR_COND && type <= ObFilterType::PN_CMP_GT) { + ret_bool = true; + } + return ret_bool; +} + +bool ObPathParserUtil::is_illegal_comp_for_filter(const ObFilterType& type, ObPathNode* left, ObPathNode* right) +{ + bool ret_bool = false; + if (OB_ISNULL(left) || OB_ISNULL(right)) { + ret_bool = false; + } else { + switch(type) { + case 
ObFilterType::PN_CMP_ADD: + case ObFilterType::PN_CMP_SUB: + case ObFilterType::PN_CMP_MUL: + case ObFilterType::PN_CMP_DIV: + case ObFilterType::PN_CMP_MOD: { + if ((left->node_type_.is_arg() && left->node_type_.get_arg_type() == ObArgType::PN_STRING) + || (right->node_type_.is_arg() && right->node_type_.get_arg_type() == ObArgType::PN_STRING) + || (left->node_type_.is_filter() && is_boolean_subpath_arg(left)) + || (right->node_type_.is_filter() && is_boolean_subpath_arg(right))) { + ret_bool = true; + } + break; + } + case ObFilterType::PN_CMP_EQUAL: + case ObFilterType::PN_CMP_UNEQUAL: + case ObFilterType::PN_CMP_LE: + case ObFilterType::PN_CMP_LT: + case ObFilterType::PN_CMP_GE: + case ObFilterType::PN_CMP_GT: { + if ((left->node_type_.is_filter() && is_boolean_subpath_arg(left)) + || (right->node_type_.is_filter() && is_boolean_subpath_arg(right))) { + ret_bool = true; + } + break; + } + default: + break; + } + } + return ret_bool; +} + +bool ObPathParserUtil::is_boolean_subpath_arg(ObPathNode* node) +{ + bool ret_bool = false; + if (OB_NOT_NULL(node) && node->node_type_.is_filter() ) { + ObPathFilterNode* filter = static_cast(node); + ret_bool = filter->is_boolean_; + } + return ret_bool; +} + +bool ObPathParserUtil::is_position(ObPathNode* node) +{ + bool ret_bool = false; + if (OB_NOT_NULL(node) && node->node_type_.is_arg() + && node->node_type_.get_arg_type() == ObArgType::PN_SUBPATH) + { + ObPathArgNode* arg = static_cast(node); + ObPathNode* subpath = arg->arg_.subpath_; + if (OB_NOT_NULL(subpath) && subpath->node_type_.is_root() + && subpath->size() == 1) { + ObPathNode* func = static_cast(subpath->member(0)); + if (OB_NOT_NULL(func) && func->node_type_.is_func() + && func->node_type_.get_func_type() == ObFuncType::PN_POSITION) { + ret_bool = true; + } // make sure is function postition + } // make sure only one child + } // make sure is subpath + return ret_bool; +} + +bool ObPathParserUtil::check_is_legal_tagname(const char* name, int length) +{ + 
bool ret_bool = true; + // An empty string is not a valid identifier. NOTE(review): only a null name is rejected here; a non-null name with length 0 skips the loop below and still returns true - confirm. + // TODO: use ob decode + if (OB_ISNULL(name)) ret_bool = false; + rapidjson::MemoryStream input_stream(name, length); + unsigned codepoint = 0; + uint64_t last_pos = 0; + + /* validate one decoded codepoint per iteration; the loop stops at the first violation because ret_bool is part of the condition */ + while (ret_bool && (input_stream.Tell() < length)) { + last_pos = input_stream.Tell(); + bool first_codepoint = (last_pos == 0); + if (!rapidjson::UTF8::Decode(input_stream, &codepoint)) { + ret_bool = false; + LOG_WARN_RET(OB_ERR_UNEXPECTED, "fail to decode.", + K(ret_bool), K(codepoint), K(input_stream.Tell()), KCSTRING(name)); + } + /* note: a decode failure does not skip the classification below; ret_bool is never set back to true there */ + + // a unicode letter + uint64_t curr_pos = input_stream.Tell(); + if (ObXPathUtil::is_letter(codepoint, name, last_pos, curr_pos - last_pos) + || codepoint == 0x5F ) { + // letter is ok, _ is ok + } else if (first_codepoint) { + /* + the first character must be one of the above. + more possibilities are available for subsequent characters. + */ + ret_bool = false; + LOG_WARN_RET(OB_ERR_UNEXPECTED, "first character must be _ or letter.", + K(ret_bool), K(codepoint), K(input_stream.Tell()), KCSTRING(name)); + } else if (ObXPathUtil::unicode_combining_mark(codepoint) || isdigit(codepoint) + || ObXPathUtil::is_connector_punctuation(codepoint) + || codepoint == 0x2D || codepoint == 0x2E){ + // - .
is ok + } else { + // nope + ret_bool = false; + LOG_WARN_RET(OB_ERR_UNEXPECTED, "not legal xml element name.", + K(ret_bool), K(codepoint), K(input_stream.Tell()), KCSTRING(name)); + } + } + return ret_bool; +} + +/* True when ch is BRACE_START or BEGIN_ARRAY. */ +bool ObPathParserUtil::is_left_brace(const char ch) +{ + bool ret_bool = false; + if (ch == ObPathItem::BRACE_START || ch == ObPathItem::BEGIN_ARRAY) { + ret_bool = true; + } + return ret_bool; +} + +/* True when ch is one of the single-character operator symbols listed in the switch below. */ +bool ObPathParserUtil::is_operator(const char ch) +{ + bool ret_bool = false; + switch (ch) { + case '+': + case '-': + case '*': + case '=': + case '!': + case '<': + case '>': + case '|': { + ret_bool = true; + break; + } + default: { + break; + } + } + return ret_bool; +} + +/* True when ch is one of b c f l n p r s t, the first letters handled by the switch in ObPathParser::parse_func_type. */ +bool ObPathParserUtil::is_function_start_char(const char ch) +{ + bool ret_bool = false; + if (ch == 'b' || ch == 'c' || ch == 'f' + || ch == 'l' || ch == 'n' || ch == 'p' + || ch == 'r' || ch == 's' || ch == 't') { + ret_bool = true; + } + return ret_bool; +} + +/* True for PN_POSITION and PN_LAST; ObPathParser::parse_func_node rejects these two when the path arg type is NOT_SUBPATH. */ +bool ObPathParserUtil::is_func_must_in_pred(const ObFuncType& func_type) +{ + bool ret_bool = false; + switch (func_type) { + case ObFuncType::PN_POSITION: + case ObFuncType::PN_LAST: { + ret_bool = true; + break; + } + default: { + break; + } + } + return ret_bool; +} + +/* Allocates storage sized for ObPathNode from allocator_; no constructor is run here. Returns OB_BAD_NULL_ERROR when allocator_ is null and OB_ALLOCATE_MEMORY_FAILED when alloc returns null; node is written only on success. */ +int ObPathParser::alloc_path_node(ObPathNode*& node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathNode* path_node = + static_cast (allocator_->alloc(sizeof(ObPathNode))); + if (OB_ISNULL(path_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at path_node", K(ret), K(index_), K(expression_)); + } else { + node = path_node; + } + } + return ret; +} + +/* Same contract as alloc_path_node, for ObPathRootNode; the callers in this file placement-new the node afterwards. */ +int ObPathParser::alloc_root_node(ObPathRootNode*& node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathRootNode* root_node = + static_cast (allocator_->alloc(sizeof(ObPathRootNode))); + if
(OB_ISNULL(root_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at path_node", K(ret), K(index_), K(expression_)); + } else { + node = root_node; + } + } + return ret; +} + +/* Allocates unconstructed storage sized for ObPathFilterOpNode from allocator_. Returns OB_BAD_NULL_ERROR without an allocator and OB_ALLOCATE_MEMORY_FAILED when alloc returns null; node is written only on success. */ +int ObPathParser::alloc_filter_op_node(ObPathFilterOpNode*& node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathFilterOpNode* new_node = + static_cast (allocator_->alloc(sizeof(ObPathFilterOpNode))); + if (OB_ISNULL(new_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at path_node", K(ret), K(index_), K(expression_)); + } else { + node = new_node; + } + } + return ret; +} + +/* Allocates storage sized for ObPathLocationNode from allocator_ and clears its ns info. Same error codes as the other alloc_ helpers. NOTE(review): set_ns_info is called on storage that has not been constructed yet; parse_non_abbrevited_location_node placement-news the node right after this call, which presumably re-initializes it - confirm. */ +int ObPathParser::alloc_location_node(ObPathLocationNode*& node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathLocationNode* location_node = + static_cast (allocator_->alloc(sizeof(ObPathLocationNode))); + if (OB_ISNULL(location_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at location_node", K(ret), K(index_), K(expression_)); + } else { + node = location_node; + location_node->set_ns_info(nullptr, 0); + } + } + return ret; +} + +/* Allocates unconstructed storage sized for ObPathFilterNode from allocator_; same error codes as the other alloc_ helpers. The warning text still says location_node. */ +int ObPathParser::alloc_filter_node(ObPathFilterNode*& node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathFilterNode* filter_node = + static_cast (allocator_->alloc(sizeof(ObPathFilterNode))); + if (OB_ISNULL(filter_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at location_node", K(ret), K(index_), K(expression_)); + } else { + node = filter_node; + } + } + return ret; +} + +/* Allocates unconstructed storage sized for ObPathFuncNode from allocator_; same error codes as the other alloc_ helpers. */ +int ObPathParser::alloc_func_node(ObPathFuncNode*& node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathFuncNode*
func_node = + static_cast (allocator_->alloc(sizeof(ObPathFuncNode))); + if (OB_ISNULL(func_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at location_node", K(ret), K(index_), K(expression_)); + } else { + node = func_node; + } + } + return ret; +} + +/* Allocates unconstructed storage sized for ObPathArgNode from allocator_. Returns OB_BAD_NULL_ERROR without an allocator and OB_ALLOCATE_MEMORY_FAILED when alloc returns null; node is written only on success. */ +int ObPathParser::alloc_arg_node(ObPathArgNode*& node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathArgNode* arg_node = + static_cast (allocator_->alloc(sizeof(ObPathArgNode))); + if (OB_ISNULL(arg_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at location_node", K(ret), K(index_), K(expression_)); + } else { + node = arg_node; + } + } + return ret; +} + +/* Checks every adjacent pair of location nodes under root_node_ and stops at the first violation. For NOT_SUBPATH the rules concern a TEXT step followed by an upper-axis or down-axis step; for IN_FUNCTION a SELF or upper axis is rejected with OB_OP_NOT_ALLOW; other arg types and non-location pairs are not checked. NOTE(review): root_node_ and its members are dereferenced without null checks, and in the IN_FUNCTION branch the second warning logs the axis of location1 although location2 triggered it. */ +int ObPathParser::check_is_legal_xpath(const ObPathArgType& patharg_type) +{ + INIT_SUCC(ret); + int count = root_node_->size(); + for (int i = 0; i + 1 < count && OB_SUCC(ret); ++i) { + ObPathNode* node1 = static_cast(root_node_->member(i)); + ObPathNode* node2 = static_cast(root_node_->member(i + 1)); + if (node1->node_type_.is_location() && node2->node_type_.is_location()) { + ObPathLocationNode* location1 = static_cast (node1); + ObPathLocationNode* location2 = static_cast (node2); + if (patharg_type == ObPathArgType::NOT_SUBPATH) { + if (location1->get_seek_type() == ObSeekType::TEXT + && location1->get_prefix_ns_info() + && ObPathUtil::is_upper_axis(location2->get_axis())) { + ret = OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed + LOG_WARN("Function call with invalid number of arguments", K(ret), K(location2->get_axis())); + } + if (OB_FAIL(ret)) { + } else if (location1->has_filter_ + || location1->get_seek_type() != ObSeekType::TEXT + || ObPathUtil::is_upper_axis(location1->get_axis())) { // do not need check + } else if (location1->get_axis() == ObPathNodeAxis::SELF) { + if (ObPathUtil::is_upper_axis(location2->get_axis())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid XPATH
expression.", K(ret), K(location2->get_axis())); + } + } else if (ObPathUtil::is_down_axis(location2->get_axis())) { + ret = OB_ERR_WRONG_VALUE; + LOG_WARN("Invalid Input.", K(ret), K(location2->get_axis())); + } + } else if (patharg_type == ObPathArgType::IN_FUNCTION) { + if (i == 0 && (location1->get_axis() == ObPathNodeAxis::SELF + || ObPathUtil::is_upper_axis(location1->get_axis()))) { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("Given XPATH expression not supported", K(ret), K(location1->get_axis())); + } else if (location2->get_axis() == ObPathNodeAxis::SELF + || ObPathUtil::is_upper_axis(location2->get_axis())) { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("Given XPATH expression not supported", K(ret), K(location1->get_axis())); + } + } // in filter, do not check + } // not location do not check + } // end for + return ret; +} + +/* Skips whitespace at index_ (so index_ may advance), then reports whether expression_ at index_ starts with str. index_ is not moved past the match. */ +bool ObPathParser::path_prefix_match(const char *str) +{ + bool ret_bool = false; + ObXPathUtil::skip_whitespace(expression_, index_); + if (index_ + strlen(str) <= len_) { + ObString substr(strlen(str), expression_.ptr() + index_); + ret_bool = substr.prefix_match(str); + } + return ret_bool; +} + +// Reports whether one of the letter operators (and, or, div, mod) starts at index_: the keyword must be preceded by a space or a digit and followed by a space. NOTE(review): the index_ > 1 guard means a keyword at index 0 or 1 is never recognized - confirm this is intended. +bool ObPathParser::is_prefix_match_letter_operator() +{ + bool ret_bool = false; + if (index_ + 1 < len_) { + switch(expression_[index_]) { + case 'a': { + ret_bool = path_prefix_match(ObPathItem::AND); + if (ret_bool) { + if ((index_ > 1 && (expression_[index_ - 1] == ' ' || isdigit(expression_[index_ - 1]))) + && (index_ + strlen(ObPathItem::AND) < len_ && expression_[index_ + strlen(ObPathItem::AND)] == ' ')) { + ret_bool = true; + } else { + ret_bool = false; + } + } + break; + } + case 'o': { + ret_bool = path_prefix_match(ObPathItem::OR); + if (ret_bool) { + if ((index_ > 1 && (expression_[index_ - 1] == ' ' || isdigit(expression_[index_ - 1]))) + && (index_ + strlen(ObPathItem::OR) < len_ && expression_[index_ + strlen(ObPathItem::OR)] == ' ')) { + ret_bool = true; + } else { + ret_bool = false; + } + } + break; + } + case
'd': { + ret_bool = path_prefix_match(ObPathItem::DIV); + if (ret_bool) { + if ((index_ > 1 && (expression_[index_ - 1] == ' ' || isdigit(expression_[index_ - 1]))) + && (index_ + strlen(ObPathItem::DIV) < len_ && expression_[index_ + strlen(ObPathItem::DIV)] == ' ')) { + ret_bool = true; + } else { + ret_bool = false; + } + } + break; + } + case 'm': { + ret_bool = path_prefix_match(ObPathItem::MOD); + if (ret_bool) { + if ((index_ > 1 && (expression_[index_ - 1] == ' ' || isdigit(expression_[index_ - 1]))) + && (index_ + strlen(ObPathItem::MOD) < len_ && expression_[index_ + strlen(ObPathItem::MOD)] == ' ')) { + ret_bool = true; + } else { + ret_bool = false; + } + } + break; + } + default: { + break; + } + } + } + + return ret_bool; +} + +/* Look-ahead: runs parse_func_type at index_ and reports whether it succeeded with a recognized function type. index_ is restored before returning. */ +bool ObPathParser::is_prefix_match_function() +{ + bool ret_bool = false; + uint64_t old_index = index_; + ObFuncType func_type = ObFuncType::PN_FUNC_ERROR; + int ret = parse_func_type(func_type); + if (OB_FAIL(ret) || func_type == ObFuncType::PN_FUNC_ERROR) { + ret_bool = false; + } else { + ret_bool = true; + } + index_ = old_index; + return ret_bool; +} + +/* True when the last non-whitespace character of expression_ (scanning back from len_ - 1, no further than index_) is BRACE_END. NOTE(review): expression_[start] is read without checking that len_ is greater than 0 - confirm callers guarantee a non-empty expression. */ +bool ObPathParser::is_path_end_with_brace() +{ + bool ret_bool = false; + int end = index_; + int start = len_ - 1; + while (start > end && ObXPathUtil::is_whitespace(expression_[start])) --start; + ret_bool = (expression_[start] == ObPathItem::BRACE_END); + return ret_bool; +} + +/* Skips whitespace, then reports whether the rest of the expression looks like a function call: it starts with a function start char, ends with BRACE_END, and parse_func_type recognizes the name. NOTE(review): expression_[index_] is read without an index_ bound check after the whitespace skip. */ +bool ObPathParser::is_function_path() +{ + bool ret_bool = false; + ObXPathUtil::skip_whitespace(expression_, index_); + if (ObPathParserUtil::is_function_start_char(expression_[index_]) + && is_path_end_with_brace() && is_prefix_match_function()) { + ret_bool = true; + } + return ret_bool; +} + +/* +'comment' +| 'text' +| 'processing-instruction' +| 'node' +*/ +/* Maps the node-test name in str to seek_type (COMMENT, NODES, TEXT or PROCESSING_INSTRUCTION) and consumes the closing BRACE_END from expression_. For processing-instruction an optional literal is read into arg and arg_len. When str is not a node test, seek_type is ERROR_SEEK and the return code stays success. A missing BRACE_END gives OB_INVALID_DATA. */ +int ObPathParser::check_nodetest(const ObString& str, ObSeekType& seek_type, char*& arg, uint64_t& arg_len) +{ + INIT_SUCC(ret); + seek_type = ObSeekType::ERROR_SEEK; + if (index_ >= len_ || str.length() < 1) { + ret =
OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("wrong path expression", K(ret), K(index_)); + } else { + bool is_nodetest = false; + /* dispatch on the first letter, then confirm with a prefix match on the full keyword */ + switch (str[0]) { + case 'c': { + if (str.prefix_match(ObPathItem::COMMENT)) { + seek_type = ObSeekType::COMMENT; + is_nodetest = true; + } + break; + } + case 'n': { + if (str.prefix_match(ObPathItem::NODE)) { + seek_type = ObSeekType::NODES; + is_nodetest = true; + } + break; + } + case 't': { + if (str.prefix_match(ObPathItem::TEXT)) { + seek_type = ObSeekType::TEXT; + is_nodetest = true; + } + break; + } + case 'p': { + if (str.prefix_match(ObPathItem::PROCESSING_INSTRUCTION)) { + seek_type = ObSeekType::PROCESSING_INSTRUCTION; + is_nodetest = true; + ObXPathUtil::skip_whitespace(expression_, index_); + if (index_ + 2 < len_ && is_literal_begin()) { + if (OB_FAIL(get_xpath_literal(arg, arg_len))) { + LOG_WARN("fail to get literal of PROCESSING_INSTRUCTION", K(ret), K(index_)); + } + } + } + break; + } + default: { + is_nodetest = false; + break; + } + } + + if (OB_FAIL(ret)) { + } else if (!is_nodetest) { + seek_type = ObSeekType::ERROR_SEEK; + } else { + ObXPathUtil::skip_whitespace(expression_, index_); + if (index_ < len_ && expression_[index_] == ObPathItem::BRACE_END) { + ++index_; + } else { + ret = OB_INVALID_DATA; + LOG_WARN("must hava ')'", K(ret), K(index_)); + } + } + } + return ret; +} + +/* Writes the parsed tree into str through root_node_->node_to_string. Returns OB_BAD_NULL_ERROR when root_node_ is null. */ +int ObPathParser::to_string(ObStringBuffer& str) +{ + INIT_SUCC(ret); + if (OB_ISNULL(root_node_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret), K(str)); + } else { + if (OB_FAIL(root_node_->node_to_string(str))) { + LOG_WARN("fail to string", K(ret), K(str)); + } + } + return ret; +} + +/* Parses the expression from index_: skips leading whitespace and, for PARSER_XML_PATH, runs parse_filter_node and stores the result in root_node_. An empty or all-whitespace remainder gives OB_ERR_NULL_VALUE. For any other parser type nothing is parsed and success is returned. */ +int ObPathParser::parse_path(ObPathArgType patharg_type) +{ + INIT_SUCC(ret); + ObXPathUtil::skip_whitespace(expression_, index_); + if (index_ < len_) { + ObPathNode* path_node = nullptr; + if (parser_type_ == ObParserType::PARSER_XML_PATH) { + if (OB_FAIL(parse_filter_node(path_node, patharg_type))) { + LOG_WARN("parse failed", K(ret),
K(index_), K(expression_)); + } else { + root_node_ = path_node; + } + } // TODO: else if (parser_type_ == ObParserType::PARSER_JSON_PATH) + } else { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("path is null", K(ret), K(index_), K(expression_)); + } + return ret; +} + +/* Parses the rest of the expression as a chain of steps under a fresh ObPathRootNode, which becomes root_node_. After the loop it records whether the path is absolute, validates adjacent steps with check_is_legal_xpath (skipped for IN_FILTER and for paths with at most one step) and, when any step carried a filter (need_trans_ greater than 0), replaces root_node_ with the tree built by trans_to_filter_op. On a step failure bad_index_ is set to the current index_. */ +int ObPathParser::parse_location_path(ObPathArgType patharg_type) +{ + INIT_SUCC(ret); + ObPathRootNode* root_node = nullptr; + if (OB_FAIL(alloc_root_node(root_node))) { + LOG_WARN("allocate row buffer failed at path_node", K(ret), K(index_), K(expression_)); + } else { + root_node = new (root_node) ObPathRootNode(ctx_, parser_type_); + root_node_ = root_node; + } + while (index_ < len_ && OB_SUCC(ret)) { + if (OB_FAIL(parse_xpath_node(patharg_type))) { + bad_index_ = index_; + LOG_WARN("fail to parse Path Expression!", K(ret), K(index_)); + } else { + ObXPathUtil::skip_whitespace(expression_, index_); + } + } // end while + if (OB_FAIL(ret)) { + } else if (OB_FALSE_IT(root_node->contain_relative_path_ = !(root_node->is_abs_subpath()))) { + } else if (OB_FALSE_IT(root_node->is_abs_path_ = root_node->is_abs_subpath())) { + } else if (patharg_type != ObPathArgType::IN_FILTER + && root_node_->size() > 1 && OB_FAIL(check_is_legal_xpath(patharg_type))) { + LOG_WARN("illegal Path Expression!", K(ret), K(index_)); + } else if (root_node->need_trans_ > 0) { + ObPathNode* op_root = nullptr; + if (OB_FAIL(trans_to_filter_op(root_node, root_node->need_trans_, true, op_root))) { + LOG_WARN("Converting location to filter op failed!", K(ret), K(root_node->need_trans_)); + } else { + root_node_ = op_root; + } + } + return ret; +} + +/* Splits origin_root at its first location node that has filters. The filters are moved onto a new ObPathFilterOpNode whose left side is origin_root (the steps up to and including that node); the steps after it are moved to a new relative root that becomes the right side. While filter_num is still above 0 after the split, the right side is converted recursively with is_first set to false, which also marks origin_root and its first step as relative. op_root receives the new filter op node. */ +int ObPathParser::trans_to_filter_op(ObPathRootNode*& origin_root, int filter_num, bool is_first, ObPathNode*& op_root) +{ + INIT_SUCC(ret); + int filter_idx = -1; + // left node must be a location node, start with origin_root + // right node could be a location node or another filter_op, if is filter_op, convert later + ObPathRootNode* right = nullptr; + ObPathFilterOpNode* op = nullptr; +
if (OB_ISNULL(origin_root)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_FAIL(alloc_filter_op_node(op))) { + LOG_WARN("allocate row buffer failed at path_node", K(ret), K(index_), K(expression_)); + } else { + op = new (op) ObPathFilterOpNode(ctx_, parser_type_); + if (!is_first) { + origin_root->is_abs_path_ = false; + } + op->init_left(origin_root); + } + for (int i = 0; OB_SUCC(ret) && i < origin_root->size() && filter_num > 0;) { + ObPathNode* tmp = static_cast(origin_root->member(i)); + if (OB_ISNULL(tmp)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (tmp->get_node_type().is_location()) { + ObPathLocationNode* location = static_cast(tmp); + // if not the first time of converting, must start search with relative_path + if (i == 0 && !is_first && filter_idx < 0) { + location->is_absolute_ = false; + } + // first location with filter + if (filter_idx < 0 && location->has_filter_) { + filter_idx = i; + /* j is intentionally not advanced: each pass moves member j to op and removes it; presumably remove(j) shifts the next filter into slot j - confirm */ + for (int j = 0; OB_SUCC(ret) && j < location->size();) { + ObPathNode* tmp_filter = static_cast(location->member(j)); + if (OB_FAIL(op->append_filter(tmp_filter))) { + LOG_WARN("fail to append filter", K(ret)); + } else if (OB_FAIL(location->remove(j))) { + LOG_WARN("fail to remove filter", K(ret)); + } + } + location->has_filter_ = false; + // if the first location with filter is the last location node, right arg of op is null + // and there must be only one location with filter + ++i; + if (i == origin_root->size()) { + if (filter_num != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("there must only on location node with filter", K(ret), K(filter_num)); + } else { + op->init_left(origin_root); + --filter_num; + } + } else if (OB_FAIL(alloc_root_node(right))) { + LOG_WARN("fail to alloc right", K(ret)); + } else { + // if not the last location, need a new root for right arg + right = new (right) ObPathRootNode(ctx_, parser_type_); + right->is_abs_path_ = false; + } + } else if (filter_idx >=
0) { // location after filter + /* every step after the filtered one is moved from origin_root to right; i stays put because remove(i) shrinks origin_root */ + if (i != filter_idx + 1 || OB_ISNULL(right)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong idx", K(ret), K(i), K(filter_idx)); + /* NOTE(review): the next branch is the remove() failure although its message says append */ + } else if (OB_FAIL(origin_root->remove(i))) { + LOG_WARN("fail to append location", K(ret)); + } else if (OB_FAIL(right->append(location))) { + LOG_WARN("fail to append location", K(ret)); + } else if (right->size() == 1) { + location->is_absolute_ = false; + } + if (origin_root->size() == i) { + --filter_num; + } + } else { + ++i; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong path node type", K(ret)); + } + } // end for + + /* filter_num == 0: right (possibly null) is the final right side; otherwise convert right recursively */ + if (OB_FAIL(ret)) { + } else if (filter_num == 0) { + op->init_right(right); + op_root = op; + } else { + ObPathNode* right_op = nullptr; + if (OB_FAIL(trans_to_filter_op(right, filter_num, false, right_op))) { + LOG_WARN("fail to get right op", K(ret)); + } else if (OB_NOT_NULL(right_op)){ + op->init_right(right_op); + op_root = op; + } + } + return ret; +} + +/* Parses one step at index_: a DOUBLE_SLASH step, a SLASH step (absolute location node) or, only while is_first_node_ is set, a primary expression without a leading slash. Anything else gives OB_INVALID_ARGUMENT. NOTE(review): for DOUBLE_SLASH index_ is advanced by one only; presumably parse_double_slash_node consumes the second slash - confirm. */ +int ObPathParser::parse_xpath_node(ObPathArgType patharg_type) +{ + INIT_SUCC(ret); + ObXPathUtil::skip_whitespace(expression_, index_); + + if (OB_ISNULL(root_node_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("root is null", K(ret), K(index_), K(expression_)); + } else if (index_ >= len_) { + ret = OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("wrong path expression", K(ret), K(index_)); + } else if (path_prefix_match(ObPathItem::DOUBLE_SLASH)) { + ++index_; + if (OB_FAIL(parse_double_slash_node())) { + LOG_WARN("failed to parse location node.", K(ret), K(index_), K(expression_)); + } + is_first_node_ = false; + } else if (expression_[index_] == ObPathItem::SLASH) { + ++index_; + if (OB_FAIL(parse_location_node(true))) { + LOG_WARN("failed to parse location node.", K(ret), K(index_), K(expression_)); + } + is_first_node_ = false; + } else if (is_first_node_) { + if (OB_FAIL(parse_primary_expr_node(patharg_type))) { + LOG_WARN("failed to parse location node.", K(ret), K(index_), K(expression_)); + } + is_first_node_ = false; + }
else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to parse location node.", K(ret), K(index_), K(expression_)); + } + + return ret; +} + +/* Parses the location step that follows a slash (or starts a relative path). Dispatches to the DOUBLE_DOT form, the DOT form or the full axis + nodetest + filter form. Running out of input is accepted only while is_first_node_ is set; otherwise it gives OB_ARRAY_OUT_OF_RANGE. */ +int ObPathParser::parse_location_node(bool is_absolute) +{ + INIT_SUCC(ret); + ObXPathUtil::skip_whitespace(expression_, index_); + if (index_ >= len_) { + // if is_first_node == true, in this case, the path expression is '/', do nothing and return root + if (!is_first_node_) { + ret = OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("wrong path expression", K(ret), K(index_)); + } + // location node: must be '/' + axis + nodetest + filter + // .. and . have definite axis and nodetest + // if not .. or . , should parse axis first and then nodetest + } else if (path_prefix_match(ObPathItem::DOUBLE_DOT)) { + if (OB_FAIL(parse_double_dot_node(is_absolute))) { + LOG_WARN("fail to parse '..' ", K(ret), K(index_)); + } + } else if (expression_[index_] == ObPathItem::DOT) { + if (OB_FAIL(parse_single_dot_node(is_absolute))) { + LOG_WARN("fail to parse '.' ", K(ret), K(index_)); + } + } else if (OB_FAIL(parse_non_abbrevited_location_node(is_absolute))) { + LOG_WARN("fail to parse absolute_location ", K(ret), K(index_)); + } + return ret; +} + +/* Builds a PN_KEY location node for a non-abbreviated step: parses the axis, then the node test, then zero or more filters opened by BEGIN_ARRAY, each appended as a child of the node. A node with filters gets has_filter_ set and increments need_trans_ on the root. On success the node is appended to root_node_. */ +int ObPathParser::parse_non_abbrevited_location_node(bool is_absolute) +{ + INIT_SUCC(ret); + ObPathLocationNode* location_node = nullptr; + if (OB_FAIL(alloc_location_node(location_node))) { + LOG_WARN("fail to alloc location node", K(ret), K(index_), K(expression_)); + } else { + location_node = new (location_node) ObPathLocationNode(ctx_, parser_type_); + location_node->is_absolute_ = is_absolute; + if (OB_FAIL(location_node->init(ObLocationType::PN_KEY))) { + LOG_WARN("fail to init ellipsis_node", K(ret), K(index_), K(expression_)); + } else if (OB_FAIL(parse_axis_info(location_node))) { + LOG_WARN("fail to parse_axis_info ", K(ret), K(index_)); + } else if (OB_FAIL(parse_nodetest_info(location_node))) { + LOG_WARN("fail to parse_axis_info ", K(ret), K(index_)); + } else if (index_ < len_ &&
expression_[index_] == ObPathItem::BEGIN_ARRAY) { + while (OB_SUCC(ret) && index_ < len_ && expression_[index_] == ObPathItem::BEGIN_ARRAY) { + ObPathNode* filter = nullptr; + if (OB_FAIL(parse_filter_node(filter, ObPathArgType::IN_FILTER))) { + LOG_WARN("fail to parse filter", K(ret), K(index_)); + } else if (OB_FAIL(location_node->append(filter))) { + LOG_WARN("fail to append filter", K(ret), K(index_)); + } + ObXPathUtil::skip_whitespace(expression_, index_); + } + if (OB_SUCC(ret)) { + location_node->has_filter_ = true; + ObPathRootNode* root_node = static_cast(root_node_); + root_node->need_trans_++; + } + } + // on success, add to root + if (OB_FAIL(ret)) { + } else if (OB_FAIL(root_node_->append(location_node))) { + LOG_WARN("fail to append location node", K(ret), K(index_), K(expression_)); + } + } + return ret; +} + +/* Recognizes a function name at index_ and consumes it together with the opening BRACE_START and the whitespace after it. func_type stays PN_FUNC_ERROR when no name matches. Within each first-letter case, longer names are tested before names that are a prefix of them. Returns OB_ARRAY_OUT_OF_RANGE when fewer than 3 characters remain or the name is not followed by BRACE_START, and OB_INVALID_ARGUMENT when the first character starts no known function. */ +int ObPathParser::parse_func_type(ObFuncType& func_type) +{ + INIT_SUCC(ret); + // the recognized function names are the literals matched in the switch below (the older note about only position, last and count is out of date) + // todo: add more func + func_type = ObFuncType::PN_FUNC_ERROR; + if (index_ + 3 > len_) { + ret = OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("index out of range!", K(ret), K(index_), K(expression_)); + } else { + switch(expression_[index_]) { + case 'b': { + if (path_prefix_match("boolean")) { + func_type = ObFuncType::PN_BOOLEAN_FUNC; + } + break; + } + case 'c': { + if (path_prefix_match("contains")) { + func_type = ObFuncType::PN_CONTAINS; + } else if (path_prefix_match("concat")) { + func_type = ObFuncType::PN_CONCAT; + } else if (path_prefix_match("count")) { + func_type = ObFuncType::PN_COUNT; + } + break; + } + case 'f': { + if (path_prefix_match("false")) { + func_type = ObFuncType::PN_FALSE; + } else if (path_prefix_match("floor")) { + func_type = ObFuncType::PN_FLOOR; + } + break; + } + case 'l': { + if (path_prefix_match("local-name")) { + func_type = ObFuncType::PN_LOCAL_NAME; + } else if (path_prefix_match("last")) { + func_type = ObFuncType::PN_LAST; + } else if (path_prefix_match("lang")) { +
func_type = ObFuncType::PN_LANG; + } + break; + } + case 'n': { + if (path_prefix_match("normalize-space")) { + func_type = ObFuncType::PN_NORMALIZE_SPACE; + } else if (path_prefix_match("namespace-uri")) { + func_type = ObFuncType::PN_NS_URI; + } else if (path_prefix_match("number")) { + func_type = ObFuncType::PN_NUMBER_FUNC; + } else if (path_prefix_match("name")) { + func_type = ObFuncType::PN_NAME; + } else if (path_prefix_match("not")) { + func_type = ObFuncType::PN_NOT_FUNC; + } + break; + } + case 'p': { + if (path_prefix_match("position")) { + func_type = ObFuncType::PN_POSITION; + } + break; + } + case 'r': { + if (path_prefix_match("round")) { + func_type = ObFuncType::PN_ROUND; + } + break; + } + case 's': { + if (path_prefix_match("string-length")) { + func_type = ObFuncType::PN_LENGTH; + } else if (path_prefix_match("substring")) { + func_type = ObFuncType::PN_SUBSTRING_FUNC; + } else if (path_prefix_match("string")) { + func_type = ObFuncType::PN_STRING_FUNC; + } else if (path_prefix_match("sum")) { + func_type = ObFuncType::PN_SUM; + } + break; + } + case 't': { + if (path_prefix_match("true")) { + func_type = ObFuncType::PN_TRUE; + } + break; + } + default: { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("wrong func name!", K(ret), K(index_), K(expression_)); + break; + } + } + /* a name matched: step over it (func_name_len is indexed by the offset from PN_ABS), then require and consume the opening brace */ + if (func_type != ObFuncType::PN_FUNC_ERROR) { + index_ += func_name_len[func_type - ObFuncType::PN_ABS]; + ObXPathUtil::skip_whitespace(expression_, index_); + if (OB_FAIL(ret)) { + } else if (index_ >= len_ || expression_[index_] != ObPathItem::BRACE_START) { + ret = OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("index out of range or not brace after function!", K(ret)); + } else { + ++index_; + ObXPathUtil::skip_whitespace(expression_, index_); + } + } + } + return ret; +} + +/* Parses the arguments of func_node up to the closing BRACE_END, which is consumed, appending each argument as a child; then validates the arguments with check_is_legal_arg. When get_max_arg_num() is 0 no argument is parsed and the next character must be BRACE_END. Returns OB_BAD_NULL_ERROR for a null func_node and OB_INVALID_ARGUMENT when the closing brace is missing. NOTE(review): the comma between arguments is consumed when present but not required. */ +int ObPathParser::parse_func_arg(ObPathFuncNode*& func_node, ObPathArgType patharg_type) +{ + INIT_SUCC(ret); + if (OB_ISNULL(func_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else
if (func_node->get_max_arg_num() > 0) { + while (OB_SUCC(ret) && index_ < len_ && expression_[index_] != ObPathItem::BRACE_END) { + ObPathNode* arg = nullptr; + if (OB_FAIL(parse_arg(arg, patharg_type, false, false))) { + LOG_WARN("fail to get arg", K(ret), K(index_), K(expression_)); + } else if (OB_FAIL(func_node->append(arg))) { + LOG_WARN("fail to append arg", K(ret), K(index_), K(expression_)); + } else { + ObXPathUtil::skip_whitespace(expression_, index_); + if (index_ < len_ && expression_[index_] == ',') { + ++index_; + } + } + ObXPathUtil::skip_whitespace(expression_, index_); + } // end while + } + + // must end with ')' + if (OB_FAIL(ret)) { + } else if (index_ < len_ && expression_[index_] == ObPathItem::BRACE_END) { + ++index_; + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("must end with brace!", K(ret), K(index_), K(expression_)); + } + + // check arg_num + if (OB_FAIL(ret)) { + } else if (OB_FAIL(func_node->check_is_legal_arg())) { + LOG_WARN("Function call with invalid number of arguments", K(ret)); + } + return ret; +} + +/* Parses a whole function call at index_ and makes the resulting ObPathFuncNode the root_node_. PN_POSITION and PN_LAST are rejected with OB_OP_NOT_ALLOW when patharg_type is NOT_SUBPATH; for the other functions NOT_SUBPATH is switched to IN_FUNCTION before the arguments are parsed. When parse_func_type succeeds without recognizing a name the result is OB_ARRAY_OUT_OF_RANGE. */ +int ObPathParser::parse_func_node(ObPathArgType patharg_type) +{ + INIT_SUCC(ret); + ObFuncType func_type = ObFuncType::PN_FUNC_ERROR; + if (OB_FAIL(parse_func_type(func_type))) { + } else if (func_type != ObFuncType::PN_FUNC_ERROR) { + ObPathFuncNode* func_node = nullptr; + if (OB_FAIL(alloc_func_node(func_node))) { + LOG_WARN("allocate row buffer failed at path_node", K(ret), K(index_), K(expression_)); + } else { + func_node = new (func_node) ObPathFuncNode(ctx_, parser_type_); + if (OB_FAIL(func_node->init(func_type))) { + LOG_WARN("fail to init ellipsis_node", K(ret), K(index_), K(expression_)); + } else if (patharg_type == ObPathArgType::NOT_SUBPATH + && ObPathParserUtil::is_func_must_in_pred(func_type)) { + //ORA-31012: Given XPATH expression not supported + ret = OB_OP_NOT_ALLOW; + LOG_WARN("Given XPATH expression not supported", K(ret), K(index_), K(expression_)); + } + if (OB_SUCC(ret) && patharg_type ==
ObPathArgType::NOT_SUBPATH) { + patharg_type = ObPathArgType::IN_FUNCTION; + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(parse_func_arg(func_node, patharg_type))) { + LOG_WARN("fail to parse function arg", K(ret), K(index_), K(expression_)); + } else if (OB_FAIL(func_node->checek_cache_and_abs())) { + LOG_WARN("fail to init function bool", K(ret)); + } else { + root_node_ = func_node; + } + } + } else { + ret = OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("index out of range!", K(ret), K(index_), K(expression_)); + } + return ret; +} + +/* Parses the first step of a path that does not start with a slash, as a relative location node. A leading DOLLAR is rejected with OB_ERR_WRONG_VALUE_FOR_VAR. An all-whitespace remainder is not an error. NOTE(review): patharg_type and the local is_cmp are not used in this body. */ +int ObPathParser::parse_primary_expr_node(ObPathArgType patharg_type) +{ + INIT_SUCC(ret); + ObXPathUtil::skip_whitespace(expression_, index_); + // remember to advance index by n + bool is_cmp = false; + if (index_ < len_) { + if (expression_[index_] == ObPathItem::DOLLAR) { + ret = OB_ERR_WRONG_VALUE_FOR_VAR; + LOG_WARN("Invalid reference.", K(ret), K(index_)); + } else { + if (OB_FAIL(parse_location_node(false))) { + LOG_WARN("fail to parse function.", K(ret), K(index_)); + } // is function + } + } // all space, not error + return ret; +} + +/* Reads an identifier that starts at index_ and runs to the next ident terminator, skips the whitespace after it and, when a BRACE_START follows, consumes it and sets is_func. The identifier text is copied into memory from allocator_ and, for a function, BRACE_START is written right after the copied text. is_func is only ever set to true here, so the caller must initialize it. */ +int ObPathParser::get_xpath_ident(char*& str, uint64_t& length, bool& is_func) +{ + INIT_SUCC(ret); + uint64_t start = 0, end = 0, str_len = 0; + + if (OB_ISNULL(allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (index_ < len_) { + start = index_; + while (OB_SUCC(ret) && index_ < len_ && !ObPathParserUtil::is_xpath_ident_terminator(expression_[index_])) { + ++index_; + } + if (OB_FAIL(ret)) { + } else { + end = index_ - 1; + ObXPathUtil::skip_whitespace(expression_, index_); + if (index_ < len_ && expression_[index_] == ObPathItem::BRACE_START) { + // fun_name + () + is_func = true; + ++index_; + } + } + } else { + ret = OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("index out of range!", K(ret), K(index_), K(expression_)); + } + + if (OB_SUCC(ret)) { + if (end < start) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get keyname: end (allocator_->alloc(length)); + if (OB_ISNULL(str)) { + ret
= (length > 0)? OB_ALLOCATE_MEMORY_FAILED : OB_ERR_NULL_VALUE; + LOG_WARN("fail to allocate memory for member_name.",K(ret), K(str_len),K(start_ptr)); + } else { + MEMCPY(str, start_ptr, str_len); + if (is_func) str[str_len] = ObPathItem::BRACE_START; + } + } + } + + return ret; +} + +/* Reads a quoted literal at index_. The literal is opened by a single or double quote and closed by the same kind of quote; the text between the quotes is copied into memory from allocator_, and an empty literal gives str = nullptr with length 0. Returns OB_ARRAY_OUT_OF_RANGE when index_ is not at a literal start and OB_INVALID_ARGUMENT when the closing quote is missing. NOTE(review): end == 0 doubles as the not-found marker, so a closing quote found at index 1 (an empty literal at the very start of the expression) leaves end at 0 and the scan keeps going - confirm. */ +int ObPathParser::get_xpath_literal(char*& str, uint64_t& length) +{ + INIT_SUCC(ret); + bool is_double_quoted = true; + uint64_t start = 0, end = 0, str_len = 0; + if (index_ < len_ && is_literal_begin()) { + if (expression_[index_] == ObPathItem::SINGLE_QUOTE) is_double_quoted = false; + ++index_; + start = index_; + while (OB_SUCC(ret) && index_ < len_ && end == 0 ) { + if ((expression_[index_] == ObPathItem::DOUBLE_QUOTE && is_double_quoted) + || (expression_[index_] == ObPathItem::SINGLE_QUOTE && !is_double_quoted)) { + end = index_ - 1; + ++index_; + } else { + ++index_; + } + } + if (OB_FAIL(ret)) { + } else if (end == 0 && index_ == len_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("should end with DOUBLE_QUOTE/SINGLE_QUOTE!", K(ret), K(index_), K(expression_)); + } + } else { + ret = OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("index out of range!", K(ret), K(index_), K(expression_)); + } + + if (OB_SUCC(ret)) { + if (end < start) { + // could be "" + if (expression_[end] == expression_[start] + && ((expression_[end] == ObPathItem::DOUBLE_QUOTE && is_double_quoted) + || (expression_[end] == ObPathItem::SINGLE_QUOTE && !is_double_quoted))) { + str = nullptr; + length = 0; + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get keyname: end (allocator_->alloc(str_len)); + if (OB_ISNULL(str)) { + ret = (str_len > 0)?
OB_ALLOCATE_MEMORY_FAILED : OB_ERR_NULL_VALUE; + LOG_WARN("fail to allocate memory for member_name.",K(ret), K(str_len),K(start_ptr)); + } else { + MEMCPY(str, start_ptr, str_len); + length = str_len; + } + } + } + return ret; +} + +/* Consumes the run of digit and DOT characters at index_ and converts it with ObCharset::strntod. num is written only when the conversion reports no error; otherwise the result is OB_INVALID_DATA. No sign or exponent character is consumed by this loop. */ +int ObPathParser::get_xpath_number(double& num) +{ + INIT_SUCC(ret); + char* num_ptr = expression_.ptr() + index_; + uint64_t num_len = 0; + bool in_loop = true; + + while (in_loop && OB_SUCC(ret) && index_ < len_) { + if (expression_[index_] == ObPathItem::DOT + || isdigit(expression_[index_])) { + ++index_; + ++num_len; + } else { + in_loop = false; + } + } + if (OB_SUCC(ret)) { + double ret_val = 0.0; + char *endptr = NULL; + int err = 0; + ret_val = ObCharset::strntod(num_ptr, num_len, &endptr, &err); + if (err == 0) { + num = ret_val; + } else { + ret = OB_INVALID_DATA; + LOG_WARN("invalid double value", K(num_ptr), K(num_len), K(ret)); + } + } + + return ret; +} + +/* Advances index_ over one sub-expression and returns it in subpath, which points into expression_ (no copy). Quoted literals and groups opened by BEGIN_ARRAY or BRACE_START are jumped over whole. When is_filter is false the scan stops at a comma or a closing brace. When is_filter is true it stops at an operator character, BRACE_END or END_ARRAY, except that MINUS between two name start chars is kept and WILDCARD is kept when is_last_letter_location or is_negtive says so. A letter operator (and, or, div, mod) stops the scan in both modes. */ +int ObPathParser::get_subpath_str(bool is_filter, ObString& subpath) +{ + INIT_SUCC(ret); + char* subpath_ptr = expression_.ptr() + index_; + uint64_t subpath_start = index_; + uint64_t subpath_len = 0; + bool in_loop = true; + + while (in_loop && OB_SUCC(ret) && index_ < len_) { + ObXPathUtil::skip_whitespace(expression_, index_); + if (index_ >= len_) { + in_loop = false; + } else if (is_literal_begin()) { + if (OB_FAIL(jump_over_quote())) { + LOG_WARN("invalid quoted value", K(ret)); + } + } else if (expression_[index_] == ObPathItem::BEGIN_ARRAY) { + if (OB_FAIL(jump_over_brace(false))) { + LOG_WARN("invalid value", K(ret)); + } + } else if (expression_[index_] == ObPathItem::BRACE_START) { + if (OB_FAIL(jump_over_brace(true))) { + LOG_WARN("invalid value", K(ret)); + } + } else { + ObXPathUtil::skip_whitespace(expression_, index_); + if (OB_FAIL(ret) || index_ > len_) { + } else if (!is_filter && (expression_[index_] == ',' || expression_[index_] == ')')) { + in_loop = false; + } else if (is_filter && (expression_[index_] == ObPathItem::MINUS)) { + if (index_ - 1 >
0 && ObPathParserUtil::is_xml_name_start_char(expression_[index_ - 1]) + && index_ + 1 < len_ && ObPathParserUtil::is_xml_name_start_char(expression_[index_ + 1])) { + ++index_; + } else { + in_loop = false; + } + } else if (is_filter && expression_[index_] == ObPathItem::WILDCARD) { + if (is_last_letter_location(index_) || is_negtive()) { + ++index_; + } else { + in_loop = false; + } + } else if (is_filter && (ObPathParserUtil::is_operator(expression_[index_]) + || expression_[index_] == ObPathItem::BRACE_END + || expression_[index_] == ObPathItem::END_ARRAY)) { + in_loop = false; + } else if (is_prefix_match_letter_operator()) { + in_loop = false; + } else { + subpath_len = index_ - subpath_start; + ++index_; + } + } + } + + if (OB_FAIL(ret)) { + } else { + subpath_len = (index_ < len_) ? (index_ - subpath_start) : (len_ - subpath_start); + subpath = ObString(subpath_len, subpath_ptr); + } + return ret; +} + +int ObPathParser::parse_subpath(ObString& subpath, ObPathNode*& node, bool is_filter, ObPathArgType patharg_type) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathParser* subpath_parser = + static_cast (allocator_->alloc(sizeof(ObPathParser))); + if (OB_ISNULL(subpath_parser)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at location_node", K(ret), K(index_), K(expression_)); + } else { + subpath_parser = new (subpath_parser) ObPathParser(ctx_, parser_type_, subpath, default_ns_, pass_var_); + if (is_filter) { + if (subpath_parser->is_function_path()) { + if (OB_FAIL(subpath_parser->parse_func_node(patharg_type))) { + bad_index_ = subpath_parser->bad_index_; + LOG_WARN("fail to parse function.", K(ret), K(index_)); + } // is function + } else if (OB_FAIL(subpath_parser->parse_location_path(patharg_type))) { + bad_index_ = subpath_parser->bad_index_; + LOG_WARN("fail to parse", K(ret)); + } + } else if 
(OB_FAIL(subpath_parser->parse_path(IN_FUNCTION))) { + bad_index_ = subpath_parser->bad_index_; + LOG_WARN("fail to parse", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(subpath_parser->get_root())) { + node = subpath_parser->get_root(); + } + } + } + return ret; +} + +int ObPathParser::get_xpath_subpath(ObPathNode*& node, bool is_filter, ObPathArgType patharg_type) +{ + INIT_SUCC(ret); + ObString subpath; + if (OB_FAIL(get_subpath_str(is_filter, subpath))) { + LOG_WARN("fail to get subpath str", K(ret)); + } else if (OB_FAIL(parse_subpath(subpath, node, is_filter, patharg_type))) { + if (patharg_type != NOT_SUBPATH) ret = OB_ERR_PARSER_SYNTAX; // subpath parsing failed + LOG_WARN("fail to parse subpath", K(ret)); + } + return ret; +} + +int ObPathParser::parse_namespace_info(ObPathLocationNode*& location, ObString& ns_str) +{ + INIT_SUCC(ret); + if (OB_NOT_NULL(ns_str.ptr())) { + location->set_prefix_ns_info(true); + location->set_default_prefix_ns(false); + if (0 == ns_str.compare("xmlns") || 0 == ns_str.compare("xml")) { + if (OB_NOT_NULL(pass_var_) || OB_NOT_NULL(default_ns_.ptr())) { + location->set_default_prefix_ns(true); + location->set_check_ns_info(true); + } + } else if (OB_ISNULL(pass_var_)) { + // ORA-31013: Invalid XPATH expression + // no passing var, prefix ns is not allowed + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid XPATH expression.", K(ret), K(index_), K(expression_)); + } else { + ObDatum* pass_data = pass_var_->get_value(ns_str); + if (OB_ISNULL(pass_data) || pass_data->is_null()) { + // no passing var, prefix ns is not allowed + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid XPATH expression.", K(ret), K(index_), K(expression_)); + } else { + ObString ns_str; + if (OB_FAIL(ob_write_string(*allocator_, pass_data->get_string(), ns_str))) { + LOG_WARN("fail to save ns str", K(ret)); + } else { + location->set_ns_info(ns_str.ptr(), ns_str.length()); + location->set_check_ns_info(true); + } + } + } + } else { + // if no prefix 
namespace, use default namespace + location->set_prefix_ns_info(false); + location->set_ns_info(default_ns_.ptr(), default_ns_.length()); + } + return ret; +} + +int ObPathParser::parse_nodetest_info(ObPathLocationNode*& location) +{ + INIT_SUCC(ret); + ObXPathUtil::skip_whitespace(expression_, index_); + if (index_ >= len_) { + ret = OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("wrong path expression", K(ret), K(index_)); + } else { + // if have '(', must be nodetest + // xmlns:tag, couldn't have space before ':' + // so, split on ':' + char* name = nullptr; + uint64_t name_len = 0; + bool is_nodetest = false; + if (OB_FAIL(get_xpath_ident(name, name_len, is_nodetest))) { + LOG_WARN("wrong path expression", K(ret), K(index_)); + } else { + ObString name_ident(name_len, name); + ObString ns_str = name_ident.split_on(ObPathItem::COLON); + name_len = name_ident.length(); + if (name_ident.length() < 1) { + ret = OB_INVALID_ARGUMENT; // could be tagname or nodetest or wildcard, must >= 1 + LOG_WARN("wrong name ident", K(ret), K(ns_str)); + } else if (OB_FAIL(parse_namespace_info(location, ns_str))) { + LOG_WARN("fail to set ns info", K(ret), K(index_)); + } else if (is_nodetest) { // node(), text(), pi(), comment() + // get_nodetest + ObSeekType nodetest = ObSeekType::ERROR_SEEK; + char* arg = nullptr; + uint64_t arg_len = 0; + if (OB_FAIL(check_nodetest(name_ident, nodetest, arg, arg_len))) { + LOG_WARN("fail to get nodetest", K(ret), K(name_ident)); + } else if (nodetest != ObSeekType::ERROR_SEEK) { + location->set_nodetest_by_name(nodetest, arg, arg_len); + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("wrong nodetest arg", K(ret), K(name_ident)); + } + if (OB_FAIL(ret)) { + } else if (ns_str.length() > 0) { + allocator_->free(location->get_ns_name().ptr()); + if (location->get_seek_type() != ObSeekType::TEXT) { + ret = OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed + LOG_WARN("Function call with invalid arguments", K(ret)); + } else if 
(ObPathUtil::is_upper_axis(location->get_axis())) { + ret = OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed + LOG_WARN("Function call with invalid arguments", K(ret)); + } else { + location->set_prefix_ns_info(true); + } + } + + // oracle adaptation: + if (OB_SUCC(ret) && location->get_axis() == ATTRIBUTE + && location->get_seek_type() == ObSeekType::TEXT) { + location->set_axis(CHILD); + } + } else if (name_len == 1 && name_ident[0] == ObPathItem::WILDCARD) { + location->set_wildcard_info(true); + location->set_nodetest_by_axis(); + } else if (!(ObPathParserUtil::check_is_legal_tagname(name_ident.ptr(), name_ident.length()))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("wrong element name", K(ret), K(name_ident)); + } else { + // legal tagname: could be namespace, attribute or element + location->set_key_info(name_ident.ptr(), name_ident.length()); + // must with tag_name and no wildcard + location->set_wildcard_info(false); + // if axis is attribute or ns, set seek type , tagname is not for element + location->set_nodetest_by_axis(); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(location->set_check_ns_by_nodetest(allocator_, default_ns_))) { + LOG_WARN("wrong type for ns", K(ret), K(name_ident)); + } + } + } + return ret; +} + +int ObPathParser::parse_axis_info(ObPathLocationNode*& location) +{ + INIT_SUCC(ret); + ObXPathUtil::skip_whitespace(expression_, index_); + if (OB_ISNULL(location)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (index_ >= len_) { + ret = OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("wrong path expression", K(ret), K(index_)); + } else { + bool default_axis = true; + switch (expression_[index_]) { + case '@': { + location->set_axis(ObPathNodeAxis::ATTRIBUTE); + ++index_; + default_axis = false; + location->set_wildcard_info(true); + break; + } + case 'a': { + if (path_prefix_match(ObPathItem::ANCESTOR_OR_SELF)) { + location->set_axis(ObPathNodeAxis::ANCESTOR_OR_SELF); + index_ += 
strlen(ObPathItem::ANCESTOR_OR_SELF); + default_axis = false; + } else if (path_prefix_match(ObPathItem::ANCESTOR)) { + location->set_axis(ObPathNodeAxis::ANCESTOR); + index_ += strlen(ObPathItem::ANCESTOR); + default_axis = false; + } else if (path_prefix_match(ObPathItem::ATTRIBUTE)) { + location->set_axis(ObPathNodeAxis::ATTRIBUTE); + index_ += strlen(ObPathItem::ATTRIBUTE); + default_axis = false; + } + break; + } + case 'c': { + if (path_prefix_match(ObPathItem::CHILD)) { + location->set_axis(ObPathNodeAxis::CHILD); + index_ += strlen(ObPathItem::CHILD); + default_axis = false; + } + break; + } + case 'd': { + if (path_prefix_match(ObPathItem::DESCENDANT_OR_SELF)) { + location->set_axis(ObPathNodeAxis::DESCENDANT_OR_SELF); + index_ += strlen(ObPathItem::DESCENDANT_OR_SELF); + default_axis = false; + } else if (path_prefix_match(ObPathItem::DESCENDANT)) { + location->set_axis(ObPathNodeAxis::DESCENDANT); + index_ += strlen(ObPathItem::DESCENDANT); + default_axis = false; + } + break; + } + case 'f': { + if (path_prefix_match(ObPathItem::FOLLOWING_SIBLING)) { + location->set_axis(ObPathNodeAxis::FOLLOWING_SIBLING); + index_ += strlen(ObPathItem::FOLLOWING_SIBLING); + default_axis = false; + } else if (path_prefix_match(ObPathItem::FOLLOWING)) { + location->set_axis(ObPathNodeAxis::FOLLOWING); + index_ += strlen(ObPathItem::FOLLOWING); + default_axis = false; + } + break; + } + case 'n': { + if (path_prefix_match(ObPathItem::NAMESPACE)) { + location->set_axis(ObPathNodeAxis::NAMESPACE); + index_ += strlen(ObPathItem::NAMESPACE); + location->set_wildcard_info(true); + default_axis = false; + } + break; + } + case 'p': { + if (path_prefix_match(ObPathItem::PRECEDING_SIBLING)) { + location->set_axis(ObPathNodeAxis::PRECEDING_SIBLING); + index_ += strlen(ObPathItem::PRECEDING_SIBLING); + default_axis = false; + } else if (path_prefix_match(ObPathItem::PRECEDING)) { + location->set_axis(ObPathNodeAxis::PRECEDING); + index_ += strlen(ObPathItem::PRECEDING); + 
default_axis = false; + } else if (path_prefix_match(ObPathItem::PARENT)) { + location->set_axis(ObPathNodeAxis::PARENT); + index_ += strlen(ObPathItem::PARENT); + default_axis = false; + } + break; + } + case 's': { + if (path_prefix_match(ObPathItem::SELF)) { + location->set_axis(ObPathNodeAxis::SELF); + index_ += strlen(ObPathItem::SELF); + default_axis = false; + } + break; + } + default: { + break; + } + } // end switch + // if didn't match any axis, use child axis + if (default_axis) { + location->set_axis(ObPathNodeAxis::CHILD); + } + } + return ret; +} + +int ObPathParser::parse_double_slash_node() +{ + INIT_SUCC(ret); + uint64_t old_index = index_; + ++index_; + ObXPathUtil::skip_whitespace(expression_, index_); + ObPathLocationNode* ellipsis_node = nullptr; + if (index_ >= len_ || expression_[index_] == ObPathItem::SLASH) { + ret = OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed + LOG_WARN("Function call with invalid arguments", K(ret)); + } else if (OB_FAIL(alloc_location_node(ellipsis_node))) { + LOG_WARN("allocate row buffer failed at path_node", K(ret), K(index_), K(expression_)); + } else { + ellipsis_node = new (ellipsis_node) ObPathLocationNode(ctx_, parser_type_); + if (OB_FAIL(ellipsis_node->init(ObLocationType::PN_ELLIPSIS))) { + LOG_WARN("fail to init ellipsis_node", K(ret), K(index_), K(expression_)); + } else { + ObXPathUtil::skip_whitespace(expression_, index_); + ellipsis_node->is_absolute_ = true; + bool is_abb = false; + if (index_ >= len_) { + ret = OB_ARRAY_OUT_OF_RANGE; + LOG_WARN("wrong path expression", K(ret), K(index_)); + } else if (path_prefix_match(ObPathItem::DOUBLE_DOT)) { + index_ += strlen(ObPathItem::DOUBLE_DOT); + ellipsis_node->set_axis(ObPathNodeAxis::PARENT); + ellipsis_node->set_nodetest(ObSeekType::NODES); + ellipsis_node->set_wildcard_info(true); + } else if (expression_[index_] == ObPathItem::DOT) { + is_abb = true; + ++index_; + ellipsis_node->set_axis(ObPathNodeAxis::SELF); + 
ellipsis_node->set_nodetest(ObSeekType::NODES); + ellipsis_node->set_wildcard_info(true); + } else if (OB_FAIL(parse_axis_info(ellipsis_node))) { + LOG_WARN("fail to parse_axis_info ", K(ret), K(index_)); + } + + // Compatible with Oracle: + // if is down_axis or self_axis, combine "//" node and the filter after it + // for example: "//a" equal to "/descendant-or-self::a" + // "//parent::a" equal to "/descendant-or-self::node/parent::a" + if (OB_FAIL(ret)) { + } else if (is_abb || ObPathUtil::is_down_axis(ellipsis_node->get_axis())) { + ObXPathUtil::skip_whitespace(expression_, index_); + uint64_t idx_before_nodetest = index_; + if ((index_ < len_ && expression_[index_] != ObPathItem::SLASH) + && OB_FAIL(parse_nodetest_info(ellipsis_node))) { + LOG_WARN("fail to parse_axis_info ", K(ret), K(index_)); + } else if (index_ < len_ && expression_[index_] == ObPathItem::BEGIN_ARRAY) { + if (!is_abb) { + while (OB_SUCC(ret) && index_ < len_ && expression_[index_] == ObPathItem::BEGIN_ARRAY) { + ObPathNode* filter = nullptr; + if (OB_FAIL(parse_filter_node(filter, ObPathArgType::IN_FILTER))) { + LOG_WARN("fail to parse filter", K(ret), K(index_)); + } else if (OB_FAIL(ellipsis_node->append(filter))) { + LOG_WARN("fail to append filter", K(ret), K(index_)); + } + ObXPathUtil::skip_whitespace(expression_, index_); + } + if (OB_SUCC(ret)) { + ellipsis_node->has_filter_ = true; + ObPathRootNode* root_node = static_cast(root_node_); + root_node->need_trans_++; + } + } else { + ret = OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed + LOG_WARN("Function call with invalid arguments", K(ret)); + } + } + if (is_abb && (idx_before_nodetest < index_) && ellipsis_node->get_wildcard_info()) { + ObSeekType seek_type = ellipsis_node->get_seek_type(); + if (seek_type == ObSeekType::TEXT) { // legal, do nothing + } else if (seek_type == ObSeekType::ELEMENT) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid path expression", K(ret), K(index_), K(expression_)); + } else { + ret = 
OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed + LOG_WARN("Function call with invalid arguments", K(ret)); + } + } + } else if (OB_FAIL(ellipsis_node->init(ObLocationType::PN_KEY, + ObSeekType::NODES, + ObPathNodeAxis::DESCENDANT_OR_SELF))) { + LOG_WARN("fail to init ellipsis_node", K(ret), K(index_), K(expression_)); + } else { + ellipsis_node->set_wildcard_info(true); + ellipsis_node->is_absolute_ = true; + index_ = old_index; + } + } + // if successed, add to root + if (OB_FAIL(ret)) { + } else if (OB_FAIL(root_node_->append(ellipsis_node))) { + LOG_WARN("fail to append location node", K(ret), K(index_), K(expression_)); + } + } + return ret; +} + +int ObPathParser::parse_double_dot_node(bool is_absolute) +{ + INIT_SUCC(ret); + ObPathLocationNode* parent_node = nullptr; + if (OB_FAIL(alloc_location_node(parent_node))) { + LOG_WARN("allocate row buffer failed at path_node", K(ret), K(index_), K(expression_)); + } else { + // skip .. and space + index_ += strlen(ObPathItem::DOUBLE_DOT); + ObXPathUtil::skip_whitespace(expression_, index_); + parent_node = new (parent_node) ObPathLocationNode(ctx_, parser_type_); + parent_node->is_absolute_ = is_absolute; + if (OB_FAIL(parent_node->init(ObLocationType::PN_KEY, + ObSeekType::NODES, + ObPathNodeAxis::PARENT))) { + LOG_WARN("fail to init parent_node", K(ret), K(index_), K(expression_)); + } else { + parent_node->set_wildcard_info(true); + if (OB_FAIL(root_node_->append(parent_node))) { + LOG_WARN("failed to append location node.", K(ret), K(index_), K(expression_)); + } else if (index_ < len_ && expression_[index_] != ObPathItem::SLASH) { + // '/..' 
must be followed by a new step, if not end + ret = OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed + LOG_WARN("Function call with invalid arguments", K(ret)); + } + } + } + return ret; +} + +int ObPathParser::parse_single_dot_node(bool is_absolute) +{ + INIT_SUCC(ret); + ObPathLocationNode* self_node = nullptr; + if (OB_FAIL(alloc_location_node(self_node))) { + LOG_WARN("allocate row buffer failed at path_node", K(ret), K(index_), K(expression_)); + } else { + // skip . and space + ++index_; + ObXPathUtil::skip_whitespace(expression_, index_); + self_node = new (self_node) ObPathLocationNode(ctx_, parser_type_); + self_node->is_absolute_ = is_absolute; + if (OB_FAIL(self_node->init(ObLocationType::PN_KEY, + ObSeekType::NODES, + ObPathNodeAxis::SELF))) { + LOG_WARN("fail to init self_node", K(ret), K(index_), K(expression_)); + } else { + self_node->set_wildcard_info(true); + if (OB_FAIL(root_node_->append(self_node))) { + LOG_WARN("failed to append location node.", K(ret), K(index_), K(expression_)); + } else if (index_ < len_ && expression_[index_] == ObPathItem::BEGIN_ARRAY) { + while (OB_SUCC(ret) && index_ < len_ && expression_[index_] == ObPathItem::BEGIN_ARRAY) { + ObPathNode* filter = nullptr; + if (OB_FAIL(parse_filter_node(filter, ObPathArgType::IN_FILTER))) { + LOG_WARN("fail to parse filter", K(ret), K(index_)); + } else if (OB_FAIL(self_node->append(filter))) { + LOG_WARN("fail to append filter", K(ret), K(index_)); + } + ObXPathUtil::skip_whitespace(expression_, index_); + } + if (OB_SUCC(ret)) { + self_node->has_filter_ = true; + ObPathRootNode* root_node = static_cast(root_node_); + root_node->need_trans_++; + } + } else if (index_ < len_ && expression_[index_] == ObPathItem::WILDCARD) { + // ORA-31012: Given XPATH expression not supported + ret = OB_OP_NOT_ALLOW; + LOG_WARN("/.* is not allowed", K(ret), K(index_), K(expression_)); + } else if (index_ < len_ && ObPathParserUtil::is_xml_name_start_char(expression_[index_])) { + if 
(OB_FAIL(parse_nodetest_info(self_node))) { + LOG_WARN("wrong node test", K(ret), K(index_), K(expression_)); + } else if (self_node->get_seek_type() != ObSeekType::ELEMENT + || (self_node->get_seek_type() == ObSeekType::ELEMENT && self_node->is_key_null())) { + self_node->set_nodetest(ObSeekType::NODES); + self_node->set_wildcard_info(true); + } + } else if (index_ < len_ && expression_[index_] == ObPathItem::WILDCARD) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid path expression", K(ret), K(index_), K(expression_)); + } + } + } + return ret; +} + +bool ObPathParser::is_last_letter_location(int last_idx) +{ + bool ret_bool = false; + if (last_idx >= 0 && !ObXPathUtil::is_whitespace(expression_[last_idx])) { + ret_bool = true; + } else { + while (last_idx >= 0 && ObXPathUtil::is_whitespace(expression_[last_idx])) --last_idx; + if (last_idx >= 0 && (expression_[last_idx] == ObPathItem::SLASH + || expression_[last_idx] == ObPathItem::AT + || expression_[last_idx] == ObPathItem::COLON + || expression_[last_idx] == ObPathItem::UNDERLINE)) { + ret_bool = true; + } + } + return ret_bool; +} +bool ObPathParser::is_last_letter_operator(const int& last_idx) +{ + bool ret_bool = false; + switch (expression_[last_idx]) { + case 'r': { + // maybe 'or' + if (last_idx - 1 > 0 && expression_[last_idx - 1] == 'o') { + ret_bool = true; + } + break; + } + case 'd': { + // maybe 'and' or 'mod' + if (last_idx - 2 > 0) { + ObString op(3, expression_.ptr() + last_idx - 2); + if (op == ObPathItem::AND || op == ObPathItem::MOD) { + ret_bool = true; + } + } + break; + } + case 'v': { + // maybe 'div' + if (last_idx - 2 > 0) { + ObString op(3, expression_.ptr() + last_idx - 2); + if (op == ObPathItem::DIV) { + ret_bool = true; + } + } + break; + } + default: { + break; + } + } + return ret_bool; +} + +// if is operator before -, like +- 1, then '-' is negtive, else is minus +bool ObPathParser::is_negtive() +{ + bool ret_bool = false; + if (expression_[index_] == ObPathItem::MINUS || 
expression_[index_] == ObPathItem::WILDCARD) { + int last_idx = index_ - 1; + while (last_idx >= 0 && ObXPathUtil::is_whitespace(expression_[last_idx])) --last_idx; + if (last_idx < 0 ) { + ret_bool = true; + } else if (ObPathParserUtil::is_left_brace(expression_[last_idx]) + || ObPathParserUtil::is_operator(expression_[last_idx]) + || is_last_letter_operator(last_idx)) { + ret_bool = true; + } + } + return ret_bool; +} + +bool ObPathParser::is_number_begin() +{ + bool ret_bool = false; + if (isdigit(expression_[index_]) // number + || (expression_[index_] == ObPathItem::DOT && index_ + 1 < len_ && isdigit(expression_[index_ + 1]))) { // decimal + ret_bool = true; + } + return ret_bool; +} + +bool ObPathParser::is_literal_begin() +{ + bool ret_bool = false; + if (expression_[index_] == ObPathItem::DOUBLE_QUOTE + || expression_[index_] == ObPathItem::SINGLE_QUOTE) { + ret_bool = true; + } + return ret_bool; +} + +int ObPathParser::check_cmp(bool &is_cmp) +{ + INIT_SUCC(ret); + is_cmp = false; + uint64_t old_idx = index_; + while (!is_cmp && index_ < len_ && OB_SUCC(ret)) { + if (expression_[index_] == ObPathItem::BEGIN_ARRAY) { + if (OB_FAIL(jump_over_brace(false))) { + LOG_WARN("failed in brace check!", K(ret)); + } + } else if (expression_[index_] == ObPathItem::BRACE_START) { + if (OB_FAIL(jump_over_brace(true))) { + LOG_WARN("failed in brace check!", K(ret)); + } + } else if (is_literal_begin()) { + if (OB_FAIL(jump_over_quote())) { + LOG_WARN("failed in brace check!", K(ret)); + } + } else if (expression_[index_] == ObPathItem::MINUS) { + if (index_ - 1 > 0 && ObPathParserUtil::is_xml_name_start_char(expression_[index_ - 1]) + && index_ + 1 < len_ && ObPathParserUtil::is_xml_name_start_char(expression_[index_ + 1])) { + ++index_; + } else { + is_cmp = true; + } + } else if (expression_[index_] == ObPathItem::WILDCARD) { + if (is_last_letter_location(index_)) { + ++index_; + } else { + is_cmp = true; + } + } else if 
(ObPathParserUtil::is_operator(expression_[index_])) { + is_cmp = true; + } else if (is_prefix_match_letter_operator()) { + is_cmp = true; + } else { + ++index_; + } + } + + if (OB_FAIL(ret)) { + } else { + index_ = old_idx; + } + return ret; +} + +int ObPathParser::jump_over_brace(bool is_brace) +{ + INIT_SUCC(ret); + char start_brace; + char end_brace; + if (is_brace) { + start_brace = ObPathItem::BRACE_START; + end_brace = ObPathItem::BRACE_END; + } else { + start_brace = ObPathItem::BEGIN_ARRAY; + end_brace = ObPathItem::END_ARRAY; + } + if (expression_[index_] == start_brace) { + int brace = 1; + ++index_; + while (OB_SUCC(ret) && index_ < len_ && brace > 0) { + if (expression_[index_] == start_brace) { + ++brace; + ++index_; + } else if (expression_[index_] == end_brace) { + --brace; + ++index_; + } else { + ++index_; + } + } // end while + if (OB_SUCC(ret) && index_ <= len_ && brace == 0) { + } else { + ret = OB_ITEM_NOT_MATCH; + LOG_WARN("there should be a ')'!", K(ret)); + } + } + return ret; +} + +int ObPathParser::jump_over_quote() +{ + INIT_SUCC(ret); + char quote; + if (expression_[index_] == ObPathItem::DOUBLE_QUOTE) { + quote = ObPathItem::DOUBLE_QUOTE; + } else if (expression_[index_] == ObPathItem::SINGLE_QUOTE) { + quote = ObPathItem::SINGLE_QUOTE; + } else { + ret = OB_ITEM_NOT_MATCH; + LOG_WARN("there should be a '\"' or '''!", K(ret), K(index_), K(expression_)); + } + if (OB_FAIL(ret)) { + } else { + ++index_; + while (OB_SUCC(ret) && index_ < len_ && expression_[index_] != quote) { + ++index_; + } + if (OB_SUCC(ret) && index_ < len_ && expression_[index_] == quote) { + ++index_; + } else { + ret = OB_ITEM_NOT_MATCH; + LOG_WARN("there should be a '\"'!", K(ret), K(quote)); + } + } + return ret; +} + +int ObPathParser::parse_arg(ObPathNode*& arg, ObPathArgType patharg_type, bool is_filter, const bool negtive) +{ + INIT_SUCC(ret); + ObXPathUtil::skip_whitespace(expression_, index_); + + if (OB_FAIL(ret)) { + } else if (index_ >= len_) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("invalid xpath", K(ret), K(expression_)); + } else if (is_filter && (negtive || is_number_begin() || is_literal_begin())) { // is_scalar + ObPathArgNode* arg_node = nullptr; + // filter arg: string, number, subpath + if (OB_FAIL(alloc_arg_node(arg_node))) { + LOG_WARN("fail to alloc arg node", K(ret), K(index_), K(expression_)); + } else { + arg_node = new (arg_node) ObPathArgNode(ctx_, parser_type_); + } + if (OB_FAIL(ret)) { + } else if (negtive || is_number_begin()) { + // parse number + double num; + if (OB_FAIL(get_xpath_number(num))) { + LOG_WARN("fail to get literal", K(ret), K(index_), K(expression_)); + } else { + num = (negtive) ? -num : num; + if (OB_FAIL(arg_node->init(num, patharg_type == ObPathArgType::IN_FILTER))) { + LOG_WARN("fail to init arg node", K(ret), K(num)); + } else { + arg = arg_node; + } + } + } else { + // parse literal + char* literal_ptr = nullptr; + uint64_t literal_len = 0; + if (OB_FAIL(get_xpath_literal(literal_ptr, literal_len))) { + LOG_WARN("fail to get literal", K(ret), K(index_), K(expression_)); + } else { + // if the filter string has entity ref, need to transform to entity char, like '<' -> '<' + ObString escape_str = ObString(literal_len, literal_ptr); + if (OB_FAIL(ObXmlUtil::revert_escape_character(*allocator_, escape_str, escape_str))) { + LOG_WARN("fail to revert escape str", K(ret), K(escape_str)); + } else if (OB_FAIL(arg_node->init(escape_str.ptr(), escape_str.length(), patharg_type == ObPathArgType::IN_FILTER))) { + LOG_WARN("fail to init arg node", K(ret), K(literal_ptr), K(literal_len)); + } else { + arg = arg_node; + } + } + } // is literal + } else if (OB_FAIL(get_xpath_subpath(arg, is_filter, patharg_type))) { + LOG_WARN("fail to get literal", K(ret), K(index_), K(expression_)); + } // parse subpath + return ret; +} + +int ObPathParser::get_filter_char_type(ObXpathFilterChar& filter_char) +{ + INIT_SUCC(ret); + ObXPathUtil::skip_whitespace(expression_, index_); + bool space_error 
= false; + if (index_ >= len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid xpath", K(ret), K(expression_)); + } else { + switch (expression_[index_]) { + case '[': + filter_char = ObXpathFilterChar::CHAR_BEGIN_FILTER; + break; + case '(': + filter_char = ObXpathFilterChar::CHAR_LEFT_BRACE; + break; + case '|': + filter_char = ObXpathFilterChar::CHAR_UNION; + break; + case 'o': + if (path_prefix_match("or ")) { + if (index_ > 1 && (expression_[index_ - 1] == ' ' || isdigit(expression_[index_ - 1]))) { + filter_char = ObXpathFilterChar::CHAR_OR; + index_ += strlen(ObPathItem::OR); + } else { + space_error = true; + } + } + break; + case 'a': + if (path_prefix_match("and ")) { + if (index_ > 1 && (expression_[index_ - 1] == ' ' || isdigit(expression_[index_ - 1]))) { + filter_char = ObXpathFilterChar::CHAR_AND; + index_ += strlen(ObPathItem::AND); + } else { + space_error = true; + } + } + break; + case '=': + filter_char = ObXpathFilterChar::CHAR_EQUAL; + break; + case '!': + if (path_prefix_match(ObPathItem::COM_NE)) { + filter_char = ObXpathFilterChar::CHAR_UNEQUAL; + ++index_; + } + break; + case '<' : + if (path_prefix_match(ObPathItem::COM_LE)) { + filter_char = ObXpathFilterChar::CHAR_LESS_EQUAL; + ++index_; + } else { + filter_char = ObXpathFilterChar::CHAR_LESS; + } + break; + case '>' : + if (path_prefix_match(ObPathItem::COM_GE)) { + filter_char = ObXpathFilterChar::CHAR_GREAT_EQUAL; + ++index_; + } else { + filter_char = ObXpathFilterChar::CHAR_GREAT; + } + break; + case '+': + filter_char = ObXpathFilterChar::CHAR_ADD; + break; + case '-': + filter_char = ObXpathFilterChar::CHAR_SUB; + break; + case '*': + filter_char = ObXpathFilterChar::CHAR_MULTI; + break; + case 'd': + if (path_prefix_match("div ")) { + if (index_ > 1 && (expression_[index_ - 1] == ' ' || isdigit(expression_[index_ - 1]))) { + filter_char = ObXpathFilterChar::CHAR_DIV; + index_ += strlen(ObPathItem::DIV); + } else { + space_error = true; + } + } + break; + case 'm': + if 
(path_prefix_match("mod ")) { + if (index_ > 1 && (expression_[index_ - 1] == ' ' || isdigit(expression_[index_ - 1]))) { + filter_char = ObXpathFilterChar::CHAR_MOD; + index_ += strlen(ObPathItem::MOD); + } else { + space_error = true; + } + } + break; + case ')': + filter_char = ObXpathFilterChar::CHAR_RIGHT_BRACE; + break; + case ']': + filter_char = ObXpathFilterChar::CHAR_END_FILTER; + break; + default: + filter_char = ObXpathFilterChar::CMP_CHAR_MAX; + break; + } + } + if (space_error) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should have space before and after letter operator", K(ret)); + } else if (filter_char != ObXpathFilterChar::CMP_CHAR_MAX) { + ++index_; + } + return ret; +} + +// 0 means that the priority is the same, but it also means that types are directly comparable, +// i.e. decimal, int, uint, and double are all comparable. +// 1 means this_type has a higher priority +// -1 means this_type has a lower priority + +// if top < in ( -1), push into stack +// if top > in ( 1 ), pop the top char, then push +// if top = in ( 0 ), pop the top and do not push, only for '(' and ')', or '[' and ']' +// if error ( -2), like ']', ')' should not in char stack +enum ObPathPriority { + ERROR_OP = -2, + PUSH_STACK= -1, + MATCH = 0, + POP_STACK = 1, +}; +static constexpr int filter_comparison[CMP_CHAR_MAX][CMP_CHAR_MAX] = { + /* in [ ( | or & = != <= < >= > + - * div mod ) ] */ + /* top 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 */ + /* 0 [ */ { -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0 }, + /* 1 ( */ { -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -2 }, + /* 2 | */ { -2, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 1 }, + /* 3 or */ { -2, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1 }, + /* 4 and */ { -2, -1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1 }, + /* 5 = */ { -2, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1 }, + /* 6 != */ { -2, -1, 1, 1, 
1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1 }, + /* 7 <= */ { -2, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, 1, 1 }, + /* 8 < */ { -2, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, 1, 1 }, + /* 9 >= */ { -2, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, 1, 1 }, + /* 10 > */ { -2, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, 1, 1 }, + /* 11 + */ { -2, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, 1, 1 }, + /* 12 - */ { -2, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, 1, 1 }, + /* 13 * */ { -2, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + /* 14 div */ { -2, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + /* 15 mod */ { -2, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + /* 16 ) */ { -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2 }, + /* 17 ] */ { -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2 } +}; + +int ObPathParser::push_filter_char_in(const ObXpathFilterChar& in, ObPathVectorPointers& node_stack, + ObFilterCharPointers& char_stack, ObPathArgType patharg_type) +{ + INIT_SUCC(ret); + uint64_t size_c = char_stack.size(); + if (size_c <= 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("empty char stack", K(ret), K(index_), K(expression_)); + } else { + ObXpathFilterChar top = char_stack[size_c - 1]; + int priority = filter_comparison[top][in]; + switch (priority) { + case ObPathPriority::PUSH_STACK: { + if (OB_FAIL(char_stack.push_back(in))) { + LOG_WARN("fail to push char", K(ret), K(in)); + } + break; + } + case ObPathPriority::MATCH: { + if (OB_FAIL(ObPathUtil::pop_char_stack(char_stack))) { + LOG_WARN("fail to pop char", K(ret), K(in)); + } + break; + } + case ObPathPriority::POP_STACK: { + while (OB_SUCC(ret) && size_c > 1 && filter_comparison[top][in] == ObPathPriority::POP_STACK) { + ObPathFilterNode* filter_node = nullptr; + if (OB_FAIL(alloc_filter_node(filter_node))) { + LOG_WARN("fail to alloc filter node", K(ret), K(index_), K(expression_)); + } 
else { + filter_node = new (filter_node) ObPathFilterNode(ctx_, parser_type_); + ObPathNode *left = nullptr; + ObPathNode *right = nullptr; + if (OB_FAIL(ObPathUtil::pop_node_stack(node_stack, right))) { + LOG_WARN("fail to get arg", K(ret), K(index_), K(expression_)); + } else if (OB_FAIL(ObPathUtil::pop_node_stack(node_stack, left))) { + LOG_WARN("fail to get arg", K(ret), K(index_), K(expression_)); + } else if (OB_FAIL(filter_node->init(top, left, right, patharg_type == ObPathArgType::IN_FILTER))) { + LOG_WARN("fail to init", K(ret), K(index_), K(expression_)); + } else if (OB_FAIL(node_stack.push_back(filter_node))) { + LOG_WARN("fail to push filter node", K(ret), K(index_), K(expression_)); + } else if (OB_FAIL(ObPathUtil::pop_char_stack(char_stack))) { // pop char top + LOG_WARN("fail to pop char", K(ret), K(in)); + } else { + size_c = char_stack.size(); + } + } + if (OB_FAIL(ret)) { + } else if (size_c > 0) { + top = char_stack[size_c - 1]; + } + } + + if (OB_FAIL(ret)) { + } else if (filter_comparison[top][in] == ObPathPriority::PUSH_STACK) { + if (OB_FAIL(char_stack.push_back(in))) { + LOG_WARN("fail to push char", K(ret), K(in)); + } + } else if (OB_FAIL(ObPathUtil::pop_char_stack(char_stack))) { + LOG_WARN("fail to pop char", K(ret), K(in)); + } + break; + } + case ObPathPriority::ERROR_OP: { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("wrong operator", K(ret), K(priority), K(top), K(in)); + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong priority", K(ret), K(priority), K(top), K(in)); + break; + } + } // end switch + } + return ret; +} + +int ObPathParser::parse_filter_node(ObPathNode*& filter, ObPathArgType patharg_type) +{ + INIT_SUCC(ret); + ObXPathUtil::skip_whitespace(expression_, index_); + ObPathVectorPointers node_stack; + ObFilterCharPointers char_stack; + if (index_ < expression_.length()) { + if (patharg_type != ObPathArgType::IN_FILTER + || (patharg_type == ObPathArgType::IN_FILTER + && expression_[index_] == ObPathItem::BEGIN_ARRAY && 
++index_)) { + if (OB_FAIL(char_stack.push_back(ObXpathFilterChar::CHAR_BEGIN_FILTER))) { + LOG_WARN("fail to push_back charactor", K(ret), K(index_), K(expression_)); + } else { + ObXPathUtil::skip_whitespace(expression_, index_); + while (OB_SUCC(ret) && index_ < len_ && char_stack.size() > 0) { + ObXpathFilterChar filter_char = ObXpathFilterChar::CMP_CHAR_MAX; + bool minus = false; + bool multi = false; + // if is ObXpathFilterChar, push in, else get arg + if ((expression_[index_] == '-' || expression_[index_] == '*') && is_negtive()) { + minus = (expression_[index_] == '-'); + multi = (expression_[index_] == '*'); + } + if (!minus && !multi && OB_FAIL(get_filter_char_type(filter_char))) { + LOG_WARN("fail to get char type", K(ret), K(index_), K(expression_)); + } else if (filter_char < ObXpathFilterChar::CMP_CHAR_MAX) { + if (OB_FAIL(push_filter_char_in(filter_char, node_stack, char_stack, patharg_type))) { + LOG_WARN("fail to push char", K(ret), K(filter_char)); + } + } else { // must be arg + ObPathNode* arg = nullptr; + if (minus) { + int count_minus = 0; + while (index_ < len_ && expression_[index_] == '-' && OB_SUCC(ret)) { + ++count_minus; + ++index_; + ObXPathUtil::skip_whitespace(expression_, index_); + } + minus = (count_minus % 2 == 0) ? 
false : true; + } + if (OB_FAIL(parse_arg(arg, patharg_type, true, minus))) { + LOG_WARN("fail to parse filter arg", K(ret), K(index_), K(expression_)); + } else if (OB_FAIL(node_stack.push_back(arg))) { + LOG_WARN("fail to push arg", K(ret), K(index_), K(expression_)); + } + } + ObXPathUtil::skip_whitespace(expression_, index_); + } // end while + // if not in predicate + if (OB_SUCC(ret) && (index_ >= len_ && patharg_type != ObPathArgType::IN_FILTER)) { + // push ']' in + if (OB_FAIL(push_filter_char_in(ObXpathFilterChar::CHAR_END_FILTER, node_stack, char_stack, patharg_type))) { + LOG_WARN("fail to push char", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (node_stack.size() == 1 && char_stack.size() == 0) { + if (OB_ISNULL(node_stack[0])) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + filter = node_stack[0]; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong expression", K(ret), K(expression_), K(index_)); + } + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("wrong filter!", K(ret), K(index_), K(expression_)); + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("wrong path expression!", K(ret), K(index_), K(expression_)); + } + return ret; +} + +} // namespace common +} // namespace oceanbase \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_path_parser.h b/deps/oblib/src/lib/xml/ob_path_parser.h new file mode 100644 index 0000000000..aa6ec031cb --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_path_parser.h @@ -0,0 +1,182 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface support for the Xml Path abstraction. + */ + +#ifndef OCEANBASE_SQL_OB_PATH_PARSER +#define OCEANBASE_SQL_OB_PATH_PARSER + +#include "ob_xpath.h" +#include "ob_tree_base.h" +#include "lib/string/ob_string.h" +#include "lib/container/ob_array.h" +#include "lib/container/ob_vector.h" +#include "src/share/datum/ob_datum.h" +#include "lib/json_type/ob_json_path.h" +#include "lib/xml/ob_mul_mode_reader.h" +#include "lib/xml/ob_multi_mode_interface.h" +namespace oceanbase { +namespace common { + +class ObPathItem +{ +public: + static constexpr char DOLLAR = '$'; + static constexpr char SLASH = '/'; + static constexpr char BEGIN_ARRAY = '['; + static constexpr char END_ARRAY = ']'; + static constexpr char DOUBLE_QUOTE = '"'; + static constexpr char SINGLE_QUOTE = '\''; + static constexpr char WILDCARD = '*'; + static constexpr char AT = '@'; + static constexpr char BRACE_START = '('; + static constexpr char BRACE_END = ')'; + static constexpr char COLON = ':'; + static constexpr char DOT = '.'; + static constexpr char UNDERLINE = '_'; + static constexpr char UNION = '|'; + static constexpr char MINUS = '-'; + static constexpr char COM_LT = '<'; + static constexpr char COM_GT = '>'; + static constexpr char COM_EQ = '='; + static constexpr char CAL_ADD = '+'; + static constexpr char CAL_SUB = '-'; + static constexpr char CAL_MUL = '*'; + static constexpr char* COM_LE = const_cast("<="); + static constexpr char* COM_GE = const_cast(">="); + static constexpr char* COM_NE = const_cast("!="); + static constexpr char* OR = const_cast("or"); + static constexpr char* AND = const_cast("and"); + static 
constexpr char* DIV = const_cast("div"); + static constexpr char* MOD = const_cast("mod"); + static constexpr char* DOUBLE_SLASH = const_cast("//"); + static constexpr char* DOUBLE_COLON = const_cast("::"); + static constexpr char* DOUBLE_DOT = const_cast(".."); + static constexpr char* ANCESTOR = const_cast("ancestor::"); + static constexpr char* ANCESTOR_OR_SELF = const_cast("ancestor-or-self::"); + static constexpr char* ATTRIBUTE = const_cast("attribute::"); + static constexpr char* CHILD = const_cast("child::"); + static constexpr char* DESCENDANT = const_cast("descendant::"); + static constexpr char* DESCENDANT_OR_SELF = const_cast("descendant-or-self::"); + static constexpr char* FOLLOWING = const_cast("following::"); + static constexpr char* FOLLOWING_SIBLING = const_cast("following-sibling::"); + static constexpr char* NAMESPACE = const_cast("namespace::"); + static constexpr char* PARENT = const_cast("parent::"); + static constexpr char* PRECEDING = const_cast("preceding::"); + static constexpr char* PRECEDING_SIBLING = const_cast("preceding-sibling::"); + static constexpr char* SELF = const_cast("self::"); + static constexpr char* NODE = const_cast("node("); + static constexpr char* TEXT = const_cast("text("); + static constexpr char* COMMENT = const_cast("comment("); + static constexpr char* PROCESSING_INSTRUCTION = const_cast("processing-instruction("); +}; + +// todo: path cache +class ObPathParser { +public: + ObPathParser(ObMulModeMemCtx* ctx, const ObParserType& parser_type, const ObString& path, + ObString& default_ns, ObPathVarObject* pass_var) : + allocator_(ctx->allocator_), parser_type_(parser_type), expression_(path), default_ns_(default_ns), + pass_var_(pass_var), bad_index_(-1), index_(0), len_(path.length()), + is_first_node_(true), ctx_(ctx) {} + explicit ObPathParser(const ObString& path, common::ObIAllocator *allocator); + virtual ~ObPathParser() {} + int to_string(ObStringBuffer& str); // transfer all pathnodes to string + int 
parse_path(ObPathArgType patharg_type = NOT_SUBPATH); + int parse_location_path(ObPathArgType patharg_type = NOT_SUBPATH); + ObPathNode* get_root() {return root_node_;} +private: + int parse_xpath_node(ObPathArgType patharg_type); + int parse_location_node(bool is_absolute); + int parse_primary_expr_node(ObPathArgType patharg_type); + int parse_non_abbrevited_location_node(bool is_absolute); + int parse_double_slash_node(); + int parse_double_dot_node(bool is_absolute); + int parse_single_dot_node(bool is_absolute); + int parse_axis_info(ObPathLocationNode*& location); + int parse_nodetest_info(ObPathLocationNode*& location); + int parse_namespace_info(ObPathLocationNode*& location, ObString& ns_str); + int parse_union_node(ObPathArgType patharg_type); + int get_filter_char_type(ObXpathFilterChar& filter_char); + int parse_arg(ObPathNode*& arg, ObPathArgType patharg_type, bool is_filter, bool negtive = false); + int parse_filter_node(ObPathNode*& filter, ObPathArgType patharg_type); + int parse_func_type(ObFuncType& func_type); + int parse_func_arg(ObPathFuncNode*& func_node, ObPathArgType patharg_type = NOT_SUBPATH); + int parse_func_node(ObPathArgType patharg_type = NOT_SUBPATH); + int trans_to_filter_op(ObPathRootNode*& origin_root, int filter_num, bool is_first, ObPathNode*& op_root); + int push_filter_char_in(const ObXpathFilterChar& in, ObPathVectorPointers& node_stack, + ObFilterCharPointers& char_stack, ObPathArgType patharg_type); + int get_xpath_ident(char*& str, uint64_t& length, bool& is_func); + int get_xpath_literal(char*& str, uint64_t& length); + int get_xpath_number(double& num); + int get_subpath_str(bool is_filter, ObString& subpath); + int parse_subpath(ObString& subpath,ObPathNode*& node, bool is_filter, ObPathArgType patharg_type); + int get_xpath_subpath(ObPathNode*& node, bool is_filter, ObPathArgType patharg_type); + int check_nodetest(const ObString& str, ObSeekType& seek_type, char*& arg, uint64_t& arg_len); + int 
alloc_path_node(ObPathNode*& node); + int alloc_root_node(ObPathRootNode*& node); + int alloc_filter_op_node(ObPathFilterOpNode*& node); + int alloc_location_node(ObPathLocationNode*& node); + int alloc_filter_node(ObPathFilterNode*& node); + int alloc_func_node(ObPathFuncNode*& node); + int alloc_arg_node(ObPathArgNode*& node); + int jump_over_filter(); + int jump_over_quote(); + int jump_over_brace(bool is_brace); + int trans_to_pure_index_filter(ObPathNode*& node); + int check_cmp(bool& is_cmp); + int check_is_legal_xpath(const ObPathArgType& patharg_type); + bool is_prefix_match_letter_operator(); + bool is_prefix_match_function(); + bool path_prefix_match(const char *str); + bool is_path_end_with_brace(); + bool is_function_path(); + bool is_number_begin(); + bool is_literal_begin(); + bool is_last_letter_location(int last_idx); + bool is_last_letter_operator(const int& last_idx); + bool is_negtive(); + common::ObIAllocator *allocator_; + ObParserType parser_type_; + ObString expression_; + ObString default_ns_; + ObPathVarObject* pass_var_; + ObPathNode* root_node_; + uint64_t bad_index_; + uint64_t index_; + uint64_t len_; + bool is_first_node_; + ObMulModeMemCtx* ctx_; +}; + +class ObPathParserUtil +{ +public: + static bool is_xml_name_start_char(const char ch); + static bool is_xml_name_char(const char ch); + static bool is_end_of_xpathkeyword(const char ch); + static bool is_xpath_ident_terminator(const char ch); + static bool check_is_legal_tagname(const char* name, int length); + static bool is_xpath_transform_terminator(const char ch); + static bool is_left_brace(const char ch); + static bool is_operator(const char ch); + static bool is_nodetest_start_char(const char ch); + static bool is_function_start_char(const char ch); + static bool is_func_must_in_pred(const ObFuncType& func_type); + static bool is_illegal_comp_for_filter(const ObFilterType& type, ObPathNode* left, ObPathNode* right); + static bool is_boolean_ans(ObFilterType type); + static bool 
is_boolean_subpath_arg(ObPathNode* node); + static bool is_position(ObPathNode* node); +}; + +} +} +#endif // OCEANBASE_SQL_OB_XPATH_PARSE \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_tree_base.cpp b/deps/oblib/src/lib/xml/ob_tree_base.cpp new file mode 100644 index 0000000000..b04392c16d --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_tree_base.cpp @@ -0,0 +1,1072 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface implement for the tree base abstraction. + */ + +#define USING_LOG_PREFIX SQL_ENG +#include "lib/ob_errno.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/oblog/ob_log.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/ob_define.h" +#include "lib/container/ob_array_iterator.h" +#include "lib/xml/ob_multi_mode_interface.h" +#include "lib/xml/ob_tree_base.h" +#include "lib/xml/ob_xml_tree.h" + +namespace oceanbase { +namespace common { + + +struct ObLibNodePtrEqual { + ObLibNodePtrEqual(ObLibTreeNodeBase* target) { + target_ = target; + } + + bool operator()(ObLibTreeNodeBase* iter_node) { + return iter_node == target_; + } + + ObLibTreeNodeBase* target_; +}; + +struct ObLibTreeKeyCompare { + int operator()(ObLibTreeNodeBase* left, ObLibTreeNodeBase* right) + { + INIT_SUCC(ret); + + ObString left_key; + ObString right_key; + + left->get_key(left_key); + right->get_key(right_key); + + return (left_key.compare(right_key) < 0); + } +}; + +struct ObXmlNodeKeyCompare { + int operator()(const ObString& left_key, 
ObLibTreeNodeBase* right) { + INIT_SUCC(ret); + ObString right_key; + + right->get_key(right_key); + + return left_key.compare(right_key) < 0; + } + + int operator()(ObLibTreeNodeBase* left, const ObString& right) + { + INIT_SUCC(ret); + ObString left_key; + + left->get_key(left_key); + + return (left_key.compare(right) < 0); + } +}; + +int ObLibTreeNodeBase::insert_slibing(ObLibTreeNodeBase* new_node, int64_t relative_index) +{ + INIT_SUCC(ret); + ObLibTreeNodeBase* parent = get_parent(); + if (OB_ISNULL(parent)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to insert, parent is null", K(ret), K(pos_), K(flags_)); + } else if (parent->is_leaf_node()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to insert, parent is not container", K(ret), K(pos_), K(flags_), K(type_)); + } else if (OB_FAIL(parent->insert(pos_ + relative_index, new_node))) { + LOG_WARN("fail to insert", K(ret), K(pos_), K(flags_), + K(type_), K(parent->count())); + } + + return ret; +} + +int ObLibTreeNodeBase::get_key(ObString& key) +{ + INIT_SUCC(ret); + if (type_ == OB_XML_TYPE + && OB_FAIL((static_cast(this))->get_key(key))) { + LOG_WARN("fail to get key", K(ret), K(pos_), K(flags_)); + } + return ret; +} + +int ObLibTreeNodeBase::insert_after(ObLibTreeNodeBase* new_node) +{ + // insert after current node + // relative index is 1 + return insert_slibing(new_node, 1); +} + +int ObLibTreeNodeBase::insert_prev(ObLibTreeNodeBase* new_node) +{ + // insert before current node + // relative index is 0 + return insert_slibing(new_node, 0); +} + +ObLibContainerNode* ObLibContainerNode::iterator::operator*() +{ + ObLibContainerNode* res = nullptr; + if (cur_node_->is_leaf_node()) { + res = cur_node_; + } else if (cur_node_->is_using_child_buffer()) { + res = cur_node_->child_[0]; + } else if (OB_NOT_NULL(vector_) && vector_->size() > cur_pos_ && cur_pos_ >= 0) { + res = static_cast(vector_->at(cur_pos_)); + } + return res; +} + +ObLibContainerNode* ObLibContainerNode::iterator::operator[](int64_t 
pos) +{ + ObLibContainerNode* res = nullptr; + + if (cur_node_->is_leaf_node()) { + if (pos == 0) { + res = cur_node_; + } + } else if (cur_node_->is_using_child_buffer()) { + if (pos == 0) { + res = cur_node_->child_[0]; + } + } else { + res = static_cast(vector_->at(pos)); + } + return res; +} + +ObLibContainerNode::iterator ObLibContainerNode::iterator::next() +{ + cur_pos_++; + return *this; +} + +ObLibContainerNode::iterator ObLibContainerNode::iterator::operator++() +{ + cur_pos_++; + return *this; +} + +ObLibContainerNode::iterator ObLibContainerNode::iterator::operator--() +{ + cur_pos_--; + return *this; +} + +ObLibContainerNode::iterator ObLibContainerNode::iterator::operator++(int) +{ + iterator res(*this); + cur_pos_++; + return res; +} + +ObLibContainerNode::iterator ObLibContainerNode::iterator::operator--(int) +{ + iterator res(*this); + cur_pos_--; + return res; +} + +void ObLibContainerNode::iterator::set_range(int64_t start, int64_t finish) +{ + cur_pos_ = start; + if (finish < total_) { + total_ = finish; + } +} + +ObLibContainerNode::iterator ObLibContainerNode::iterator::operator+(int size) +{ + iterator res(*this); + res.cur_pos_ += size; + return res; +} + +ObLibContainerNode::iterator ObLibContainerNode::iterator::operator-(int size) +{ + iterator res(*this); + res.cur_pos_ -= size; + return res; +} + +bool ObLibContainerNode::iterator::operator<(const iterator& iter) +{ + return cur_pos_ < iter.cur_pos_; +} + +bool ObLibContainerNode::iterator::operator>(const iterator& iter) +{ + return cur_pos_ > iter.cur_pos_; +} + +int64_t ObLibContainerNode::iterator::operator-(const ObLibContainerNode::iterator& iter) +{ + int64_t different = cur_pos_ - iter.cur_pos_; + return different; +} + +ObLibContainerNode::iterator ObLibContainerNode::iterator::operator+=(int size) +{ + cur_pos_ += size; + return *this; +} + +ObLibContainerNode::iterator ObLibContainerNode::iterator::operator-=(int size) +{ + cur_pos_ -= size; + return *this; +} + +bool 
/**
 * Build an iterator positioned at the first child of this node.
 *
 *  - leaf node:          iterates over the node itself (total_ == 1)
 *  - inline child slot:  total_ is 1 when child_[0] is set, else 0
 *  - otherwise:          iterates over children_ when the node keeps a
 *                        sequent container, else over sorted_children_
 *
 * If the node keeps neither container the iterator is empty (total_ == 0,
 * vector_ == nullptr) instead of dereferencing a null vector; this mirrors
 * the null guard in get_effective_range().
 */
ObLibContainerNode::iterator ObLibContainerNode::begin()
{
  ObLibContainerNode::iterator iter;
  iter.cur_node_ = this;
  iter.cur_pos_ = 0;

  if (is_leaf_node()) {
    iter.total_ = 1;
    iter.vector_ = nullptr;
  } else if (is_using_child_buffer()) {
    iter.total_ = child_[0] == nullptr ? 0 : 1;
    iter.vector_ = nullptr;
  } else {
    ObLibTreeNodeVector* data_vector = nullptr;
    if (has_sequent_member()) {
      data_vector = children_;
    } else if (has_sorted_member()) {
      data_vector = sorted_children_;
    }
    // data_vector stays null when neither container flag is set
    iter.total_ = (nullptr == data_vector) ? 0 : data_vector->size();
    iter.vector_ = data_vector;
  }
  return iter;
}
0 : 1; + iter.cur_pos_ = iter.total_; + iter.vector_ = nullptr; + } else { + ObLibTreeNodeVector* data_vector = nullptr; + if (has_sequent_member()) { + data_vector = children_; + } else if (has_sorted_member()) { + data_vector = sorted_children_; + } + iter.cur_pos_ = iter.total_ = data_vector->size(); + iter.vector_ = data_vector; + } + return iter; +} + +ObLibContainerNode::iterator ObLibContainerNode::sorted_begin() +{ + iterator res = begin(); + if (!is_using_child_buffer()) { + res.vector_ = sorted_children_; + } + return res; +} + +ObLibContainerNode::iterator ObLibContainerNode::sorted_end() +{ + iterator res = end(); + if (!is_using_child_buffer()) { + res.vector_ = sorted_children_; + } + return res; +} + +int ObLibContainerNode::tree_iterator::start() +{ + INIT_SUCC(ret); + stack_.reset(); + + if (OB_ISNULL(root_)) { + ret = OB_ITER_END; + } else if (type_ == POST_ORDER) { + ObLibContainerNode* tmp = root_; + while (OB_SUCC(ret) && OB_NOT_NULL(tmp)) { + if (OB_FAIL(stack_.push(tmp))) { + LOG_WARN("fail to push ObStack", K(ret), K(stack_.count())); + } else if (!tmp->is_leaf_node() && tmp->size() > 0) { + tmp = static_cast(tmp->member(0)); + } else { + break; + } + } + } else { + if (OB_FAIL(stack_.push(root_))) { + LOG_WARN("fail to push ObStack", K(ret)); + } + } + + return ret; +} + +int ObLibContainerNode::tree_iterator::next(ObLibContainerNode*& res) +{ + INIT_SUCC(ret); + + if (stack_.empty()) { + ret = OB_ITER_END; + } else if (type_ == POST_ORDER) { + bool is_iter_valid = true; + iterator& cur_iter = stack_.top(); + ObLibContainerNode* node = cur_iter.current(); + ObLibContainerNode* tmp = nullptr; + if (cur_iter.end() || node->is_leaf_node()) { + stack_.pop(); + res = node; + if (!stack_.empty()) { + iterator& tmp_iter = stack_.top(); + tmp_iter.next(); + } + } else { + tmp = *cur_iter; + if (!tmp->is_leaf_node() && tmp->size() > 0) { + while (OB_NOT_NULL(tmp) && OB_SUCC(ret)) { + if (OB_FAIL(stack_.push(tmp))) { + LOG_WARN("fail to push 
ObStack", K(ret), K(stack_.size())); + } else if (!tmp->is_leaf_node() && tmp->size() > 0) { + tmp = static_cast(tmp->member(0)); + if (tmp->is_leaf_node()) { + if (!stack_.empty()) { + iterator& tmp_iter = stack_.top(); + tmp_iter.next(); + } + break; + } + } else { + break; + } + } + } else { + cur_iter.next(); + } + + res = tmp; + } + } else /* (type_ == PRE_ORDER) */ { + iterator& cur_iter = stack_.top(); + ObLibContainerNode* node = cur_iter.current(); + if (cur_iter == node->begin() && !cur_iter.is_eval_current()) { + res = node; + if ((node->is_leaf_node() || node->size() == 0)) { + stack_.pop(); + if (!stack_.empty()) { + iterator& tmp_iter = stack_.top(); + tmp_iter.next(); + } + } else if (node->size() > 0 && !node->is_leaf_node()) { + ObLibContainerNode* tmp = *cur_iter; + if (OB_FAIL(stack_.push(tmp))) { + LOG_WARN("fail to push ObStack", K(ret), K(stack_.size())); + } + } + } else { // container has more than 0 element + if (cur_iter.end()) { + stack_.pop(); + if (!stack_.empty()) { + iterator& tmp_iter = stack_.top(); + tmp_iter.next(); + } + + if (OB_FAIL(next(res))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next", K(ret), K(stack_.size())); + } + } + } else { + res = *cur_iter; + ObLibContainerNode* tmp = res; + if (!tmp->is_leaf_node() && tmp->size() > 0) { + if (OB_FAIL(stack_.push(iterator(tmp, true)))) { + LOG_WARN("fail to push ObStack", K(ret), K(stack_.size())); + } + } else { + cur_iter.next(); + } + } + } + } + + return ret; +} + +int ObLibContainerNode::alter_member_sort_policy(bool actived) +{ + INIT_SUCC(ret); + bool is_do_scan = false; + bool is_do_sort = false; + + + if (actived && is_lazy_sort()) { + is_do_scan = true; + is_do_sort = true; + } else if (!actived && !is_lazy_sort()) { + is_do_scan = true; + is_do_sort = false; + } + + ObLibContainerNode* current = this; + ObLibContainerNode::tree_iterator iter(current, ctx_->allocator_); + + if (!is_do_scan) { + } else if (OB_FAIL(iter.start())) { + LOG_WARN("fail to prepare 
scan iterator", K(ret)); + } else { + ObLibContainerNode* tmp = nullptr; + while (OB_SUCC(iter.next(tmp))) { + if (OB_ISNULL(tmp)) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("get iter value null pointer", K(ret)); + } else if (is_do_sort) { + tmp->do_sort(); + tmp->del_flags(MEMBER_LAZY_SORTED); + } else if (!is_do_sort) { + tmp->set_flags(MEMBER_LAZY_SORTED); + } + } + + if (ret == OB_ITER_END || OB_SUCC(ret)) { + ret = OB_SUCCESS; + } else { + LOG_WARN("fail scan liberty tree", K(ret)); + } + } + + return ret; +} + +int ObLibContainerNode::get_children(const ObString& key, ObIArray& res) +{ + INIT_SUCC(ret); + if (is_using_child_buffer()) { + if (OB_NOT_NULL(child_[0])) { + ObString tmp; + get_key(tmp); + + if (key.compare(key) && OB_FAIL(res.push_back(child_[0]))) { + LOG_WARN("fail to store node", K(ret), K(res.count())); + } + } + } else { + ObXmlNodeKeyCompare cmp; + ObLibTreeNodeVector::iterator low_iter = std::lower_bound(sorted_children_->begin(), + sorted_children_->end(), key, cmp); + + ObLibTreeNodeVector::iterator up_iter = std::upper_bound(sorted_children_->begin(), + sorted_children_->end(), key, cmp); + + if (low_iter != sorted_children_->end()) { + for (; OB_SUCC(ret) && low_iter != up_iter; low_iter++) { + if (OB_FAIL(res.push_back(*low_iter))) { + LOG_WARN("fail to store node", K(ret), K(res.count())); + } + } + } + } + return ret; +} + +int ObLibContainerNode::get_children(const ObString& key, IterRange& range) +{ + INIT_SUCC(ret); + if (is_using_child_buffer()) { + if (OB_NOT_NULL(child_[0])) { + ObString tmp; + child_[0]->get_key(tmp); + if (key.compare(tmp) == 0) { + range.first = sorted_begin(); + range.second = sorted_end(); + } + } + } else { + ObXmlNodeKeyCompare cmp; + ObLibTreeNodeVector::iterator low_iter = std::lower_bound(sorted_children_->begin(), + sorted_children_->end(), key, cmp); + + ObLibTreeNodeVector::iterator up_iter = std::upper_bound(sorted_children_->begin(), + sorted_children_->end(), key, cmp); + + if (low_iter != 
sorted_children_->end()) { + range.first = sorted_begin() + (low_iter - sorted_children_->begin()); + range.second = sorted_begin() + (up_iter - 1 - sorted_children_->begin()); + } + } + return ret; +} + +IndexRange ObLibContainerNode::get_effective_range(int64_t start, int64_t end) +{ + int64_t res_start = 0; + int64_t res_end = 0; + ObLibTreeNodeVector* data_vector = nullptr; + + if (is_leaf_node()) { + } else if (is_using_child_buffer()) { + } else if (has_sequent_member()) { + data_vector = children_; + } else if (has_sorted_member()) { + data_vector = sorted_children_; + } + if (data_vector) { + int64_t count = data_vector->size(); + res_start = start < 0 ? 0 : start; + res_end = end >= count ? (count - 1) : end; + } + + return std::make_pair(res_start, res_end); +} + +int ObLibContainerNode::get_range(int64_t start, int64_t end, ObIArray& res) +{ + INIT_SUCC(ret); + ObLibTreeNodeVector* data_vector = nullptr; + if (is_leaf_node()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to get all children, cur node is leaf node", K(ret), K(flags_)); + } else if (is_using_child_buffer()) { + if (OB_NOT_NULL(child_[0]) + && OB_FAIL(res.push_back(child_[0]))) { + LOG_WARN("fail to store current node", K(ret), K(res.count())); + } + } else { + if (has_sequent_member()) { + data_vector = children_; + } else if (has_sorted_member()) { + data_vector = sorted_children_; + } + + int64_t count = data_vector->size(); + start = start < 0 ? 0 : start; + end = end >= count ? 
(count - 1) : end; + + if (start > end) { + } else if (OB_FAIL(res.reserve(end - start + 1))) { + LOG_WARN("fail to get all children, reserve memory failed", K(ret), K(data_vector->size())); + } + + for (ObLibTreeNodeVector::iterator iter = data_vector->begin() + start; + OB_SUCC(ret) && iter <= data_vector->begin() + end; iter++) { + if (OB_FAIL(res.push_back(*iter))) { + LOG_WARN("fail to store current node", K(ret), K(res.count())); + } + } + } + + return ret; +} + +int ObLibContainerNode::get_children(ObIArray& res) +{ + return get_range(0, static_cast(-1), res); +} + +int64_t ObLibContainerNode::size() const +{ + int64_t res = 0; + if (is_leaf_node()) { + res = 1; + } else if (is_using_child_buffer()) { + res = child_[0] == nullptr ? 0 : 1; + } else if (has_sequent_member()) { + res = children_->size(); + } else { + res = sorted_children_->size(); + } + return res; +} + +ObLibTreeNodeBase* ObLibContainerNode::member(size_t pos) +{ + int64_t count = size(); + ObLibTreeNodeBase* member = nullptr; + if (pos < 0 || pos >= count || is_leaf_node()) { + } else if (is_using_child_buffer()) { + member = child_[0]; + } else if (has_sequent_member()) { + member = children_->at(pos); + } else if (has_sorted_member()) { + member = sorted_children_->at(pos); + } + return member; +} + +int64_t ObLibContainerNode::count() const +{ + return size(); +} + +int64_t ObLibContainerNode::get_member_index(ObLibTreeNodeVector& container, ObLibTreeNodeBase* node) +{ + INIT_SUCC(ret); + int64_t pos = -1; + if (is_using_child_buffer()) { + pos = node == child_[0] ? 
0 : -1; + } else { + for (int64_t idx = 0; idx < container.size(); ++idx) { + if (container.at(idx) != node) { + } else { + pos = idx; + break; + } + } + } + + return pos; +} + +void ObLibContainerNode::do_sort() +{ + if (has_sorted_member() && !is_using_child_buffer()) { + ObLibTreeKeyCompare cmp; + std::stable_sort(sorted_children_->begin(), sorted_children_->end(), cmp); + } +} + +void ObLibContainerNode::sort() +{ + if (has_sorted_member() && !is_lazy_sort() && !is_using_child_buffer()) { + ObLibTreeKeyCompare cmp; + std::stable_sort(sorted_children_->begin(), sorted_children_->end(), cmp); + } +} + // 数据修改接口, 修改的是孩子 +int ObLibContainerNode::append(ObLibTreeNodeBase* node) +{ + INIT_SUCC(ret); + + if (is_leaf_node()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to append child on leaf node", K(ret), K(flags_), K(type_)); + } else if (OB_FAIL(append_into_sequent_container(node))) { + LOG_WARN("fail to store new node in order array", K(ret)); + } else if (OB_FAIL(append_into_sorted_container(node))) { + LOG_WARN("fail to store new node in sorted array", K(ret)); + } else { + node->set_parent(this); + } + + return ret; +} + +void ObLibContainerNode::increase_index_after(int64_t pos) +{ + for (ObLibTreeNodeVector::iterator iter = children_->begin() + pos; + iter != children_->end(); ++iter) { + ObLibTreeNodeBase* node = *iter; + node->increase_index(); + } +} + +void ObLibContainerNode::decrease_index_after(int64_t pos) +{ + for (ObLibTreeNodeVector::iterator iter = children_->begin() + pos; + iter != children_->end(); ++iter) { + ObLibTreeNodeBase* node = *iter; + node->decrease_index(); + } +} + +int ObLibContainerNode::insert(int64_t pos, ObLibTreeNodeBase* node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("new node is null", K(ret)); + } else { + node->set_parent(this); + int64_t count = size(); + pos = pos <= 0 ? 
0 : pos; + if (is_leaf_node()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to insert child on leaf node", K(ret), K(flags_), K(type_)); + } else if (pos >= count) { + if (OB_FAIL(append(node))) { + LOG_WARN("fail to append.", K(ret), K(count), K(pos)); + } + } else if (OB_FAIL(insert_into_sequent_container(pos, node))) { + LOG_WARN("fail to insert new node into sequent container.", K(ret), K(count), K(pos)); + } else if (OB_FAIL(append_into_sorted_container(node))) { + LOG_WARN("fail to insert new node into sorted container.", K(ret), K(count), K(pos)); + } + } + return ret; +} + +bool ObLibContainerNode::check_container_valid() +{ + bool bool_ret = true; + if (HAS_CONTAINER_MEMBER(this)) { + } else if (is_using_child_buffer()) { + } else { + bool_ret = children_->size() == sorted_children_->size(); + } + return bool_ret; +} + +int ObLibContainerNode::remove(ObLibTreeNodeBase* node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node input is null", K(ret)); + } else if (has_sequent_member() && OB_FAIL(remove_from_sequent_container(node->get_index()))) { + LOG_WARN("fail to remove from sequent", K(ret), K(node->get_index())); + } else if (OB_FAIL(remove_from_sorted_container(node))) { + LOG_WARN("fail to remove from sorted children", K(ret), K(count()), K(pos_)); + } else if (!check_container_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to remove node, as too array number not consistent", K(ret)); + } + + return ret; +} + +int ObLibContainerNode::remove(int64_t pos) +{ + INIT_SUCC(ret); + size_t count = size(); + if (is_leaf_node()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to append child on leaf node", K(ret), K(flags_), K(type_)); + } else if (pos < 0 || pos >= count) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to remove, index out of range", K(ret), K(count), K(pos)); + } else if (HAS_CONTAINER_MEMBER(this)) { + if (is_using_child_buffer()) { + child_[0] = nullptr; + } else { + ObLibTreeNodeBase* cur 
= children_->at(pos); + if (OB_FAIL(children_->remove(pos))) { + LOG_WARN("fail to remove child node", K(ret), K(count)); + } else if (OB_FAIL(remove_from_sorted_container(cur))) { + LOG_WARN("fail to remove from sorted children", K(ret), K(count), K(pos)); + } else if (!check_container_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to remove node, as too array number not consistent", K(ret)); + } else { + decrease_index_after(pos); + } + } + } else if (has_sequent_member()) { + if (OB_FAIL(remove_from_sequent_container(pos))) { + LOG_WARN("fail to remove from sequent", K(ret), K(pos)); + } + } else if (has_sorted_member()) { + remove_from_sorted_container(pos); + } + + return ret; +} + + +int ObLibContainerNode::remove_from_sequent_container(int64_t pos) +{ + INIT_SUCC(ret); + if (!has_sequent_member()) { + } else if (is_using_child_buffer()) { + child_[0] = nullptr; + } else if (OB_FAIL(children_->remove(pos))) { + LOG_WARN("fail to remove child from sequent", K(ret), K(pos)); + } else { + decrease_index_after(pos); + } + return ret; +} + +int ObLibContainerNode::remove_from_sorted_container(ObLibTreeNodeBase* node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node input is null", K(ret)); + } else if (has_sorted_member()) { + if (is_using_child_buffer()) { + if (OB_ISNULL(child_[0])) { + } else if (child_[0] == node) { + child_[0] = nullptr; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to remvoe node from container, as old node not exist", K(ret), K(size())); + } + } else { + int64_t pos = get_member_index(*sorted_children_, node); + if (pos != common::OB_INVALID_ID) { + sorted_children_->remove(pos); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to remvoe node from container, as old node not exist", K(ret), K(size())); + } + } + } + + return ret; +} + +void ObLibContainerNode::remove_from_sorted_container(int64_t pos) +{ + if (!has_sorted_member()) { + } else if (is_using_child_buffer()) { + 
child_[0] = nullptr; + } else { + sorted_children_->remove(pos); + } +} + +int ObLibContainerNode::append_into_sequent_container(ObLibTreeNodeBase* node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node input is null", K(ret)); + } else if (!has_sequent_member()) { + } else if (is_using_child_buffer()) { + if (OB_ISNULL(child_[0])) { + child_[0] = static_cast(node); + node->set_index(0); + } else if (OB_FAIL(extend())) { + LOG_WARN("failed to extend", K(ret)); + } else { + ret = append_into_sequent_container(node); + } + } else { + if (OB_FAIL(children_->push_back(node))) { + LOG_WARN("fail to store new node in order array", K(ret), K(children_->size())); + } else { + node->set_index(children_->size() - 1); + } + } + + return ret; +} + +int ObLibContainerNode::insert_into_sequent_container(int64_t pos, ObLibTreeNodeBase* node) +{ + INIT_SUCC(ret); + if (!has_sequent_member()) { + } else if (is_using_child_buffer()) { + if (child_[0] == node) { + } else if (OB_ISNULL(child_[0])) { + child_[0] = static_cast(node); + node->set_index(0); + } else if (OB_FAIL(extend())) { + LOG_WARN("failed to extend", K(ret)); + } else { + node->set_index(0); + ret = insert_into_sequent_container(pos, node); + } + } else { + ObLibTreeNodeVector::iterator iter = pos > children_->size() ? 
+ children_->end() : children_->begin() + pos; + if (OB_FAIL(children_->insert(iter, node))) { + LOG_WARN("fail to insert new node in order array", K(ret), K(children_->size())); + } else { + node->set_index(pos); + increase_index_after(pos+1); + } + } + + return ret; +} + +int ObLibContainerNode::append_into_sorted_container(ObLibTreeNodeBase* node) +{ + INIT_SUCC(ret); + if (!has_sorted_member()) { + } else if (is_using_child_buffer()) { + if (child_[0] == node) { + } else if (OB_ISNULL(child_[0])) { + child_[0] = static_cast(node); + node->set_index(0); + } else if (OB_FAIL(extend())) { + LOG_WARN("failed to extend", K(ret)); + } else { + ret = append_into_sorted_container(node); + } + } else if (OB_FAIL(sorted_children_->push_back(node))) { + LOG_WARN("fail to append into sorted children", K(ret), K(sorted_children_->size())); + } else { + sort(); + } + return ret; +} + +int ObLibContainerNode::update(int64_t pos, ObLibTreeNodeBase* new_node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(new_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("new node is null", K(ret)); + } else { + size_t count = size(); + new_node->set_parent(this); + new_node->set_index(pos); + + if (is_leaf_node()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to append child on leaf node", K(ret), K(flags_), K(type_)); + } else if (pos < 0 || pos >= count) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to remove, index out of range", K(ret), K(count), K(pos)); + } else if (has_sequent_member()) { + if (is_using_child_buffer()) { + child_[0] = static_cast(new_node); + new_node->set_index(0); + } else { + ObLibTreeNodeBase* old_node = children_->at(pos); + ObLibTreeNodeVector::iterator iter = children_->begin() + pos; + ObLibTreeNodeBase* tmp_node = nullptr; + if (OB_FAIL(children_->replace(iter, new_node, tmp_node))) { + LOG_WARN("fail to replace child node", K(ret), K(count), K(pos)); + } else { + if (OB_FAIL(remove_from_sorted_container(old_node))) { + LOG_WARN("fail to remove from sorted 
children", K(ret), K(count), K(pos)); + } else if (OB_FAIL(append_into_sorted_container(new_node))) { + LOG_WARN("fail to append into sorted children", K(ret), K(count), K(pos)); + } + } + } + } else { + if (is_using_child_buffer()) { + child_[0] = static_cast(new_node); + new_node->set_index(0); + } else { + remove_from_sorted_container(pos); + if (OB_FAIL(append_into_sorted_container(new_node))) { + LOG_WARN("fail to append into sorted children", K(ret), K(count), K(pos)); + } + } + } + } + + return ret; +} + +int ObLibContainerNode::update(ObLibTreeNodeBase* old_node, ObLibTreeNodeBase* new_node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(old_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("new node is null", K(ret)); + } else { + + new_node->set_parent(this); + if (is_leaf_node()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to update child on leaf node", K(ret), K(flags_), K(type_)); + } else if (has_sequent_member()) { + if (is_using_child_buffer()) { + if (child_[0] == old_node) { + child_[0] = static_cast(new_node); + new_node->set_index(0); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to update child as old node not exist", K(ret), K(flags_)); + } + } else if (OB_FAIL(update(old_node->get_index(), new_node))) { + LOG_WARN("fail to update node", K(ret), K(flags_), K(type_), K(old_node->get_index())); + } + } else if (OB_FAIL(remove_from_sorted_container(old_node))) { + LOG_WARN("fail to remove from sorted children", K(ret), K(count()), K(old_node->get_index())); + } else if (OB_FAIL(append_into_sorted_container(new_node))) { + LOG_WARN("fail to append into sorted children", K(ret)); + } + } + + return ret; +} + +int ObLibContainerNode::extend() +{ + INIT_SUCC(ret); + if (OB_ISNULL(ctx_) || OB_ISNULL(ctx_->allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx is null", K(ret), K(ctx_)); + } else { + ObLibContainerNode* tmp = child_[0]; + if (has_sorted_member()) { + sorted_children_ = 
static_cast(ctx_->allocator_->alloc(sizeof(ObLibTreeNodeVector))); + if (OB_ISNULL(sorted_children_)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate array failed", K(ret), K(ctx_)); + } else { + new (sorted_children_) ObLibTreeNodeVector(&ctx_->mode_arena_, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR); + } + } + + if (OB_SUCC(ret) && has_sequent_member()) { + children_ = static_cast(ctx_->allocator_->alloc(sizeof(ObLibTreeNodeVector))); + if (OB_ISNULL(children_)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate array failed", K(ret), K(ctx_)); + } else { + new (children_) ObLibTreeNodeVector(&ctx_->mode_arena_, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR); + } + } + + if (OB_SUCC(ret)) { + flags_ &= (~MEMBER_USING_BUFFER); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(append_into_sequent_container(tmp))) { + LOG_WARN("failed to add sequent array", K(ret), K(ctx_)); + } else if (OB_FAIL(append_into_sorted_container(tmp))) { + LOG_WARN("failed to add sorted array", K(ret), K(ctx_)); + } + + } + + return ret; +} + +} +} diff --git a/deps/oblib/src/lib/xml/ob_tree_base.h b/deps/oblib/src/lib/xml/ob_tree_base.h new file mode 100644 index 0000000000..bce85e37e6 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_tree_base.h @@ -0,0 +1,316 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface define for the tree base abstraction. 
+ */ + +#ifndef OCEANBASE_SQL_OB_TREE_BASE +#define OCEANBASE_SQL_OB_TREE_BASE + +#include "lib/allocator/ob_mod_define.h" +#include "lib/allocator/page_arena.h" +#include "lib/container/ob_array.h" +#include "lib/container/ob_vector.h" +#include "lib/number/ob_number_v2.h" // for number::ObNumber +#include "lib/xml/ob_multi_mode_interface.h" + + +namespace oceanbase { +namespace common { +class ObLibTreeNodeBase; +class ObIMulModeBase; + +const int64_t DEFAULT_PAGE_SIZE = 4096L; // 4kb + +static const int32_t MEMBER_SCALAR_FLAG = 0x1; +static const int32_t MEMBER_SEQUENT_FLAG = 0x2; +static const int32_t MEMBER_IN_SET_FLAG = 0x4; +static const int32_t MEMBER_USING_BUFFER = 0x8; +static const int32_t MEMBER_LAZY_SORTED = 0x10; + +typedef std::pair IndexRange; +#define HAS_CONTAINER_MEMBER(node) ((node)->has_sequent_member() && (node)->has_sorted_member()) + +#pragma pack(4) +class ObLibTreeNodeBase { +public: + ObLibTreeNodeBase(int32_t flags, ObNodeDataType type) + : type_(type), + flags_(flags), + pos_(-1), + parent_(nullptr) + {} + + virtual ~ObLibTreeNodeBase() {} + // tree node interface + ObLibTreeNodeBase* get_parent() { return parent_; } + void set_parent(ObLibTreeNodeBase* parent) { parent_ = parent; } + void set_index(int64_t pos) { pos_ = static_cast(pos); } + void increase_index() { pos_++; } + void decrease_index() { pos_--; } + virtual void set_flags(uint32_t flags) { flags_ |= flags; } + + // same meaning + virtual int64_t size() const { return 1; } + virtual int64_t count() const { return 1; } + + bool is_leaf_node() const { return static_cast(MEMBER_SCALAR_FLAG & flags_); } + bool is_lazy_sort() const { return static_cast(MEMBER_LAZY_SORTED & flags_); } + bool has_sorted_member() const { return static_cast(MEMBER_IN_SET_FLAG & flags_); } + bool has_sequent_member() const { return static_cast(MEMBER_SEQUENT_FLAG & flags_); } + bool is_using_child_buffer() const { return static_cast(MEMBER_USING_BUFFER & flags_); } + + int get_key(ObString& key); + + 
int insert_prev(ObLibTreeNodeBase* new_node); + int insert_after(ObLibTreeNodeBase* new_node); + + // 返回节点具体类型 + // 例如:json返回jsonInt,jsonDouble + // xml 返回xmlElment, XmlAttribute + virtual int node_type() { return type_; } + // 数据修改接口, 修改的是孩子 + virtual int append(ObLibTreeNodeBase* node) = 0; + virtual int insert(int64_t pos, ObLibTreeNodeBase* node) = 0; + virtual int remove(int64_t pos) = 0; + virtual int remove(ObLibTreeNodeBase* node) = 0; + virtual int update(int64_t pos, ObLibTreeNodeBase* new_node) = 0; + virtual int update(ObLibTreeNodeBase* old_node, ObLibTreeNodeBase* new_node) = 0; + int32_t get_flags() const { return flags_; } + int64_t get_index() const { return pos_; } + int insert_slibing(ObLibTreeNodeBase* new_node, int64_t relative_index); + + int64_t to_string(char *buf, const int64_t buf_len) const { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "type = %d, flags_=%d, pos_=%d", type_, flags_, pos_); + return pos; + } +protected: + ObNodeDataType type_; + int32_t flags_; + int32_t pos_; + /* 父节点,公共 */ + ObLibTreeNodeBase* parent_; +}; + +#pragma pack() + +// need order and set both +// orginal document order and sorted order for query quickly +class ObLibContainerNode : public ObLibTreeNodeBase { +public: + + ObLibContainerNode(ObNodeDataType type, ObMulModeMemCtx *ctx) + : ObLibTreeNodeBase(MEMBER_SEQUENT_FLAG | MEMBER_IN_SET_FLAG, type), + ctx_(ctx), + children_(nullptr), + sorted_children_(nullptr) + { + flags_ |= MEMBER_USING_BUFFER; + } + + ObLibContainerNode(ObNodeDataType type, int32_t flags, ObMulModeMemCtx *ctx) + : ObLibTreeNodeBase(flags, type), + ctx_(ctx), + children_(nullptr), + sorted_children_(nullptr) + { + if (flags & MEMBER_SEQUENT_FLAG) { + flags_ |= MEMBER_USING_BUFFER; + } + + if (flags & MEMBER_IN_SET_FLAG) { + flags_ |= MEMBER_USING_BUFFER; + } + } + + ObLibContainerNode(ObNodeDataType type) + : ObLibTreeNodeBase(MEMBER_SCALAR_FLAG, type), + ctx_(nullptr), + children_(nullptr), + sorted_children_(nullptr) + { + } 
+ + ~ObLibContainerNode() {} + + virtual int64_t size() const; + virtual int64_t count() const; + + // 数据修改接口, 修改的是孩子 + int append(ObLibTreeNodeBase* node) override; + int insert(int64_t pos, ObLibTreeNodeBase* node) override; + int remove(int64_t pos) override; + int remove(ObLibTreeNodeBase* node) override; + int update(int64_t pos, ObLibTreeNodeBase* new_node) override; + int update(ObLibTreeNodeBase* old_node, ObLibTreeNodeBase* new_node) override; + int get_range(int64_t start, int64_t end, ObIArray& res); + int get_children(ObIArray& res); + int get_children(const ObString& key, ObIArray& res); + virtual void set_flags(uint32_t flags) { ObLibTreeNodeBase::set_flags(flags); } + virtual void del_flags(uint32_t flags) { flags_ &= (~flags); } + ObLibTreeNodeBase* member(size_t pos); + ObMulModeMemCtx* get_mem_ctx() { return ctx_; } + int alter_member_sort_policy(bool enable); + + class iterator { + public: + friend class ObLibContainerNode; + friend class tree_iterator; + iterator() + : is_eval_current_(false), + cur_pos_(-1), + total_(-1), + vector_(nullptr), + cur_node_(nullptr) + {} + // construct + iterator(const iterator& iter) + : is_eval_current_(iter.is_eval_current_), + cur_pos_(iter.cur_pos_), + total_(iter.total_), + vector_(iter.vector_), + cur_node_(iter.cur_node_) + {} + + // construct + iterator(ObLibContainerNode* node, bool is_eval_current) + { + cur_node_ = node; + is_eval_current_ = is_eval_current; + if (node->is_leaf_node()) { + cur_pos_ = 0; + total_ = 1; + vector_ = nullptr; + } else if (node->is_using_child_buffer()) { + cur_pos_ = 0; + total_ = node->child_[0] == nullptr ? 
0 : 1; + vector_ = nullptr; + } else { + ObLibTreeNodeVector* data_vector = nullptr; + if (node->has_sequent_member()) { + data_vector = node->children_; + } else { + data_vector = node->sorted_children_; + } + cur_pos_ = 0; + total_ = data_vector->size(); + vector_ = data_vector; + } + } + + iterator(ObLibContainerNode* node) + : iterator(node, false) {} + + ObLibContainerNode* current(); + ObLibContainerNode* operator*(); + ObLibContainerNode* operator[](int64_t pos); + + bool end(); + iterator next(); + iterator operator++(); + iterator operator--(); + iterator operator++(int); + iterator operator--(int); + bool operator<(const iterator& iter); + bool operator>(const iterator& iter); + iterator operator-(int size); + iterator operator+(int size); + iterator operator+=(int size); + iterator operator-=(int size); + int64_t operator-(const iterator& iter); + bool operator==(const iterator& rhs); + bool operator!=(const iterator& rhs); + bool operator<=(const iterator& rhs); + void set_range(int64_t start, int64_t finish); + + int64_t to_string(char *buf, const int64_t buf_len) const { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "cur_pos = %ld, total_=%ld", cur_pos_, total_); + return pos; + } + private: + bool is_eval_current() { return is_eval_current_; } + void set_eval_current() { is_eval_current_ = true; } + + private: + bool is_eval_current_; + int64_t cur_pos_; + int64_t total_; + ObLibTreeNodeVector* vector_; + ObLibContainerNode* cur_node_; + }; + + iterator begin(); + iterator end(); + iterator sorted_begin(); + iterator sorted_end(); + + typedef std::pair IterRange; + int get_children(const ObString& key, IterRange& range); + +protected: + class tree_iterator { + friend class ObLibContainerNode; + public: + tree_iterator(ObLibContainerNode* root, ObIAllocator* allocator) + : type_(POST_ORDER), + root_(root), + stack_(allocator) {} + + tree_iterator(ObLibContainerNode* root, scan_type type, ObIAllocator* allocator) + : type_(type), + 
root_(root), + stack_(allocator) {} + + int start(); + int next(ObLibContainerNode*& res); + + private: + scan_type type_; + ObLibContainerNode* root_; + ObStack stack_; + }; + IndexRange get_effective_range(int64_t start, int64_t end); + +protected: + int64_t get_member_index(ObLibTreeNodeVector& container, ObLibTreeNodeBase* node); + int insert_slibing(ObLibTreeNodeBase* new_node, int is_after_cur); + void increase_index_after(int64_t pos); + void decrease_index_after(int64_t pos); + void sort(); + void do_sort(); + int remove_from_sequent_container(int64_t pos); + void remove_from_sequent_container(ObLibTreeNodeBase* node); + void remove_from_sorted_container(int64_t pos); + int remove_from_sorted_container(ObLibTreeNodeBase* node); + int append_into_sorted_container(ObLibTreeNodeBase* node); + bool check_container_valid(); + int append_into_sequent_container(ObLibTreeNodeBase* node); + int insert_into_sequent_container(int64_t pos, ObLibTreeNodeBase* node); + int extend(); + +protected: + ObMulModeMemCtx* ctx_; + union { + ObLibContainerNode* child_[2]; + struct { + ObLibTreeNodeVector *children_; // document order array + ObLibTreeNodeVector *sorted_children_; // sorted array + }; + }; +}; + +typedef ObLibContainerNode::IterRange IterRange; + +} +} + +#endif // OCEANBASE_SQL_OB_TREE_BASE \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_xml.cpp b/deps/oblib/src/lib/xml/ob_xml.cpp new file mode 100644 index 0000000000..a9e005bf61 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xml.cpp @@ -0,0 +1,173 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ +#ifdef OB_BUILD_ORACLE_PL +#define USING_LOG_PREFIX LIB_XML2 + +#include "lib/xml/ob_xml.h" +#include "lib/ob_errno.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/oblog/ob_log.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/ob_define.h" +#include "common/object/ob_object.h" +#include "libxslt/transform.h" +#include "libxslt/xsltutils.h" + +namespace oceanbase +{ +namespace common +{ + +int ObXml::parse_xml(const ObString &input) { + int ret = OB_SUCCESS; + xmlDocPtr xml_doc_ptr = NULL; + if (OB_FAIL(parse_str_to_xml(input, xml_doc_ptr))) { + COMMON_LOG(WARN, "parse xml failed"); + } + xmlFreeDoc(xml_doc_ptr); + return ret; +} + +int ObXml::xslt_transform(ObIAllocator *allocator, + const ObString &input, + const ObString &xslt_sheet, + const ObIArray ¶ms, + ObString &output) +{ + int ret = OB_SUCCESS; + lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(common::OB_SERVER_TENANT_ID, "XSLTCache")); + if (xslt_sheet.empty()) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "xsl sheet is empty", K(xslt_sheet), K(ret)); + } else if (input.empty()) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "input xml is empty", K(input), K(ret)); + } else { + xmlDocPtr input_xml_ptr = NULL; + xmlDocPtr output_xml_ptr = NULL; + xmlDocPtr sheet_xml_ptr = NULL; + xsltStylesheetPtr style_sheet = NULL; + if (OB_FAIL(parse_str_to_xml(input, input_xml_ptr))) { + COMMON_LOG(WARN, "parse_str_to_xml failed", K(input), K(input_xml_ptr), K(ret)); + } else if (OB_FAIL(parse_str_to_xml(xslt_sheet, sheet_xml_ptr))){ + COMMON_LOG(WARN, "parse_str_to_xml failed", K(xslt_sheet), K(sheet_xml_ptr), K(ret)); + } else if 
(OB_FAIL(get_style_sheet(sheet_xml_ptr, style_sheet))) { + COMMON_LOG(WARN, "get_style_sheet failed", K(ret)); + } else if (OB_FAIL(xslt_apply_style_sheet(input_xml_ptr, output_xml_ptr, style_sheet, params))) { + COMMON_LOG(WARN, "xslt_apply_style_sheet failed", K(ret)); + } else if (OB_FAIL(xslt_save_to_string(allocator, output_xml_ptr, style_sheet, output))) { + COMMON_LOG(WARN, "xslt_save_to_string failed"); + } + xmlFreeDoc(output_xml_ptr); + xsltFreeStylesheet(style_sheet); + xmlFreeDoc(input_xml_ptr); + // If style_sheet is empty, then sheet_xml_ptr has no xlst style, + // and the memory of sheet_xml_ptr should be released; + if (OB_ISNULL(style_sheet)) { + xmlFreeDoc(sheet_xml_ptr); + } + } + return ret; +} + +int ObXml::parse_str_to_xml(const ObString &input, xmlDocPtr &xml_ptr) +{ + int ret = OB_SUCCESS; + + if (input.empty()) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "xml string is empty", K(input), K(ret)); + } else { + xml_ptr = xmlReadMemory(input.ptr(), input.length(), NULL, "utf-8", XML_PARSE_PEDANTIC); + xmlErrorPtr xml_err = xmlGetLastError(); + if (OB_NOT_NULL(xml_err)) { + ret = OB_ERR_PARSER_SYNTAX; + COMMON_LOG(WARN, "parse xml failed", KCSTRING(xml_err->message), K(input), K(ret)); + ObLibXml2SaxHandler::reset_libxml_last_error(); + } else if (OB_ISNULL(xml_ptr)) { + ret = OB_ERR_PARSER_SYNTAX; + COMMON_LOG(WARN, "parse xml failed", K(input), K(ret)); + } + } + + return ret; +} + +int ObXml::xslt_apply_style_sheet(const xmlDocPtr input_xml_ptr, + xmlDocPtr &output_xml_ptr, + const xsltStylesheetPtr xslt_ptr, + const ObIArray ¶ms) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(0 != params.count())) { + ret = OB_NOT_SUPPORTED; + COMMON_LOG(WARN, "params count is not zero", K(ret)); + } + if (OB_SUCC(ret)) { + output_xml_ptr = xsltApplyStylesheet(xslt_ptr, input_xml_ptr, NULL); + xmlErrorPtr xml_err = xmlGetLastError(); + if (OB_NOT_NULL(xml_err)) { + ret = OB_ERR_PARSER_SYNTAX; + COMMON_LOG(WARN, "xsltApplyStylesheet failed", 
KCSTRING(xml_err->message), K(ret)); + ObLibXml2SaxHandler::reset_libxml_last_error(); + } else if (OB_ISNULL(output_xml_ptr)) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "xslt xslt_apply_style_sheet failed", K(ret)); + } + } + return ret; +} + +int ObXml::xslt_save_to_string(ObIAllocator *allocator, + const xmlDocPtr result, + const xsltStylesheetPtr xslt_ptr, + ObString &output) +{ + int ret = OB_SUCCESS; + xmlChar *xml_result_buffer = NULL; + int32_t xml_result_length = 0; + int32_t res = xsltSaveResultToString(&xml_result_buffer, &xml_result_length, result, xslt_ptr); + if (-1 == res) { + xmlFree(xml_result_buffer); + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "xslt save to string failed", K(ret)); + } else { + char *result_buf = NULL; + if (OB_ISNULL(result_buf = static_cast(allocator->alloc(xml_result_length + 1)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + COMMON_LOG(WARN, "alloc memory failed", K(ret)); + } else { + MEMCPY(result_buf, xml_result_buffer, xml_result_length); + result_buf[xml_result_length] = '\0'; + output.assign_buffer(result_buf, xml_result_length + 1); + output.set_length(xml_result_length); + } + } + xmlFree(xml_result_buffer); + return ret; +} + +int ObXml::get_style_sheet(const xmlDocPtr &sheet_xml_ptr, xsltStylesheetPtr &xslt_ptr) +{ + int ret = OB_SUCCESS; + xsltFreeStylesheet(xslt_ptr); + xslt_ptr = xsltParseStylesheetDoc(sheet_xml_ptr); + if (OB_ISNULL(xslt_ptr)) { + ret = OB_ERR_XSLT_PARSE; + COMMON_LOG(WARN, "document is not a stylesheet", K(ret)); + } + return ret; +} + +} // end namespace common +} // end namespace oceanbase +#endif \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_xml.h b/deps/oblib/src/lib/xml/ob_xml.h new file mode 100644 index 0000000000..02cdb3625f --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xml.h @@ -0,0 +1,57 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ +#ifdef OB_BUILD_ORACLE_PL +#ifndef OB_XML_H_ +#define OB_XML_H_ + +#include "libxml2/libxml/parser.h" +#include "libxslt/transform.h" +#include "libxslt/xsltInternals.h" +#include "lib/string/ob_string.h" +#include "lib/container/ob_iarray.h" +#include "lib/xml/ob_libxml2_sax_handler.h" + +namespace oceanbase +{ +namespace common +{ + +class ObXml +{ +public: + static int parse_xml(const ObString &input); + + static int xslt_transform(ObIAllocator *allocator, + const ObString &input, + const ObString &xslt_sheet, + const ObIArray ¶ms, + ObString &output); + +private: + static int parse_str_to_xml(const ObString &input, + xmlDocPtr &xml_ptr); + static int xslt_apply_style_sheet(const xmlDocPtr input_xml_ptr, + xmlDocPtr &output_xml_ptr, + const xsltStylesheetPtr xslt_ptr, + const ObIArray ¶ms); + static int xslt_save_to_string(ObIAllocator *allocator, + const xmlDocPtr reuslt, + const xsltStylesheetPtr xslt_ptr, + ObString &output); + static int get_style_sheet(const xmlDocPtr &sheet_xml_ptr, xsltStylesheetPtr &xslt_ptr); +}; + +} // end namespace common +} // end namespace oceanbase + +#endif // OB_XML_H_ +#endif \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_xml_bin.cpp b/deps/oblib/src/lib/xml/ob_xml_bin.cpp new file mode 100644 index 0000000000..6c2769baca --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xml_bin.cpp @@ -0,0 +1,3434 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implement for the xml bin abstraction. + */ +#define USING_LOG_PREFIX SQL_ENG +#include "lib/ob_errno.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/oblog/ob_log.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/xml/ob_xml_parser.h" +#include "lib/xml/ob_xml_bin.h" +#include "lib/xml/ob_xml_tree.h" +#include "lib/xml/ob_xml_util.h" + +namespace oceanbase { +namespace common { + +uint32_t ObXmlElementBinHeader::header_size() +{ + uint32_t len = sizeof(uint8_t); + + if (is_prefix_) { + len += prefix_len_size_ + prefix_len_; + } + + return len; +} + +int ObXmlElementBinHeader::serialize(ObStringBuffer& buffer) +{ + INIT_SUCC(ret); + uint32_t header_len = header_size(); + if (OB_FAIL(buffer.reserve(header_len))) { + LOG_WARN("failed to reserve header", K(ret)); + } else { + /** + * | flag | prefix | standalone | + */ + char* data = buffer.ptr(); + int64_t pos = buffer.length(); + + *reinterpret_cast(data + pos) = flags_; + pos += sizeof(uint8_t); + buffer.set_length(pos); + + uint32_t left = header_len - sizeof(uint8_t); + if (is_prefix_) { + if (OB_FAIL(serialization::encode_vi64(data, pos + left, pos, prefix_len_))) { + LOG_WARN("failed to serialize for str xml obj", K(ret), K(prefix_len_size_)); + } else if (OB_FAIL(buffer.set_length(pos))) { + LOG_WARN("failed to update len for str obj", K(ret), K(pos)); + } else if (OB_FAIL(buffer.append(prefix_.ptr(), prefix_len_))) { + LOG_WARN("failed to append string obj value", K(ret)); + } else { + pos += prefix_len_; + buffer.set_length(pos); + } + } 
+ } + + return ret; +} + +int ObXmlElementBinHeader::deserialize(const char* data, int64_t length) +{ + INIT_SUCC(ret); + + if (length < sizeof(uint8_t) || OB_ISNULL(data)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to deserialize element header.", K(ret), K(length)); + } else { + flags_ = *reinterpret_cast(data); + int64_t pos = sizeof(uint8_t); + + if (is_prefix_) { + int64_t val = 0; + + if (OB_FAIL(serialization::decode_vi64(data, length, pos, &val))) { + LOG_WARN("failed to deserialize element header.", K(ret), K(length)); + } else if (length < pos + val) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize element header.", K(ret), K(length), K(pos), K(val)); + } else if (val == 0) { + prefix_len_size_ = val; + prefix_len_ = val; + } else { + prefix_.assign_ptr(data + pos, val); + prefix_len_ = val; + prefix_len_size_ = pos - sizeof(uint8_t); + pos += prefix_len_; + } + } + } + + return ret; +} + +uint32_t ObXmlAttrBinHeader::header_size() +{ + return is_prefix_ ? 
+ sizeof(int8_t) + prefix_len_size_ + prefix_len_ + sizeof(int8_t) + : sizeof(int8_t) + sizeof(int8_t); +} + +int ObXmlAttrBinHeader::serialize(ObStringBuffer* buffer) +{ + INIT_SUCC(ret); + uint32_t header_len = header_size(); + if (OB_FAIL(buffer->reserve(header_len))) { + LOG_WARN("failed to reserve header", K(ret)); + } else { + /** + * | type_ | prefix_ | + */ + char* data = buffer->ptr(); + int64_t pos = buffer->length(); + + data[pos++] = type_; + + *reinterpret_cast(data + pos) = flags_; + pos += sizeof(int8_t); + + uint32_t left = header_len - sizeof(int8_t); + if (is_prefix_) { + if (OB_FAIL(serialization::encode_vi64(data, pos + left, pos, prefix_len_))) { + LOG_WARN("failed to serialize for str xml obj", K(ret), K(prefix_len_size_)); + } else if (OB_FAIL(buffer->set_length(pos))) { + LOG_WARN("failed to update len for str obj", K(ret), K(pos)); + } else if (OB_FAIL(buffer->append(prefix_.ptr(), prefix_len_))) { + LOG_WARN("failed to append string obj value", K(ret)); + } + } else { + buffer->set_length(pos); + } + } + + return ret; +} + +int ObXmlAttrBinHeader::deserialize(const char* data, int64_t length) +{ + INIT_SUCC(ret); + + if (length <= sizeof(int8_t) || OB_ISNULL(data)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to deserialize attibute header.", K(ret), K(length)); + } else { + int64_t pos = 0; + type_ = static_cast(data[pos++]); + flags_ = static_cast(data[pos++]); + if (is_prefix_) { + int64_t val = 0; + + if (OB_FAIL(serialization::decode_vi64(data, length, pos, &val))) { + LOG_WARN("failed to deserialize attibute header.", K(ret), K(length)); + } else if (length < pos + val) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize attibute header.", K(ret), K(length), K(pos), K(val)); + } else if (val == 0) { + prefix_len_size_ = val; + } else { + prefix_.assign_ptr(data + pos, val); + prefix_len_ = val; + prefix_len_size_ = pos - (sizeof(uint8_t) * 2); + } + } + } + + return ret; +} + +uint64_t ObXmlDocBinHeader::header_size() 
+{ + uint32_t len = version_len_ + encode_len_ + + is_version_ + is_encoding_ + + is_standalone_ + sizeof(uint16_t) + + elem_header_.header_size(); + return len; +} + +int ObXmlDocBinHeader::serialize(ObStringBuffer& buffer) +{ + INIT_SUCC(ret); + + uint64_t header_len = header_size(); + if (OB_FAIL(buffer.reserve(header_len))) { + LOG_WARN("failed to reserve document header.", K(ret), K(header_len), K(buffer.length())); + } else { + /** + * | flag | version | encoding | standalone | element_header | + */ + + char* data = buffer.ptr(); + uint64_t pos = buffer.length(); + + *reinterpret_cast(data + pos) = flags_; + pos += sizeof(uint16_t); + buffer.set_length(pos); + + + if (is_version_) { + data[pos++] = version_len_; + MEMCPY(data + pos, version_.ptr(), version_len_); + pos += version_len_; + buffer.set_length(pos); + } + + if (is_encoding_) { + data[pos++] = encode_len_; + MEMCPY(data + pos, encoding_.ptr(), encode_len_); + pos += encode_len_; + buffer.set_length(pos); + } + + if (is_standalone_) { + data[pos++] = static_cast(standalone_); + buffer.set_length(pos); + } + + if (OB_FAIL(elem_header_.serialize(buffer))) { + LOG_WARN("failed to serialize element header.", K(ret), K(header_len), K(buffer.length())); + } + } + + return ret; +} + +int ObXmlDocBinHeader::deserialize(const char* data, int64_t length) +{ + INIT_SUCC(ret); + + if (length < sizeof(uint16_t) || OB_ISNULL(data)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize element header.", K(ret), K(length)); + } else { + flags_ = *reinterpret_cast(data); + int32_t pos = sizeof(uint16_t); + + + if (is_version_) { + if (length - pos < 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize version header.", K(ret), K(length), K(pos)); + } else { + version_len_ = data[pos++]; + if (length - pos < version_len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize version header.", K(ret), K(length), K(pos), K(version_len_)); + } else { + version_.assign_ptr(data + pos, 
version_len_); + pos += version_len_; + } + } + } + + if (OB_SUCC(ret) && is_encoding_) { + if (length - pos < 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize encoding header.", K(ret), K(length), K(pos)); + } else { + encode_len_ = data[pos++]; + if (length - pos < encode_len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize encoding header.", K(ret), K(length), K(pos), K(encode_len_)); + } else { + encoding_.assign_ptr(data + pos, encode_len_); + pos += encode_len_; + } + } + } + + if (OB_SUCC(ret) && is_standalone_) { + if (length - pos < 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize standalone.", K(ret), K(length), K(pos)); + } else { + standalone_ = data[pos++]; + } + } + + if (OB_SUCC(ret) && OB_FAIL(elem_header_.deserialize(data + pos, length - pos))) { + LOG_WARN("failed to deserialize element header.", K(ret), K(length), K(pos), K(encode_len_)); + } + } + + return ret; +} + +ObXmlAttributeSerializer::ObXmlAttributeSerializer(ObIMulModeBase* root, ObStringBuffer& buffer) + : root_(root), + buffer_(&buffer), + header_((static_cast(root))->get_prefix(), (static_cast(root))->type()) +{ +} + +ObXmlAttributeSerializer::ObXmlAttributeSerializer(const char* data, int64_t length, ObMulModeMemCtx* ctx) + : header_(), + data_(data), + data_len_(length), + allocator_(ctx->allocator_), + ctx_(ctx) {} + +int ObXmlAttributeSerializer::serialize() +{ + INIT_SUCC(ret); + + ObXmlAttribute* attr = static_cast(root_); + if (OB_FAIL(header_.serialize(buffer_))) { + LOG_WARN("failed to serialize attribute header.", K(ret), K(buffer_->length()), K(header_.type_)); + } else { + ObString value = attr->get_value(); + + int64_t ser_len = serialization::encoded_length_vi64(value.length()); + int64_t pos = buffer_->length(); + + if (OB_FAIL(buffer_->reserve(ser_len))) { + LOG_WARN("failed to reserver serialize size for str obj", K(ret), K(ser_len)); + } else if (OB_FAIL(serialization::encode_vi64(buffer_->ptr(), buffer_->capacity(), 
pos, value.length()))) { + LOG_WARN("failed to serialize for str obj", K(ret), K(ser_len)); + } else if (OB_FAIL(buffer_->set_length(pos))) { + LOG_WARN("failed to update len for str obj", K(ret), K(pos)); + } else if (OB_FAIL(buffer_->append(value.ptr(), value.length()))) { + LOG_WARN("failed to append string value", K(ret)); + } + } + + return ret; +} + +int ObXmlAttributeSerializer::deserialize(ObIMulModeBase*& handle) +{ + INIT_SUCC(ret); + if (OB_FAIL(header_.deserialize(data_, data_len_))) { + LOG_WARN("failed to serialize attribute header.", K(ret), K(data_len_)); + } else if (header_.header_size() > data_len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to serialize attribute header.", K(ret), K(header_.header_size()), K(data_len_)); + } else { + int64_t val = 0; + int64_t pos = header_.header_size(); + + ObString value; + + if (OB_FAIL(serialization::decode_vi64(data_, data_len_, pos, &val))) { + LOG_WARN("failed to deserialize attribute value string.", K(ret), K(val)); + } else if (data_len_ < pos + val) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize attribute value string.", K(ret), K(data_len_), K(pos), K(val)); + } else { + value.assign_ptr(data_ + pos, val); + } + + ObXmlAttribute* attr = nullptr; + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(attr = static_cast(allocator_->alloc(sizeof(ObXmlAttribute))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate attribute node.", K(ret)); + } else { + attr = new(attr) ObXmlAttribute(header_.type_, ctx_); + attr->set_prefix(header_.prefix_); + attr->set_value(value); + } + + if (OB_SUCC(ret)) { + handle = attr; + } + } + + return ret; +} + +ObXmlTextSerializer::ObXmlTextSerializer(ObIMulModeBase* root, ObStringBuffer& buffer) + : root_(root), + buffer_(&buffer) +{ + type_ = root->type(); +} + +ObXmlTextSerializer::ObXmlTextSerializer(const char* data, int64_t length, ObMulModeMemCtx* ctx) + : data_(data), + data_len_(length), + allocator_(ctx->allocator_), + ctx_(ctx) +{ +} 
+
+// Appends the text-like node in root_ to buffer_ as:
+//   header_size() bytes holding type_, a vi64-encoded value length, the value bytes.
+// Returns the first error reported by the buffer or the encoder.
+int ObXmlTextSerializer::serialize()
+{
+  INIT_SUCC(ret);
+
+  ObXmlText* text = static_cast<ObXmlText*>(root_);
+  ObString value;
+  text->get_value(value);
+
+  int8_t header_len = header_size();
+
+  int64_t ser_len = serialization::encoded_length_vi64(value.length());
+
+  if (OB_FAIL(buffer_->reserve(ser_len + header_len + value.length()))) {
+    LOG_WARN("failed to reserver serialize size for str obj", K(ret), K(ser_len));
+  } else if (OB_FAIL(ObMulModeVar::set_var(type_, ObMulModeBinLenSize::MBL_UINT8, buffer_->ptr() + buffer_->length()))) {
+    LOG_WARN("failed to set var", K(ret), K(type_));
+  } else if (OB_FAIL(buffer_->set_length(buffer_->length() + header_len))) {
+    // set_length() is checked everywhere else in this function; check it here
+    // too so a failed length update is not followed by further writes.
+    LOG_WARN("failed to update len for str obj", K(ret), K(header_len));
+  }
+
+  int64_t pos = buffer_->length();
+
+  if (OB_FAIL(ret)) {
+  } else if (OB_FAIL(serialization::encode_vi64(buffer_->ptr(), buffer_->capacity(), pos, value.length()))) {
+    LOG_WARN("failed to serialize for str obj", K(ret), K(ser_len));
+  } else if (OB_FAIL(buffer_->set_length(pos))) {
+    LOG_WARN("failed to update len for str obj", K(ret), K(pos));
+  } else if (OB_FAIL(buffer_->append(value.ptr(), value.length()))) {
+    LOG_WARN("failed to append string value", K(ret));
+  }
+
+  return ret;
+}
+
+// Rebuilds a text-like node from [data_, data_ + data_len_): the first byte is
+// the node type, followed by a vi64 value length and the value bytes. The
+// value references data_ directly (no copy).
+int ObXmlTextSerializer::deserialize(ObIMulModeBase*& handle)
+{
+  INIT_SUCC(ret);
+
+  if (data_len_ <= 0 || OB_ISNULL(data_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("failed to deserialize text", K(ret), K(data_len_));
+  } else {
+    type_ = static_cast<ObMulModeNodeType>(data_[0]);
+
+    int64_t val = 0;
+    int64_t pos = header_size();
+    ObString value;
+
+    if (OB_FAIL(serialization::decode_vi64(data_, data_len_, pos, &val))) {
+      LOG_WARN("failed to deserialize text string.", K(ret), K(val));
+    } else if (val < 0 || val > data_len_ - pos) {
+      // Length is read from the payload: reject negative or out-of-range
+      // values before building a string view over data_.
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("failed to deserialize text string.", K(ret), K(data_len_), K(pos), K(val));
+    } else {
+      value.assign_ptr(data_ + pos, val);
+    }
+
+    ObXmlText* text = nullptr;
+    if (OB_FAIL(ret)) {
+    } else if (OB_ISNULL(text = static_cast<ObXmlText*>(allocator_->alloc(sizeof(ObXmlText))))) {
+      ret =
OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate text node.", K(ret)); + } else { + text = new(text) ObXmlText(type_, ctx_); + text->set_value(value); + } + + if (OB_SUCC(ret)) { + handle = text; + } + } + + return ret; +} + +ObXmlElementSerializer::ObXmlElementSerializer(const char* data, int64_t length, ObMulModeMemCtx* ctx) + : ObMulModeContainerSerializer(data, length), + attr_count_(0), + child_count_(0), + data_(data), + data_len_(length), + allocator_(ctx->allocator_), + ctx_(ctx) +{ +} + +// root must be xml_element or xml_document +ObXmlElementSerializer::ObXmlElementSerializer(ObIMulModeBase* root, ObStringBuffer* buffer, bool serialize_key) + : ObMulModeContainerSerializer(root, buffer), + child_arr_(), + serialize_key_(serialize_key), + serialize_try_time_(0) +{ + attr_count_ = root->attribute_count(); + child_count_ = root->size(); + int64_t children_count = size(); + + if (ObMulModeVar::get_var_type(children_count) != ObMulModeVar::get_var_type(child_count_)) { + new (this) ObMulModeContainerSerializer(root, buffer, children_count); + } + + // child_arr_[0] : attribute, namespace + // child_arr_[1] : child such as pi, element, text, comment, cdata + + int is_has_attr = attr_count_ > 0 ? 
1 : 0; + if (is_has_attr) { + // only element node have attribute, so root must be element node + child_arr_[0].l_start_ = child_arr_[0].g_start_ = 0; + child_arr_[0].l_last_ = child_arr_[0].g_last_ = attr_count_ - 1; + child_arr_[0].entry_ = root->get_attribute_handle(); + } + + if (child_count_ > 0) { + child_arr_[is_has_attr].l_start_ = 0; + child_arr_[is_has_attr].l_last_ = child_count_ - 1; + + child_arr_[is_has_attr].g_start_ = attr_count_; + child_arr_[is_has_attr].g_last_ = attr_count_ + child_count_ - 1; + child_arr_[is_has_attr].entry_ = root; + } + + int64_t header_len = header_.header_size(); + if (type_ == M_DOCUMENT || type_ == M_UNPARSED || type_ == M_CONTENT || type_ == M_UNPARESED_DOC) { + new (&doc_header_) ObXmlDocBinHeader(root->get_version(), + root->get_encoding(), + root->get_encoding_flag(), + root->get_standalone(), + root->has_xml_decl()); + new(&doc_header_.elem_header_) ObXmlElementBinHeader(root->is_unparse(), + root->get_prefix()); + header_len += doc_header_.header_size(); + } else { + new(&ele_header_) ObXmlElementBinHeader(root->is_unparse(), + root->get_prefix()); + header_len += ele_header_.header_size(); + } + + index_start_ = header_.start() + header_len; + index_entry_size_ = header_.count_var_size_; + + // offset start + key_entry_start_ = index_start_ + children_count * index_entry_size_; + key_entry_size_ = header_.get_entry_var_size(); + + value_entry_start_ = children_count * (key_entry_size_ * 2) + key_entry_start_; + value_entry_size_ = header_.get_entry_var_size(); + + key_start_ = (value_entry_size_ + sizeof(uint8_t)) * size() + value_entry_start_; +} + +void ObXmlElementSerializer::set_index_entry(int64_t origin_index, int64_t sort_index) +{ + int64_t offset = index_start_ + origin_index * header_.get_count_var_size(); + char* write_buf = header_.buffer()->ptr() + offset; + ObMulModeVar::set_var(sort_index, header_.get_count_var_size_type(), write_buf); +} + +void ObXmlElementSerializer::set_key_entry(int64_t 
entry_idx, int64_t key_offset, int64_t key_len) +{ + int64_t offset = key_entry_start_ + entry_idx * (header_.get_entry_var_size() * 2); + char* write_buf = header_.buffer()->ptr() + offset; + ObMulModeVar::set_var(key_offset, header_.get_entry_var_size_type(), write_buf); + + write_buf += header_.get_entry_var_size(); + ObMulModeVar::set_var(key_len, header_.get_entry_var_size_type(), write_buf); +} + +int ObXmlElementSerializer::reserve_meta() +{ + INIT_SUCC(ret); + ObStringBuffer& buffer = *header_.buffer(); + + int64_t pos = buffer.length(); + uint32_t reserve_size = key_start_ - index_start_; + if (OB_FAIL(buffer.reserve(reserve_size))) { + LOG_WARN("failed to reserve buffer.", K(ret), K(reserve_size), K(header_.start())); + } else { + buffer.set_length(pos + reserve_size); + } + return ret; +} + +void ObXmlElementSerializer::set_value_entry(int64_t entry_idx, uint8_t type, int64_t value_offset) +{ + int64_t offset = value_entry_start_ + entry_idx * (header_.get_entry_var_size() + sizeof(uint8_t)); + char* write_buf = header_.buffer()->ptr() + offset; + *reinterpret_cast(write_buf) = type; + ObMulModeVar::set_var(value_offset, header_.get_entry_var_size_type(), write_buf + sizeof(uint8_t)); +} + +int ObXmlElementSerializer::serialize_child_key(const ObString& key, int64_t idx) +{ + INIT_SUCC(ret); + ObStringBuffer& buffer = *header_.buffer(); + int64_t key_offset = buffer.length() - header_.start(); + + if (OB_FAIL(buffer.append(key.ptr(), key.length()))) { + LOG_WARN("failed to append key string.", K(ret), K(buffer.length()), K(key.length())); + } else { + // idx fill later + set_key_entry(idx, key_offset, key.length()); + } + + return ret; +} + +int ObXmlElementSerializer::serialize_key(int arr_idx, int64_t depth) +{ + INIT_SUCC(ret); + if (child_arr_[arr_idx].is_valid()) { + ObXmlNode* xnode = static_cast(child_arr_[arr_idx].entry_); + int64_t g_idx = child_arr_[arr_idx].g_start_; + ObXmlNode::iterator iter = xnode->sorted_begin(); + ObXmlNode::iterator end 
= xnode->sorted_end(); + ObStringBuffer& buffer = *header_.buffer(); + for (; OB_SUCC(ret) && iter < end ; ++iter, g_idx++) { + ObXmlNode* cur = static_cast(*iter); + if (OB_ISNULL(cur)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get cur null", K(ret)); + } else { + ObMulModeNodeType cur_type = cur->type(); + switch (cur_type) { + case M_UNPARSED: + case M_UNPARESED_DOC: + case M_DOCUMENT: + case M_ELEMENT: + case M_CONTENT: + case M_ATTRIBUTE: + case M_NAMESPACE: + case M_INSTRUCT: + case M_TEXT: + case M_COMMENT: + case M_CDATA: { + if (OB_FAIL(serialize_child_key(cur->get_key(), g_idx))) { + LOG_WARN("failed to serialize key string.", K(ret), K(cur->get_key().length()), K(buffer.length())); + } else { + set_index_entry( cur->get_index() + child_arr_[arr_idx].g_start_, g_idx); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to serialize key, current node type not correct.", K(ret), K(cur_type)); + break; + } + } + } + } + } + + return ret; +} + +int ObXmlElementSerializer::serialize_value(int arr_idx, int64_t depth) +{ + INIT_SUCC(ret); + if (child_arr_[arr_idx].is_valid()) { + ObXmlNode* xnode = static_cast(child_arr_[arr_idx].entry_); + int64_t g_idx = child_arr_[arr_idx].g_start_; + + ObXmlNode::iterator iter = xnode->sorted_begin(); + ObXmlNode::iterator end = xnode->sorted_end(); + ObStringBuffer& buffer = *header_.buffer(); + + for (; OB_SUCC(ret) && iter < end ; ++iter, g_idx++) { + ObXmlNode* cur = static_cast(*iter); + ObMulModeNodeType cur_type = cur->type(); + + int64_t value_start = buffer.length() - header_.start(); + + switch (cur_type) { + case M_UNPARSED: + case M_UNPARESED_DOC: + case M_DOCUMENT: + case M_ELEMENT: + case M_CONTENT: { + ObXmlElementSerializer ele_serializer(cur, header_.buffer()); + if (OB_FAIL(ele_serializer.serialize(depth + 1))) { + LOG_WARN("failed to serialize element child", K(ret), K(buffer.length())); + } else { + set_value_entry(g_idx, cur_type, value_start); + } + break; + } + case M_ATTRIBUTE: 
+ case M_NAMESPACE: + case M_INSTRUCT: { + ObXmlAttributeSerializer attr_serializer(cur, buffer); + if (OB_FAIL(attr_serializer.serialize())) { + LOG_WARN("failed to serialize attribute.", K(ret), K(cur_type), K(buffer.length())); + } else { + set_value_entry(g_idx, cur_type, value_start); + } + break; + } + case M_TEXT: + case M_COMMENT: + case M_CDATA: { + ObXmlTextSerializer serializer(cur, buffer); + if (OB_FAIL(serializer.serialize())) { + LOG_WARN("failed to serialize text.", K(ret), K(cur_type), K(buffer.length())); + } else { + set_value_entry(g_idx, cur_type, value_start); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to serialize key, current node type not correct.", K(ret), K(cur_type)); + break; + } + } + } + } + + return ret; +} + +int ObXmlElementSerializer::deserialize(ObIMulModeBase*& node) +{ + INIT_SUCC(ret); + int64_t pos = 0; + int64_t left_data_len = data_len_; + const char* data = data_; + + ObXmlElement *handle = nullptr; + if (OB_FAIL(header_.deserialize())) { + LOG_WARN("failed to deserialize header.", K(ret)); + } else if (OB_ISNULL(handle = static_cast(allocator_->alloc( + (header_.type() == ObMulModeNodeType::M_DOCUMENT + || header_.type() == ObMulModeNodeType::M_UNPARESED_DOC + || header_.type() == ObMulModeNodeType::M_UNPARSED + || header_.type() == ObMulModeNodeType::M_CONTENT) ? 
+ sizeof(ObXmlDocument) : sizeof(ObXmlElement))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory for element node", K(ret)); + } else { + type_ = header_.type(); + pos += header_.header_size(); + left_data_len = data_len_ - pos; + if (type_ == ObMulModeNodeType::M_DOCUMENT + || type_ == ObMulModeNodeType::M_UNPARSED + || type_ == ObMulModeNodeType::M_CONTENT + || type_ == ObMulModeNodeType::M_UNPARESED_DOC) { + new (&doc_header_)ObXmlDocBinHeader(); + if (OB_FAIL(doc_header_.deserialize(data + pos, left_data_len))) { + LOG_WARN("failed to deserialize header.", K(ret), K(left_data_len)); + } else { + ObXmlDocument* doc = new(handle) ObXmlDocument(type_, ctx_); + doc->set_version(doc_header_.version_); + doc->set_encoding(doc_header_.encoding_); + doc->set_prefix(doc_header_.elem_header_.prefix_); + doc->set_unparse(doc_header_.elem_header_.is_unparse_); + doc->set_has_xml_decl(doc_header_.is_xml_decl_); + doc->set_encoding_flag(doc_header_.is_encoding_empty_); + doc->set_standalone(doc_header_.standalone_); + pos += doc_header_.header_size(); + left_data_len = data_len_ - pos; + handle = doc; + } + } else if (type_ == ObMulModeNodeType::M_ELEMENT) { + new (&ele_header_)ObXmlElementBinHeader(); + if (OB_FAIL(ele_header_.deserialize(data + pos, left_data_len))) { + LOG_WARN("failed to deserialize header.", K(ret), K(left_data_len)); + } else { + handle = new(handle) ObXmlElement(type_, ctx_); + handle->set_prefix(ele_header_.prefix_); + handle->set_unparse(ele_header_.is_unparse_); + pos += ele_header_.header_size(); + left_data_len = data_len_ - pos; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize header, unexpected type.", K(ret), K(type_)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(handle->alter_member_sort_policy(false))) { + LOG_WARN("failed to alter sort policy.", K(ret)); + } else { + int64_t count = header_.count(); + + index_start_ = pos; + index_entry_size_ = header_.get_count_var_size(); + + 
key_entry_start_ = index_start_ + index_entry_size_ * count; + + key_entry_size_ = value_entry_size_ = header_.get_entry_var_size(); + + value_entry_start_ = key_entry_start_ + (key_entry_size_ * 2) * count; + + key_start_ = value_entry_start_ + (sizeof(uint8_t) + value_entry_size_) * count; + + if (key_start_ > data_len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize.", K(ret), K(key_start_), K(data_len_), K(pos), K(key_entry_size_)); + } else if (count && value_entry_start_ + (count - 1) * (value_entry_size_ + sizeof(uint8_t)) > data_len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize.", K(ret), K(value_entry_start_), K(data_len_), K(pos), K(key_entry_size_)); + } else { + for (int64_t idx = 0; OB_SUCC(ret) && idx < count; ++idx) { + int64_t key_offset = 0; + int64_t key_len = 0; + + int64_t type = 0; + int64_t value_offset = 0; + + int64_t sort_index = 0; + + const char* cur_val_entry_ptr = data + value_entry_start_; + const char* cur_key_entry_ptr = data + key_entry_start_; + + if (OB_FAIL(ObMulModeVar::read_size_var(data_ + index_start_ + index_entry_size_ * idx, index_entry_size_, &sort_index))) { + LOG_WARN("failed to read sort index.", K(ret), K(idx), K(index_entry_size_)); + } else if (sort_index >= count) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize, sort index too large.", K(ret), K(sort_index), K(count)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(cur_val_entry_ptr + (sizeof(uint8_t) + value_entry_size_) * sort_index, sizeof(uint8_t), &type)) + || OB_FAIL(ObMulModeVar::read_size_var(cur_val_entry_ptr + (sizeof(uint8_t) + value_entry_size_) * sort_index + sizeof(uint8_t), value_entry_size_, &value_offset))) { + LOG_WARN("failed to read size var.", K(ret), K(sort_index), K(value_entry_start_), K(pos), K(value_entry_size_)); + } else if (!is_valid_xml_type(type) || value_offset > data_len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize.", K(ret), K(sort_index), K(count)); + } else 
if (OB_FAIL(ObMulModeVar::read_size_var(cur_key_entry_ptr + (key_entry_size_ * 2) * sort_index, key_entry_size_, &key_offset)) + || OB_FAIL(ObMulModeVar::read_size_var(cur_key_entry_ptr + (key_entry_size_ * 2) * sort_index + key_entry_size_, key_entry_size_, &key_len))) { + LOG_WARN("failed to deserialize.", K(ret), K(key_start_), K(data_len_), K(pos), K(key_entry_size_)); + } else if (key_offset + key_len > data_len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize.", K(ret), K(key_offset), K(key_len), K(data_len_)); + } else { + ObString key(key_len, data + key_offset); + const char* value = value_offset + data; + switch (type) { + case M_ELEMENT: { + ObIMulModeBase* child = nullptr; + ObXmlElementSerializer serializer(value, data_len_ - value_offset, ctx_); + + if (OB_FAIL(serializer.deserialize(child))) { + LOG_WARN("fail to deserialize element", K(ret), K(data_len_), K(value_offset)); + } else if (OB_FAIL(handle->add_element(static_cast(child)))) { + LOG_WARN("fail to append element", K(ret)); + } else { + static_cast(child)->set_xml_key(key); + child_count_++; + } + break; + } + + case M_ATTRIBUTE: + case M_NAMESPACE: + case M_INSTRUCT: { + ObIMulModeBase* child = nullptr; + ObXmlAttributeSerializer serializer(value, data_len_ - value_offset, ctx_); + + if (OB_FAIL(serializer.deserialize(child))) { + LOG_WARN("fail to deserialize element", K(ret), K(data_len_), K(value_offset)); + } else if (type != M_INSTRUCT && OB_FAIL(handle->add_attribute(static_cast(child)))) { + LOG_WARN("fail to append element", K(ret)); + } else if (type == M_INSTRUCT && OB_FAIL(handle->add_element(static_cast(child)))) { + LOG_WARN("fail to append element", K(ret)); + } else { + static_cast(child)->set_xml_key(key); + attr_count_++; + } + break; + } + + case M_TEXT: + case M_COMMENT: + case M_CDATA: { + ObIMulModeBase* child = nullptr; + ObXmlTextSerializer serializer(value, data_len_ - value_offset, ctx_); + + if (OB_FAIL(serializer.deserialize(child))) { + 
LOG_WARN("fail to deserialize element", K(ret), K(data_len_), K(value_offset)); + } else if (OB_FAIL(handle->add_element(static_cast(child)))) { + LOG_WARN("fail to append element", K(ret)); + } else { + static_cast(child)->set_xml_key(key); + attr_count_++; + } + break; + } + + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to deserialize node", K(ret), K(type)); + } + } + } + } + } + } + + if (OB_SUCC(ret)) { + node = handle; + } + + + return ret; +} + +int ObXmlElementSerializer::serialize(int64_t depth) +{ + INIT_SUCC(ret); + ObStringBuffer& buffer = *header_.buffer(); + int64_t start = buffer.length(); + + ObXmlElement* ele = static_cast(root_); + + if (depth > 0) { + } else if (OB_FAIL(ele->set_flag_by_descandant())) { + LOG_WARN("failed to eval sepecail flag on header", K(ret)); + } else if (ele->type() != M_UNPARESED_DOC && ele->is_unparse()) { + header_.type_ = M_UNPARSED; + type_ = M_UNPARSED; + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(header_.serialize())) { + LOG_WARN("failed to serialize header.", K(ret), K(buffer.length())); + } else if (type_ == M_DOCUMENT || type_ == M_UNPARSED || type_ == M_UNPARESED_DOC || type_ == M_CONTENT) { + if (OB_FAIL(doc_header_.serialize(buffer))) { + LOG_WARN("failed to document header key string.", K(ret), K(doc_header_.header_size()), K(buffer.length())); + } + } else if (type_ == M_ELEMENT) { + if(OB_FAIL(ele_header_.serialize(buffer))) { + LOG_WARN("failed to serialize element header.", K(ret), K(ele_header_.header_size()), K(buffer.length())); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to serialize header, not leggal type.", K(ret), K(type_)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(reserve_meta())) { + LOG_WARN("failed to reserve meta.", K(ret), K(buffer.length())); + } else if (OB_FAIL(serialize_key(0, depth))) { + LOG_WARN("failed to serialize key array 0.", K(ret), K(buffer.length())); + } else if (OB_FAIL(serialize_key(1, depth))) { + LOG_WARN("failed to serialize key 
array 1.", K(ret), K(buffer.length())); + } else if (OB_FAIL(serialize_value(0, depth))) { + LOG_WARN("failed to serialize value array 0.", K(ret), K(buffer.length())); + } else if (OB_FAIL(serialize_value(1, depth))) { + LOG_WARN("failed to serialize value array 1.", K(ret), K(buffer.length())); + } else { + int64_t end = buffer.length(); + int64_t total_size = end - start; + int64_t children_num = size(); + + if (ObMulModeVar::get_var_type(total_size) > header_.get_obj_var_size_type() + || ObMulModeVar::get_var_type(children_num) > header_.get_count_var_size_type()) { + if (serialize_try_time_ >= MAX_RETRY_TIME) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to serialize as meta info not match.", K(ret), K(total_size), K(children_num), K(header_)); + } else { + int64_t delta = total_size - header_.get_obj_size(); + ele->set_delta_serialize_size(delta); + serialize_try_time_++; + buffer.set_length(start); + new (this) ObXmlElementSerializer(root_, &buffer); + if (OB_FAIL(serialize(depth))) { + LOG_WARN("failed to serialize.", K(ret), K(buffer.length())); + } + } + } else { + header_.set_obj_size(total_size); + header_.set_count(children_num); + } + } + + return ret; +} + +ObXmlBinIndexMeta::ObXmlBinIndexMeta(const char* index_entry, int64_t idx, int64_t var_size) +{ + ObMulModeVar::read_size_var(index_entry + idx * var_size, var_size, &pos_); +} + +ObXmlBinIndexMeta::ObXmlBinIndexMeta(const char* index_entry, int64_t idx, uint8_t var_type) +{ + ObMulModeVar::read_var(index_entry + ObMulModeVar::get_var_size(var_type), var_type, &pos_); +} + +int64_t ObXmlBinIndexMeta::get_index() +{ + return pos_; +} + +void ObXmlBinKeyMeta::read(const char* cur_entry, int64_t var_size) +{ + ObMulModeVar::read_size_var(cur_entry, var_size, &offset_); + ObMulModeVar::read_size_var(cur_entry + var_size, var_size, &len_); +} + +void ObXmlBinKeyMeta::read(const char* cur_entry, uint8_t var_type) +{ + ObMulModeVar::read_var(cur_entry, var_type, &offset_); + 
ObMulModeVar::read_var(cur_entry + ObMulModeVar::get_var_size(var_type), var_type, &len_); +} + +ObXmlBinKeyMeta::ObXmlBinKeyMeta(const ObXmlBinKeyMeta& other) + : offset_(other.offset_), len_(other.len_) +{ +} + +ObXmlBinKeyMeta::ObXmlBinKeyMeta(const char* cur_entry, uint8_t var_type) +{ + read(cur_entry, var_type); +} + +ObXmlBinKeyMeta::ObXmlBinKeyMeta(const char* cur_entry, int64_t var_size) +{ + read(cur_entry, var_size); +} + +ObXmlBinKeyMeta::ObXmlBinKeyMeta(const char* key_entry, int64_t idx, uint8_t var_type) +{ + const char* cur_entry = key_entry + ObMulModeVar::get_var_size(var_type) * 2 * idx; + read(cur_entry, var_type); +} + +ObXmlBinKeyMeta::ObXmlBinKeyMeta(const char* key_entry, int64_t idx, int64_t var_size) +{ + const char* cur_entry = key_entry + var_size * 2 * idx; + read(cur_entry, var_size); +} + +ObXmlBinKeyMeta::ObXmlBinKeyMeta(int64_t offset, int32_t len) + : offset_(offset), + len_(len) +{ +} + +ObXmlBinKey::ObXmlBinKey(const char* data, int64_t cur_entry, uint8_t var_type) + : meta_(data + cur_entry, var_type) +{ + key_.assign_ptr(data + meta_.offset_, meta_.len_); +} + +ObXmlBinKey::ObXmlBinKey(const char* data, int64_t cur_entry, int64_t var_size) +: meta_(data + cur_entry, var_size) +{ + key_.assign_ptr(data + meta_.offset_, meta_.len_); +} + +ObXmlBinKey::ObXmlBinKey(const char* data, int64_t key_entry, int64_t idx, uint8_t var_type) +: meta_(data + key_entry, idx, var_type) +{ + key_.assign_ptr(data + meta_.offset_, meta_.len_); +} + +ObXmlBinKey::ObXmlBinKey(const char* data, int64_t key_entry, int64_t idx, int64_t var_size) +: meta_(data + key_entry, idx, var_size) +{ + key_.assign_ptr(data + meta_.offset_, meta_.len_); +} + +ObXmlBinKey::ObXmlBinKey(const char* data, int64_t offset, int32_t len) +: meta_(offset, len) +{ + key_.assign_ptr(data + offset, len); +} + +ObXmlBinKey::ObXmlBinKey(const ObXmlBinKey& other) + : meta_(other.meta_), + key_(other.key_) +{ +} + +ObXmlBinKey::ObXmlBinKey(const ObString& key) + : key_(key) +{ +} 
+ +ObString ObXmlBin::get_version() +{ + return meta_.get_version(); +} + +ObString ObXmlBin::get_encoding() +{ + return meta_.get_encoding(); +} + +ObString ObXmlBin::get_prefix() +{ + return meta_.get_prefix(); +} + +uint16_t ObXmlBin::get_standalone() +{ + return meta_.standalone_; +} + +bool ObXmlBin::get_is_empty() +{ + return meta_.is_empty_; +} + +bool ObXmlBin::get_unparse() +{ + return meta_.is_unparse_; +} + +bool ObXmlBin::check_extend() +{ + bool ret_bool = false; + if (meta_.type_ != M_ELEMENT && meta_.type_ != M_CONTENT && meta_.type_ != M_DOCUMENT) { + } else if (buffer_for_extend_) { + ret_bool = true; + } else if (meta_.len_ > meta_.total_ && meta_.total_ > 0) { + ret_bool = true; + } + return ret_bool; +} + +bool ObXmlBin::check_if_defined_ns() +{ + bool ret_bool = false; + if (type() != M_ELEMENT) { + } else { + int64_t attribute_num = attribute_size(); + INIT_SUCC(ret); + for (int pos = 0; !ret_bool && OB_SUCC(ret) && pos < attribute_num ; ++pos) { + ObXmlBin buff(*this); + ObXmlBin* tmp = &buff; + + if (OB_FAIL(construct(tmp, allocator_))) { + } else if (OB_FAIL(tmp->set_at(pos))) { + } else if (tmp->type() == M_NAMESPACE) { + ret_bool = true; + } else if (tmp->type() == M_ATTRIBUTE) { + } else { + break; + } + } + } + return ret_bool; +} + +int ObXmlBin::parse_tree(ObIMulModeBase* root, bool set_alter_member) +{ + INIT_SUCC(ret); + ObXmlNode *xml_node = NULL; + + if (OB_ISNULL(root) || root->is_binary()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to parse tree null pointer.", K(ret)); + } else if (OB_ISNULL(xml_node = static_cast(root))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cast to xmlnode get null", K(ret)); + } else if (OB_FAIL(set_alter_member && xml_node->alter_member_sort_policy(true))) { + LOG_WARN("fail to sort child element", K(ret)); + } else { + buffer_.reset(); + if (ObXmlUtil::use_text_serializer(root->type())) { + ObXmlTextSerializer serializer(root, buffer_); + if (OB_FAIL(serializer.serialize())) { + LOG_WARN("failed 
to serialize.", K(ret), K(root->type()), K(root->get_serialize_size())); + } + } else if (ObXmlUtil::use_element_serializer(root->type())) { + ObXmlElementSerializer serializer(root, &buffer_); + if (OB_FAIL(serializer.serialize(0))) { + LOG_WARN("failed to serialize.", K(ret), K(root->type()), K(root->get_serialize_size())); + } + } else if (ObXmlUtil::use_attribute_serializer(root->type())) { + ObXmlAttributeSerializer serializer(root, buffer_); + if (OB_FAIL(serializer.serialize())) { + LOG_WARN("failed to serialize.", K(ret), K(root->type()), K(root->get_serialize_size())); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error type to serialize.",K(ret), K(root->type())); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(meta_.parser(buffer_.ptr(), buffer_.length()))) { + LOG_WARN("failed to parse meta.", K(ret)); + } + } + + return ret; +} + +int ObXmlBin::append_extend(char* start, int64_t len) +{ + INIT_SUCC(ret); + if (OB_ISNULL(start)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("failed to parse tree null pointer.", K(ret)); + } else if (meta_.parsed_ || OB_SUCC(parse())) { + if (buffer_.length() == 0) { + buffer_for_extend_ = true; + } else if (buffer_.length() == meta_.len_ && buffer_.ptr() == meta_.data_) { + buffer_for_extend_ = false; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("buffer should be data_ or null", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to parse", K(ret)); + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(buffer_.append(start, len))) { + LOG_WARN("fail to append extend", K(ret)); + } else if (!buffer_for_extend_) { + meta_.len_ += len; + } + } + return ret; +} + +int ObXmlBin::append_extend(ObXmlElement* ele) +{ + INIT_SUCC(ret); + + int buffer_length = 0; + if (OB_ISNULL(ele)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("failed to parse tree null pointer.", K(ret)); + } else if (OB_FAIL(ele->alter_member_sort_policy(true))) { + LOG_WARN("fail to sort child element", K(ret)); + } else if (meta_.parsed_ || 
OB_SUCC(parse())) { + if (buffer_.length() == 0) { + buffer_for_extend_ = true; + } else if (buffer_.length() == meta_.len_ && buffer_.ptr() == meta_.data_) { + buffer_length = buffer_.length(); + buffer_for_extend_ = false; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("buffer should be data_ or null", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to parse", K(ret)); + } + + if (OB_SUCC(ret)) { + ObXmlElementSerializer serializer_element(ele, &buffer_); + if (OB_FAIL(serializer_element.serialize(0))) { + LOG_WARN("failed to serialize.", K(ret), K(ele->type()), K(ele->get_serialize_size())); + } else if (!buffer_for_extend_) { + meta_.len_ += (buffer_.length() - buffer_length); + } + } + + return ret; +} + +int ObXmlBin::remove_extend() +{ + INIT_SUCC(ret); + if (buffer_for_extend_) { + buffer_for_extend_ = false; + buffer_.reset(); + } else { + meta_.len_ = meta_.total_ > 0 ? meta_.total_ : meta_.len_; + } + return ret; +} + +int ObXmlBin::get_extend(char*& start, int64_t& len) +{ + INIT_SUCC(ret); + if (buffer_for_extend_) { + start = buffer_.ptr(); + len = buffer_.length(); + } else if (meta_.total_ > 0 && meta_.len_ > meta_.total_) { + start = meta_.get_data() + meta_.total_; + len = meta_.len_ - meta_.total_; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("there is no extend", K(ret)); + } + return ret; +} + +int ObXmlBin::get_extend(ObXmlBin& extend) +{ + INIT_SUCC(ret); + extend.buffer_.reset(); + int64_t len = 0; + char* start = buffer_.ptr(); + if (!check_extend()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("there is no extend", K(ret)); + } else if ((meta_.total_ > 0 && meta_.len_ > meta_.total_) || buffer_for_extend_) { + len = buffer_for_extend_? buffer_.length() : meta_.len_ - meta_.total_; + start = buffer_for_extend_ ? 
buffer_.ptr() : const_cast (meta_.data_) + meta_.total_; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("there is no extend", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (OB_FALSE_IT(extend.buffer_.reset())) { + } else if (OB_FAIL(extend.parse(start, len))) { + LOG_WARN("failed to parse meta.", K(ret)); + } + return ret; +} + +int ObXmlBin::merge_extend(ObXmlBin& res) +{ + INIT_SUCC(ret); + ObXmlBin extend; + ObXmlBinMerge bin_merge; + if (OB_FAIL(get_extend(extend))) { + LOG_WARN("failed to get extend.", K(ret)); + } else if (OB_FAIL(bin_merge.merge(*this, extend, res))) { + LOG_WARN("failed to merge.", K(ret)); + } else if (OB_FAIL(res.meta_.parser(res.buffer_.ptr(), res.buffer_.length()))) { + LOG_WARN("failed to parse.", K(ret)); + } else { + res.meta_.key_len_ = meta_.key_len_; + res.meta_.key_ptr_ = meta_.key_ptr_; + } + return ret; +} + +int ObXmlBin::to_tree(ObIMulModeBase*& root) +{ + INIT_SUCC(ret); + + ObMulModeNodeType node_type; + + if (check_extend()) { + ObXmlBin merge(ctx_); + if (OB_FAIL(merge_extend(merge))) { + LOG_WARN("failed to merge extend.", K(ret)); + } else if (OB_FAIL(merge.to_tree(root))) { + LOG_WARN("failed to tree.", K(ret)); + } + } else if (OB_FAIL(parse())) { + LOG_WARN("failed to parse meta.", K(ret)); + } else if (FALSE_IT(node_type = type())) { + } else if (ObXmlUtil::use_element_serializer(node_type)) { + ObXmlElementSerializer deserializer(meta_.data_, meta_.len_, ctx_); + if (OB_FAIL(deserializer.deserialize(root))) { + LOG_WARN("failed to deserialize.", K(ret), K(meta_)); + } else if (node_type == M_ELEMENT) { + (static_cast(root))->set_xml_key(meta_.get_key()); + } + + if (OB_SUCC(ret)) { + ObXmlNode* xnode = static_cast(root); + if (OB_FAIL(xnode->alter_member_sort_policy(true))) { + LOG_WARN("failed to sort member.", K(ret), K(meta_)); + } + } + } else if (ObXmlUtil::use_text_serializer(node_type)) { + ObXmlTextSerializer serializer(meta_.data_, meta_.len_, ctx_); + + if (OB_FAIL(serializer.deserialize(root))) { + 
LOG_WARN("fail to deserialize text", K(ret), K(meta_.data_), K(meta_.len_), K(node_type)); + } + } else if (ObXmlUtil::use_attribute_serializer(node_type)) { + ObXmlAttributeSerializer serializer(meta_.data_, meta_.len_, ctx_); + if (OB_FAIL(serializer.deserialize(root))) { + LOG_WARN("fail to deserialize attrubyte", K(ret), K(meta_.data_), K(meta_.len_), K(node_type)); + } else { + (static_cast(root))->set_xml_key(meta_.get_key()); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to parse meta.", K(ret)); + } + + + return ret; +} + +int ObXmlBinMetaParser::parser(const char* data, int64_t len) +{ + INIT_SUCC(ret); + + if (data_ == data && len_ == len) { + // do nothing + } else { + new (this) ObXmlBinMetaParser(); + data_ = data; + len_ = len; + } + return parser(); +} + +inline ObString ObXmlBinMetaParser::get_key() +{ + return ObString(key_len_, key_ptr_); +} + +inline ObString ObXmlBinMetaParser::get_value() +{ + return ObString(value_len_, value_ptr_); +} + +inline ObString ObXmlBinMetaParser::get_version() +{ + return ObString(version_len_, version_ptr_); +} + +inline ObString ObXmlBinMetaParser::get_encoding() +{ + return ObString(encoding_len_, encoding_ptr_); +} + +inline uint16_t ObXmlBinMetaParser::get_standalone() +{ + return standalone_; +} + +inline ObString ObXmlBinMetaParser::get_prefix() +{ + return ObString(prefix_len_, prefix_ptr_); +} + +inline uint8_t ObXmlBinMetaParser::get_key_entry_size() +{ + return key_entry_size_; +} + +inline uint8_t ObXmlBinMetaParser::get_key_entry_size_type() +{ + return key_entry_size_type_; +} + +inline uint8_t ObXmlBinMetaParser::get_value_entry_size() +{ + return value_entry_size_; +} + +inline uint8_t ObXmlBinMetaParser::get_value_entry_size_type() +{ + return value_entry_size_type_; +} + +inline int64_t ObXmlBinMetaParser::get_value_offset(int64_t index) +{ + return value_entry_ + index * (value_entry_size_ + sizeof(uint8_t)); +} + +inline int64_t ObXmlBinMetaParser::get_key_offset(int64_t index) +{ 
+ return key_entry_ + index * (key_entry_size_ * 2); +} + +inline int64_t ObXmlBinMetaParser::get_index(int64_t index) +{ + return index_entry_ + index * index_entry_size_; +} + +int ObXmlBinMetaParser::parser() +{ + INIT_SUCC(ret); + + if (len_ < 1 || OB_ISNULL(data_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to parser header.", K(ret), K(len_)); + } else if (parsed_) { + } else { + type_ = static_cast(data_[0]); + switch (type_) { + case M_UNPARSED: + case M_UNPARESED_DOC: + case M_DOCUMENT: + case M_ELEMENT: + case M_CONTENT: { + ObMulBinHeaderSerializer header(data_, len_); + ObXmlDocBinHeader doc_header; + ObXmlElementBinHeader ele_header; + ObXmlElementBinHeader * ele_ptr = &ele_header; + + if (OB_FAIL(header.deserialize())) { + LOG_WARN("failed to parser header.", K(ret), K(len_)); + } else { + type_ = header.type(); + total_ = header.get_obj_size(); + count_ = header.count(); + + int64_t pos = header.header_size(); + + if (type_ == M_DOCUMENT || type_ == M_UNPARSED || type_ == M_CONTENT || type_ == M_UNPARESED_DOC) { + if (OB_FAIL(doc_header.deserialize(data_ + pos, len_ - pos))) { + LOG_WARN("failed to doc header.", K(ret), K(len_), K(pos)); + } else { + ObString version = doc_header.get_version(); + version_ptr_ = version.ptr(); + version_len_ = version.length(); + + ObString encoding = doc_header.get_encoding(); + encoding_ptr_ = encoding.ptr(); + encoding_len_ = encoding.length(); + + standalone_ = doc_header.get_standalone(); + is_empty_ = count_ == 0; + has_xml_decl_ = doc_header.has_xml_decl(); + encoding_val_empty_ = doc_header.get_encoding_empty(); + + pos += doc_header.header_size(); + ele_ptr = &doc_header.elem_header_; + } + } else if (type_ == M_ELEMENT) { + + if (OB_FAIL(ele_header.deserialize(data_ + pos, len_ - pos))) { + LOG_WARN("failed to doc header.", K(ret), K(len_), K(pos)); + } else { + pos += ele_header.header_size(); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("failed to parser header.", K(ret), K(type_)); + } + + if 
(OB_SUCC(ret)) { + ObString prefix = ele_ptr->get_prefix(); + prefix_len_ = prefix.length(); + prefix_ptr_ = prefix.ptr(); + + is_unparse_ = ele_ptr->get_unparse(); + + index_entry_ = pos; + key_entry_ = index_entry_ + count_ * header.get_count_var_size(); + + key_entry_size_ = header.get_entry_var_size(); + key_entry_size_type_ = header.get_entry_var_size_type(); + + index_entry_size_ = header.get_count_var_size(); + index_entry_size_type_ = header.get_count_var_size_type(); + + value_entry_ = key_entry_ + key_entry_size_ * 2 * count_; + value_entry_size_ = header.get_entry_var_size(); + value_entry_size_type_ = header.get_entry_var_size_type(); + parsed_ = true; + } + } + + break; + } + case M_ATTRIBUTE: + case M_NAMESPACE: + case M_INSTRUCT: { + ObXmlAttrBinHeader attr_header; + if (OB_FAIL(attr_header.deserialize(data_, len_))) { + LOG_WARN("failed to parser header.", K(ret), K(len_), K(type_)); + } else { + ObString prefix = attr_header.get_prefix(); + prefix_ptr_ = prefix.ptr(); + prefix_len_ = prefix.length(); + + value_entry_ = attr_header.header_size(); + count_ = 1; + + int64_t pos = value_entry_; + int64_t val = 0; + if (OB_FAIL(serialization::decode_vi64(data_, len_, pos, &val))) { + LOG_WARN("failed to deserialize text string.", K(ret), K(val), K(len_), K(pos)); + } else if (len_ < pos + val) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize text string.", K(ret), K(len_), K(pos), K(val)); + } else { + value_ptr_ = const_cast(data_ + pos); + value_len_ = val; + total_ = value_len_ + pos; + parsed_ = true; + } + } + break; + } + + case M_TEXT: + case M_COMMENT: + case M_CDATA: { + value_entry_ = 1; + count_ = 1; + int64_t pos = value_entry_; + int64_t val = 0; + if (OB_FAIL(serialization::decode_vi64(data_, len_, pos, &val))) { + LOG_WARN("failed to deserialize text string.", K(ret), K(val)); + } else if (len_ < pos + val) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize text string.", K(ret), K(len_), K(pos), K(val)); + } 
else { + value_ptr_ = const_cast(data_ + pos); + value_len_ = val; + total_ = value_len_ + pos; + parsed_ = true; + } + break; + } + default : { + ret = OB_NOT_SUPPORTED; + LOG_WARN("failed to parser header.", K(ret), K(len_), K(type_)); + } + }; + } + + return ret; +} + +int64_t ObXmlBin::attribute_size() +{ + return get_child_start(); +} + +int64_t ObXmlBin::attribute_count() +{ + return attribute_size(); +} + +int64_t ObXmlBin::size() +{ + return child_size(); +} + +int64_t ObXmlBin::count() +{ + return ObXmlBin::size(); +} + +int64_t ObXmlBin::child_size() +{ + return meta_.count_ - get_child_start(); +} + +int ObXmlBin::compare(const ObString& key, int& res) +{ + INIT_SUCC(ret); + if (OB_FAIL(parse())) { + LOG_WARN("failed to parse.", K(ret)); + } else { + res = key.compare(meta_.get_key()); + } + + return ret; +} + +int ObXmlBin::parse(const char* data, int64_t len) +{ + meta_.data_ = data; + meta_.len_ = len; + return parse(); +} + +int ObXmlBin::parse() +{ + INIT_SUCC(ret); + if (OB_FAIL(meta_.parser())) { + LOG_WARN("failed to parser meta string.", K(ret)); + } + return ret; +} + +int ObXmlBin::construct(ObXmlBin*& res, ObIAllocator *allocator) +{ + INIT_SUCC(ret); + + if (OB_ISNULL(allocator) && OB_ISNULL(res)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to construct bin, not valid allocator is provided.", K(ret)); + } else { + ObXmlBin* tmp_res = nullptr; + + if (OB_ISNULL(res)) { + tmp_res = static_cast(allocator->alloc(sizeof(ObXmlBin))); + if (OB_ISNULL(tmp_res)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate bin.", K(ret)); + } else { + new (tmp_res) ObXmlBin(*this); + } + } else { + tmp_res = static_cast(res); + tmp_res->reset(); + if (OB_FAIL(tmp_res->deep_copy(*this))) { + LOG_WARN("failed to deep copy bin.", K(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(tmp_res->parse())) { + LOG_WARN("fail to parse meta.", K(ret), K(tmp_res->meta_)); + } else { + res = tmp_res; + } + } + + return ret; +} + +int 
ObXmlBin::get_node_count(ObMulModeNodeType filter_type, int &count) +{ + INIT_SUCC(ret); + count = 0; + + if (ObXmlUtil::is_container_tc(type())) { + int64_t entry_size = meta_.value_entry_size_ + sizeof(uint8_t); + const char* entry = meta_.data_ + meta_.value_entry_; + + bool is_attr_filter = (filter_type == M_ATTRIBUTE || filter_type == M_NAMESPACE); + + int32_t tmp = 0; + for (; tmp < meta_.count_; ++tmp) { + ObMulModeNodeType type = static_cast(entry[tmp * entry_size]); + if (filter_type == type) { + count++; + } + + bool is_attr_node = (type == M_ATTRIBUTE || type == M_NAMESPACE); + + if (!is_attr_node && is_attr_filter) { + break; + } + } + } + + return ret; +} + +int ObXmlBin::get_range(int64_t start, int64_t last, ObIArray &res, ObMulModeFilter* filter) +{ + INIT_SUCC(ret); + start = start < get_child_start() ? meta_.child_pos_ : start; + last = last >= meta_.count_ ? (meta_.count_ - 1) : last; + + ObXmlBinIterator iter = begin(); + + if (!iter.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to create iterator.", K(ret), K(iter.error_code())); + } + + for (int64_t pos = start; iter.is_valid() && OB_SUCC(ret) && pos <= last && pos < meta_.count_; ++pos) { + ObXmlBin* tmp = iter[pos - meta_.child_pos_]; + ObXmlBin* tmp_res = nullptr; + bool is_match = true; + if (!iter.is_valid() || OB_ISNULL(tmp)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to construct binary.", K(ret), K(iter.error_code())); + } else if (OB_FAIL(tmp->construct(tmp_res, allocator_))) { + LOG_WARN("failed to construct binary.", K(ret)); + } else if (OB_NOT_NULL(filter) && OB_FAIL(filter->operator()(tmp_res, is_match))) { + LOG_WARN("failed to filter.", K(ret)); + } else if (is_match && OB_FAIL(res.push_back(tmp_res))) { + LOG_WARN("failed to store result.", K(ret), K(res.count())); + } + } + + return ret; +} + +int ObXmlBin::get_index_content(int64_t index, int64_t &index_content) +{ + INIT_SUCC(ret); + int64_t index_pos = meta_.index_entry_ + index * 
meta_.index_entry_size_; + if (index >= meta_.count_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get index unexpected.", K(ret), K(index), K(meta_.count_)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(meta_.data_ + index_pos, meta_.index_entry_size_, &index_content))) { + LOG_WARN("failed to read index.", K(ret)); + } + return ret; +} + +int ObXmlBin::get_sorted_key_info(int64_t index, int64_t &key_len, int64_t &key_offset) +{ + INIT_SUCC(ret); + int64_t key_entry_offset_pos = meta_.key_entry_ + index * (2 * meta_.key_entry_size_); + int64_t key_entry_len_pos = key_entry_offset_pos + meta_.key_entry_size_; + if (index >= meta_.count_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get index unexpected.", K(ret), K(index), K(meta_.count_)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(meta_.data_ + key_entry_offset_pos, meta_.key_entry_size_, &key_offset))) { + LOG_WARN("failed to get key offset.", K(ret)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(meta_.data_ + key_entry_len_pos, meta_.key_entry_size_, &key_len))) { + LOG_WARN("failed to get key length.", K(ret)); + } + return ret; +} + +int ObXmlBin::get_key_info(int64_t text_index, int64_t& sorted_index, int64_t &key_offset, int64_t &key_len) +{ + INIT_SUCC(ret); + if (OB_FAIL(get_index_content(text_index, sorted_index))) { + LOG_WARN("failed to get sorted index.", K(ret)); + } else { + int64_t key_entry_offset_pos = meta_.key_entry_ + sorted_index * (2 * meta_.key_entry_size_); + int64_t key_entry_len_pos = key_entry_offset_pos + meta_.key_entry_size_; + if (sorted_index >= meta_.count_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get index unexpected.", K(ret), K(index), K(meta_.count_)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(meta_.data_ + key_entry_offset_pos, meta_.key_entry_size_, &key_offset))) { + LOG_WARN("failed to get key offset.", K(ret)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(meta_.data_ + key_entry_len_pos, meta_.key_entry_size_, &key_len))) { + LOG_WARN("failed to get key 
length.", K(ret)); + } + } + return ret; +} + +// index is sorted index +int ObXmlBin::get_value_info(int64_t index, uint8_t &type, int64_t &value_offset, int64_t &value_len) +{ + INIT_SUCC(ret); + int64_t get_type = 0; + int64_t type_pos = meta_.value_entry_ + index * (sizeof(uint8_t) + meta_.value_entry_size_); + if (index >= meta_.count_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get index unexpected.", K(ret), K(index), K(meta_.count_)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(meta_.data_ + type_pos, sizeof(uint8_t), &get_type))) { + LOG_WARN("failed to read index.", K(ret)); + } else { + type = static_cast(get_type); + } + + int64_t value_entry_offset_pos = meta_.value_entry_ + index * (sizeof(uint8_t) + meta_.value_entry_size_) + sizeof(uint8_t); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObMulModeVar::read_size_var(meta_.data_ + value_entry_offset_pos, meta_.value_entry_size_, &value_offset))) { + LOG_WARN("failed to get value_offset.", K(ret)); + } else if (value_offset >= meta_.len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get value offset unexpect.", K(ret), K(value_offset), K(meta_.len_)); + } else if (index + 1 >= meta_.count_) { + value_len = meta_.total_ - value_offset; + } else { + value_entry_offset_pos += (sizeof(uint8_t) + meta_.value_entry_size_); + int64_t next_value_offset = 0; + if (OB_FAIL(ObMulModeVar::read_size_var(meta_.data_ + value_entry_offset_pos, meta_.value_entry_size_, &next_value_offset))) { + LOG_WARN("failed to get value_offset.", K(ret)); + } else if (value_offset >= meta_.len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get value offset unexpect.", K(ret), K(value_offset), K(meta_.len_)); + } else { + value_len = next_value_offset - value_offset; + } + } + return ret; +} + +int ObXmlBin::get_child_value_start(int64_t &value_start) +{ + INIT_SUCC(ret); + int64_t get_type = 0; + int64_t index = attribute_size(); + int64_t value_entry_offset_pos = meta_.value_entry_ + index * (sizeof(uint8_t) + meta_.value_entry_size_) + 
sizeof(uint8_t); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObMulModeVar::read_size_var(meta_.data_ + value_entry_offset_pos, meta_.value_entry_size_, &value_start))) { + LOG_WARN("failed to get value_offset.", K(ret)); + } else if (value_start >= meta_.len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get value offset unexpect.", K(ret), K(value_start), K(meta_.len_)); + } + return ret; +} + +int32_t ObXmlBin::get_child_start() +{ + INIT_SUCC(ret); + if (meta_.child_pos_ != -1) { + } else { + int64_t entry_size = meta_.value_entry_size_ + sizeof(uint8_t); + const char* entry = meta_.data_ + meta_.value_entry_; + if (meta_.count_ - 1 <= 0) { + meta_.child_pos_ = 0; + } else { + meta_.child_pos_ = meta_.count_ - 1; + } + + int32_t tmp = 0; + for (; tmp < meta_.count_; ++tmp) { + ObMulModeNodeType type = static_cast(entry[tmp * entry_size]); + if (type == M_ATTRIBUTE || type == M_NAMESPACE) { + meta_.child_pos_ = tmp + 1; + } else { + meta_.child_pos_ = tmp; + break; + } + } + + } + return meta_.child_pos_; +} + +int ObXmlBin::get_children(ObIArray &res, ObMulModeFilter* filter) +{ + INIT_SUCC(ret); + + if (!ObXmlUtil::is_container_tc(type())) { + } else if (OB_FAIL(get_range(get_child_start(), meta_.count_, res, filter))) { + LOG_WARN("failed get range.", K(ret), K(meta_)); + } + + return ret; +} + +int ObXmlBin::get_value(ObString& value, int64_t index) +{ + INIT_SUCC(ret); + ObXmlBin *res = nullptr; + + if (OB_FAIL(parse())) { + LOG_WARN("failed to parse binary.", K(ret)); + } else if (index == -1) { + value = meta_.get_value(); + } else if (meta_.count_ - get_child_start() < index) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to eval value on binary.", K(ret), K(meta_.count_), K(get_child_start())); + } else { + ObXmlBin tmp(*this, nullptr); + if (OB_FAIL(tmp.set_child_at(index))) { + LOG_WARN("failed to set child at.", K(ret), K(index)); + } else { + value = meta_.get_value(); + } + } + + return ret; +} + +int ObXmlBin::get_value(ObIMulModeBase*& value, 
int64_t index)
+{
+  INIT_SUCC(ret);
+  ObXmlBin *res = nullptr;
+
+  if (OB_FAIL(parse())) {
+    LOG_WARN("failed to parse binary.", K(ret));
+  } else if (index == -1) {
+    if (OB_FAIL(construct(res, allocator_))) {
+      LOG_WARN("failed to construct binary.", K(ret));
+    } else {
+      value = res;
+    }
+  } else if (meta_.count_ - get_child_start() < index) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("failed to eval value on binary.", K(ret), K(meta_.count_), K(get_child_start()));
+  } else if (OB_FAIL(construct(res, allocator_))) {
+    LOG_WARN("failed to construct binary.", K(ret));
+  } else if (OB_FAIL(res->set_child_at(index))) {
+    LOG_WARN("failed to set child at.", K(ret), K(index));
+  } else {
+    value = res;
+  }
+
+  return ret;
+}
+
+// Fetches a key as a string view.
+//   index == -1 : the key of this node itself
+//   otherwise   : the key of the index-th child (counted from
+//                 get_child_start())
+// Returns OB_INVALID_ARGUMENT when index is past the child count.
+// The returned ObString is built from the meta's key pointer and length;
+// presumably it stays valid only as long as the underlying binary does.
+int ObXmlBin::get_key(ObString& res, int64_t index)
+{
+  INIT_SUCC(ret);
+
+  if (OB_FAIL(parse())) {
+    LOG_WARN("failed to parse binary.", K(ret));
+  } else if (index == -1) {
+    res = meta_.get_key();
+  } else if (meta_.count_ - get_child_start() < index) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("failed to eval value on binary.", K(ret), K(meta_.count_), K(get_child_start()));
+  } else {
+    ObXmlBin tmp(*this);
+    if (OB_FAIL(tmp.set_child_at(index))) {
+      LOG_WARN("failed to set child at.", K(ret), K(index));
+    } else {
+      // Read from the copy that was moved onto the child; this object's own
+      // meta_ still holds the parent's key.
+      res = tmp.meta_.get_key();
+    }
+  }
+
+  return ret;
+}
+
+int ObXmlBin::get_value_entry_type(uint8_t &type, int64_t index)
+{
+  INIT_SUCC(ret);
+  ObXmlBin *xml_bin = (ObXmlBin *)this;
+  ObXmlBinMetaParser meta = xml_bin->meta_;
+  int64_t get_type = 0;
+  const char *data = meta.data_;
+  int32_t count = meta.count_;
+  int64_t index_pos = meta.value_entry_ + index * (sizeof(uint8_t) + meta.value_entry_size_);
+
+  if (OB_FAIL(ObMulModeVar::read_size_var(data + index_pos, sizeof(uint8_t), &get_type))) {
+    LOG_WARN("failed to read index.", K(ret));
+  } else if (get_type >= M_MAX_TYPE || get_type < 0) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get sort index unexpected", K(ret), K(get_type), K(meta.len_));
+  } else {
+    type =
static_cast(get_type); + } + + return ret; +} + +int ObXmlBin::get_index_key(ObString& key, int64_t &origin_index, int64_t &value_offset, int64_t index) +{ + INIT_SUCC(ret); + ObXmlBin *xml_bin = (ObXmlBin *)this; + ObXmlBinMetaParser meta = xml_bin->meta_; + const char *data = buffer_.length() != 0 ? buffer_.ptr() : meta_.data_; + int32_t count = meta.count_; + int64_t val = 0; + int64_t key_length = 0; + int64_t key_offset = 0; + int64_t index_pos = meta.index_entry_ + index * meta.index_entry_size_; + + if (index >= count) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get index unexpected.", K(ret), K(index), K(count)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(data + index_pos, + meta.index_entry_size_, + &origin_index))) { + LOG_WARN("failed to read index.", K(ret)); + } else if (origin_index >= count || origin_index < 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get sort index unexpected", K(ret), K(origin_index), K(count)); + } else { + int64_t key_entry_offset_pos = meta.key_entry_ + origin_index * (2 * meta.key_entry_size_); + int64_t key_entry_len_pos = key_entry_offset_pos + meta.key_entry_size_; + int64_t value_entry_offset_pos = meta.value_entry_ + origin_index * (sizeof(uint8_t) + + meta.value_entry_size_) + sizeof(uint8_t); + if (OB_FAIL(ObMulModeVar::read_size_var(data + key_entry_len_pos, + meta.key_entry_size_, + &key_length))) { + LOG_WARN("failed to get key length.", K(ret)); + } else if (key_length >= meta.len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get value offset index unexpected", K(ret), K(key_length), K(meta.total_)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(data + key_entry_offset_pos, + meta.key_entry_size_, + &key_offset))) { + LOG_WARN("failed to get key offset.", K(ret)); + } else if (key_offset >= meta.total_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get key offset unexpect.", K(ret), K(key_offset), K(meta.total_)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(data + value_entry_offset_pos, + meta.value_entry_size_, 
+ &value_offset))) { + LOG_WARN("failed to get value_offset.", K(ret)); + } else if (value_offset >= meta.total_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get value offset unexpect.", K(ret), K(value_offset), K(meta.total_)); + } else { + key.assign_ptr(data + key_offset, key_length); + } + } + + return ret; +} + +int ObXmlBin::get_total_value(ObString& res, int64_t value_start) +{ + INIT_SUCC(ret); + ObXmlBin *xml_bin = (ObXmlBin *)this; + ObXmlBinMetaParser meta = xml_bin->meta_; + const char *data = buffer_.length() != 0 ? buffer_.ptr() : meta_.data_; + res.assign_ptr(data + value_start, meta.total_ - value_start); + return ret; +} + +ObString ObXmlBin::get_element_buffer() +{ + ObString res; + ObXmlBin *xml_bin = (ObXmlBin *)this; + res.assign_ptr(xml_bin->meta_.data_, xml_bin->meta_.len_); + return res; +} + +int ObXmlBin::get_text_value(ObString &value) +{ + INIT_SUCC(ret); + int64_t pos = sizeof(uint8_t); + const char *data = meta_.data_; + int64_t data_len = meta_.len_; + int64_t val = 0; + + if (this->type() != M_TEXT) { + // do nothing + } else if (OB_FAIL(serialization::decode_vi64(data, data_len, pos, &val))) { + LOG_WARN("failed to deserialize text string.", K(ret), K(val)); + } else if ((data_len < pos + val)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to deserialize text string.", K(ret), K(data_len), K(pos), K(val)); + } else { + value.assign_ptr(data + pos, val); + } + + return ret; +} + +int ObXmlBin::get_value_start(int64_t &value_start) +{ + INIT_SUCC(ret); + int32_t count = meta_.count_; + const char *data = meta_.data_; + int64_t last_key_len = 0; + int64_t last_key_offset = 0; + int64_t last_key_len_pos = meta_.key_entry_ + (count - 1) * meta_.key_entry_size_ * 2; + int64_t last_key_offset_pos = last_key_len_pos + meta_.key_entry_size_; + if (OB_FAIL(ObMulModeVar::read_size_var(data + last_key_len_pos, + meta_.key_entry_size_, + &last_key_len))) { + LOG_WARN("failed to get key length.", K(ret)); + } else if (last_key_len > meta_.len_) { 
+ ret = OB_ERR_UNEXPECTED; + LOG_WARN("get value offset index unexpected", K(ret), K(last_key_len), K(meta_.len_)); + } else if (OB_FAIL(ObMulModeVar::read_size_var(data + last_key_offset_pos, + meta_.key_entry_size_, + &last_key_offset))) { + LOG_WARN("failed to get key offset.", K(ret)); + } else if (last_key_offset > meta_.total_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get value offset index unexpected", K(ret), K(last_key_offset), K(meta_.total_)); + } else { + value_start = last_key_offset + last_key_len; + } + return ret; +} + +int64_t ObXmlBin::low_bound(const ObString& key) +{ + ObXmlKeyCompare comparator; + ObString tmp_key; + int64_t child_start = get_child_start(); + + int64_t low = child_start; + int64_t high = meta_.count_ - 1; + int64_t iter = low; + + int64_t step = 0; + int64_t count = high - low + 1; + + // do binary search + while (count > 0) { + iter = low; + step = count / 2; + iter += step; + + ObXmlBinKey bin_key(meta_.data_, meta_.key_entry_, iter, meta_.key_entry_size_type_); + tmp_key = bin_key.get_key(); + + int compare_result = comparator(tmp_key, key); + if (compare_result < 0) { + low = ++iter; + count -= step + 1; + } else { + count = step; + } + } + + return low; +} + +int64_t ObXmlBin::up_bound(const ObString& key) +{ + ObXmlKeyCompare comparator; + ObString tmp_key; + + int64_t child_start = get_child_start(); + + int64_t low = child_start; + int64_t high = meta_.count_ - 1; + int64_t iter = low; + + int64_t step = 0; + int64_t count = high - low + 1; + + // do binary search + while (count > 0) { + iter = low; + step = count / 2; + iter += step; + + ObXmlBinKey bin_key(meta_.data_, meta_.key_entry_, iter, meta_.key_entry_size_type_); + tmp_key = bin_key.get_key(); + + int compare_result = comparator(tmp_key, key); + if (compare_result <= 0) { + low = ++iter; + count -= step + 1; + } else { + count = step; + } + } + + return low; +} + +int ObXmlBin::get_children(const ObString& key, ObIArray& res, ObMulModeFilter* filter) +{ + 
INIT_SUCC(ret); + ObXmlKeyCompare comparator; + ObString tmp_key; + + int64_t low = low_bound(key); + int64_t upper = up_bound(key); + int64_t count = meta_.count_; + + if (low < count) { + for (int64_t iter = low; OB_SUCC(ret) && iter < upper && iter < count; ++iter) { + ObXmlBinIndexMeta index_meta(meta_.data_ + meta_.index_entry_, iter, meta_.index_entry_size_type_); + ObXmlBin* tmp_res = nullptr; + + bool is_match = true; + if (OB_FAIL(construct(tmp_res, allocator_))) { + LOG_WARN("failed to construct binary.", K(ret)); + } else if (OB_FAIL(tmp_res->set_sorted_at(iter))) { + LOG_WARN("failed to set at.", K(ret), K(index_meta.get_index())); + } else if (OB_NOT_NULL(filter) && OB_FAIL(filter->operator()(tmp_res, is_match))) { + LOG_WARN("failed to filter.", K(ret)); + } else if (is_match && OB_FAIL(res.push_back(tmp_res))) { + LOG_WARN("fail to store scan result", K(ret), K(res.count())); + } + } + } + + return ret; +} + +bool ObXmlBin::has_flags(ObMulModeNodeFlag flag) +{ + bool res = false; + if (flag & XML_DECL_FLAG) { + res = meta_.has_xml_decl_; + } else if (flag & XML_ENCODING_EMPTY_FLAG) { + res = meta_.encoding_val_empty_; + } + + return res; +} + +ObIMulModeBase* ObXmlBin::at(int64_t pos, ObIMulModeBase* buffer) +{ + INIT_SUCC(ret); + ObXmlBin *res = nullptr; + + if (OB_NOT_NULL(buffer)) { + res = static_cast(buffer); + } + + if (OB_FAIL(construct(res, allocator_))) { + res = nullptr; + LOG_WARN("failed to construct binary.", K(ret)); + } else if (OB_FAIL(res->set_child_at(pos))) { + res = nullptr; + LOG_WARN("failed to set child at.", K(ret), K(pos)); + } + + return res; +} + +ObIMulModeBase* ObXmlBin::attribute_at(int64_t pos, ObIMulModeBase* buffer) +{ + INIT_SUCC(ret); + ObXmlBin *res = nullptr; + + if (OB_NOT_NULL(buffer)) { + res = static_cast(buffer); + } + + if (OB_FAIL(construct(res, allocator_))) { + res = nullptr; + LOG_WARN("failed to construct binary.", K(ret)); + } else if (OB_FAIL(res->set_at(pos))) { + res = nullptr; + LOG_WARN("failed to 
set child at.", K(ret), K(pos)); + } + + return res; +} + +ObIMulModeBase* ObXmlBin::sorted_at(int64_t pos, ObIMulModeBase* buffer) +{ + INIT_SUCC(ret); + ObXmlBin *res = nullptr; + res = static_cast(buffer); + + if (OB_FAIL(construct(res, allocator_))) { + res = nullptr; + LOG_WARN("failed to construct binary.", K(ret)); + } else if (OB_FAIL(res->set_sorted_at(pos))) { + res = nullptr; + LOG_WARN("failed to set child at.", K(ret), K(pos)); + } + + return res; +} + +int ObXmlBin::get_attribute(ObIArray& res, ObMulModeNodeType filter_type, int32_t flags) +{ + INIT_SUCC(ret); + + if (OB_FAIL(parse())) { + LOG_WARN("failed to parse.", K(ret), K(type())); + } else if (filter_type == M_NAMESPACE) { + int64_t attribute_num = attribute_size(); + for (int pos = 0; OB_SUCC(ret) && pos < attribute_num ; ++pos) { + ObXmlBin buff(*this); + ObXmlBin* tmp = &buff; + + if (OB_FAIL(construct(tmp, allocator_))) { + LOG_WARN("failed to dup bin.", K(ret)); + } else if (OB_FAIL(tmp->set_at(pos))) { + LOG_WARN("failed to set at child.", K(ret)); + } else if (tmp->type() == M_NAMESPACE) { + bool is_match = true; + if (flags) { + ObString prefix; + if (OB_FAIL(tmp->get_key(prefix))) { + LOG_WARN("failed to eval key.", K(ret)); + } else if (prefix.compare(ObXmlConstants::XMLNS_STRING)) { + is_match = false; + } + } + + if (OB_SUCC(ret) && is_match) { + ObXmlBin* dup = nullptr; + if (OB_FAIL(tmp->construct(dup, allocator_))) { + LOG_WARN("failed to dup bin.", K(ret)); + } else if (OB_FAIL(res.push_back(dup))) { + LOG_WARN("fail to store bin ptr", K(ret)); + } + } + } else if (tmp->type() == M_ATTRIBUTE) { + } else { + break; + } + } + } else if (filter_type == M_ATTRIBUTE) { + int64_t attribute_num = attribute_size(); + for (int pos = 0; OB_SUCC(ret) && pos < attribute_num ; ++pos) { + ObXmlBin* tmp = nullptr; + if (OB_FAIL(construct(tmp, allocator_))) { + LOG_WARN("failed to dup bin.", K(ret)); + } else if (OB_FAIL(tmp->set_at(pos))) { + LOG_WARN("failed to set at child.", K(ret)); + } 
else if (tmp->type() == M_ATTRIBUTE) { + if (OB_FAIL(res.push_back(tmp))) { + LOG_WARN("failed to store result.", K(ret), K(res.count())); + } + } else if (tmp->type() == M_NAMESPACE) { + } else { + break; + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get attr list", K(ret), K(filter_type)); + } + + return ret; +} + +int ObXmlBin::node_ns_value(ObString& prefix, ObString& ns_value) +{ + INIT_SUCC(ret); + bool found = false; + + int64_t attribute_num = attribute_size(); + for (int attr_pos = 0; OB_SUCC(ret) && attr_pos < attribute_num; ++attr_pos) { + ObString tmp_prefix; + ObXmlBin tmp(*this); + if (OB_FAIL(tmp.set_at(attr_pos))) { + LOG_WARN("failed to set at child.", K(ret)); + } else if (tmp.type() == M_NAMESPACE) { + if (OB_FAIL(tmp.get_key(tmp_prefix))) { + LOG_WARN("failed to get ns key.", K(ret)); + } else if (prefix.empty()) { + if (tmp_prefix.compare(ObXmlConstants::XMLNS_STRING) == 0) { + ns_value = tmp.meta_.get_value(); + found = true; + break; + } + } else if (tmp_prefix.compare(prefix) == 0) { + ns_value = tmp.meta_.get_value(); + found = true; + break; + } + } else if (tmp.type() == M_ATTRIBUTE || tmp.type() == M_INSTRUCT) { + } else { + break; + } + } + return ret; +} + +int ObXmlBin::get_ns_value(ObStack& stk, ObString& ns_value, ObIMulModeBase* extend) +{ + INIT_SUCC(ret); + bool found = false; + int64_t size = stk.size(); + ObString prefix = get_prefix(); + + if (type() == M_ATTRIBUTE && prefix.empty()) { + } else if (prefix.compare(ObXmlConstants::XML_STRING) == 0) { + ns_value = ObXmlConstants::XML_NAMESPACE_SPECIFICATION_URI; + } else if (OB_FAIL(node_ns_value(prefix, ns_value))) { + LOG_WARN("failed get node ns value.", K(ret)); + } else if (!ns_value.empty()) { + } else if (size > 0) { + for (int64_t pos = size - 1; !found && OB_SUCC(ret) && pos >= 0; --pos) { + ObXmlBin* current = static_cast(stk.at(pos)); + + if (OB_FAIL(current->node_ns_value(prefix, ns_value))) { + LOG_WARN("failed get node ns value.", K(ret)); + } 
else if (!ns_value.empty()) { + found = true; + break; + } + } + + // if didn't find ns definition after traversing ancestor nodes, check exrend area + if (ns_value.empty() && size > 0) { + // get root node + ObXmlBin* extend_bin; + if (OB_ISNULL(extend)) { // without extend, its normal + } else if (OB_ISNULL(extend_bin = static_cast(extend))) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_FAIL(extend_bin->node_ns_value(prefix, ns_value))) { + LOG_WARN("failed get node ns value.", K(ret)); + } + } + + } + return ret; +} + +int ObXmlBin::get_ns_value(const ObString& prefix, ObString& ns_value, int& ans_idx) +{ + INIT_SUCC(ret); + ObString tmp_prefix; + + if (prefix.compare(ObXmlConstants::XML_STRING) == 0) { + ns_value = ObXmlConstants::XML_NAMESPACE_SPECIFICATION_URI; + } else { + bool found = false; + int64_t attribute_num = attribute_size(); + for (int pos = 0; OB_SUCC(ret) && pos < attribute_num && !found ; ++pos) { + ObXmlBin tmp(*this); + ObString tmp_prefix; + if (OB_FAIL(tmp.set_at(pos))) { + LOG_WARN("failed to set at child.", K(ret)); + } else if (tmp.type() == M_NAMESPACE) { + if (OB_FAIL(tmp.get_key(tmp_prefix))) { + LOG_WARN("failed to get ns key.", K(ret)); + } else if (prefix.empty()) { + if (tmp_prefix.compare(ObXmlConstants::XMLNS_STRING) == 0) { + tmp.get_value(ns_value); + found = true; + ans_idx = pos; + break; + } + } else if (tmp_prefix.compare(prefix) == 0) { + tmp.get_value(ns_value); + found = true; + ans_idx = pos; + break; + } + } else if (tmp.type() == M_ATTRIBUTE || tmp.type() == M_INSTRUCT) { + } else { + break; + } + } + } + + return ret; +} + +int ObXmlBin::get_attribute(ObIMulModeBase*& res, ObMulModeNodeType filter_type, const ObString& ns_name, const ObString &node_key) +{ + INIT_SUCC(ret); + res = nullptr; + ObString prefix; + + if (OB_FAIL(parse())) { + LOG_WARN("failed to parse.", K(ret), K(type())); + } else if (filter_type == M_NAMESPACE || filter_type == M_ATTRIBUTE) { + int64_t 
attribute_num = attribute_size(); + bool found = false; + ObXmlBin tmp(*this); + + for (int pos = 0; OB_SUCC(ret) && !found && pos < attribute_num ; ++pos) { + ObString tmp_key; + tmp.deep_copy(*this); + if (OB_FAIL(tmp.set_at(pos))) { + LOG_WARN("failed to set at child.", K(ret)); + } else if (OB_FAIL(tmp.get_key(tmp_key))) { + LOG_WARN("failed to eval key.", K(ret)); + } else if (tmp.type() == M_NAMESPACE) { + if (filter_type != M_NAMESPACE) { + } else if (tmp_key.compare(ns_name) == 0) { + found = true; + break; + } + } else if (tmp.type() == M_ATTRIBUTE) { + if (filter_type != M_ATTRIBUTE) { + } else if (node_key.compare(tmp_key) == 0) { + ObString ns_value; + if (OB_FAIL(tmp.get_ns_value(ns_value))) { + LOG_WARN("failed to get valid namesapce value.", K(ret)); + } else if (!ns_name.empty() && (ns_value.empty() || (!ns_value.empty() && ns_value.compare(ns_name)))) { + } else if (ns_name.empty() && !ns_value.empty()) { + found = true; + break; + } else { + found = true; + break; + } + } + } else if (tmp.type() == M_INSTRUCT) { + } else { + break; + } + } + + if (OB_SUCC(ret) && found) { + ObXmlBin* tmp_res = nullptr; + if (OB_FAIL(tmp.construct(tmp_res, ctx_->allocator_))) { + LOG_WARN("failed to dup res.", K(ret)); + } else{ + res = tmp_res; + } + } + } + + return ret; +} + +int ObXmlBin::set_child_at(int64_t pos) +{ + return set_at(get_child_start() + pos); +} + +int ObXmlBin::set_sorted_at(int64_t sort_index) +{ + INIT_SUCC(ret); + + meta_.sort_idx_ = sort_index; + if (OB_FAIL(parse())) { + LOG_WARN("failed to parse header.", K(ret)); + } else if (meta_.count_ <= sort_index || sort_index < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to set iter on child.", K(ret), K(sort_index), K(meta_.count_)); + } else { + uint64_t value_start = 0; + uint64_t key_offset = 0; + uint64_t key_len = 0; + + if (OB_FAIL(ObMulModeVar::read_var(meta_.data_ + meta_.get_value_offset(sort_index) + sizeof(uint8_t), + meta_.value_entry_size_type_, &value_start))) { + 
LOG_WARN("failed to read value offset.", K(ret), K(meta_)); + } else if (meta_.len_ <= value_start) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to parser value.", K(meta_.len_), K(value_start)); + } else if (OB_FAIL(ObMulModeVar::read_var(meta_.data_ + meta_.get_key_offset(sort_index), meta_.key_entry_size_type_, &key_offset)) + || OB_FAIL(ObMulModeVar::read_var(meta_.data_ + meta_.get_key_offset(sort_index) + meta_.key_entry_size_, meta_.key_entry_size_type_, &key_len))) { + LOG_WARN("failed to read key index.", K(meta_), K(ret)); + } else if (key_offset + key_len >= meta_.len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get key index.", K(meta_), K(key_offset), K(key_len)); + } else { + ObString tmp_key(key_len, meta_.data_ + key_offset); + if (OB_FAIL(meta_.parser(meta_.data_ + value_start, meta_.len_ - value_start))) { + LOG_WARN("failed to parser value header.", K(ret), K(meta_)); + } else { + meta_.key_ptr_ = tmp_key.ptr(); + meta_.key_len_ = tmp_key.length(); + // set at child, update len_ to child len + meta_.len_ = meta_.total_; + } + } + } + return ret; +} + +int ObXmlBin::set_at(int64_t pos) +{ + INIT_SUCC(ret); + + meta_.idx_ = pos; + if (OB_FAIL(parse())) { + LOG_WARN("failed to parse header.", K(ret)); + } else if (meta_.count_ <= pos || pos < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to set iter on child.", K(ret), K(pos), K(meta_.count_)); + } else { + uint64_t value_start = 0; + uint64_t key_offset = 0; + uint64_t key_len = 0; + int64_t sort_index = 0; + if (OB_FAIL(ObMulModeVar::read_size_var(meta_.data_ + meta_.get_index(pos), meta_.index_entry_size_, &sort_index))) { + LOG_WARN("failed to read sort index.", K(ret), K(meta_), K(pos)); + } else if (sort_index >= meta_.count_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to parser index.", K(ret), K(sort_index), K(meta_.count_)); + } else if (OB_FAIL(ObMulModeVar::read_var(meta_.data_ + meta_.get_value_offset(sort_index) + sizeof(uint8_t), + meta_.value_entry_size_type_, 
&value_start))) { + LOG_WARN("failed to read value offset.", K(ret), K(meta_)); + } else if (meta_.len_ <= value_start) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to parser value.", K(meta_.len_), K(value_start)); + } else if (OB_FAIL(ObMulModeVar::read_var(meta_.data_ + meta_.get_key_offset(sort_index), meta_.key_entry_size_type_, &key_offset)) + || OB_FAIL(ObMulModeVar::read_var(meta_.data_ + meta_.get_key_offset(sort_index) + meta_.key_entry_size_, meta_.key_entry_size_type_, &key_len))) { + LOG_WARN("failed to read key index.", K(meta_), K(ret)); + } else if (key_offset + key_len >= meta_.len_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get key index.", K(meta_), K(key_offset), K(key_len)); + } else { + ObString tmp_key(key_len, meta_.data_ + key_offset); + if (OB_FAIL(meta_.parser(meta_.data_ + value_start, meta_.len_ - value_start))) { + LOG_WARN("failed to parser value header.", K(ret), K(meta_)); + } else { + meta_.sort_idx_ = sort_index; + meta_.key_ptr_ = tmp_key.ptr(); + meta_.key_len_ = tmp_key.length(); + // set at child, update len_ to child len + meta_.len_ = meta_.total_; + } + } + } + return ret; +} + + +int ObXmlBin::deep_copy(ObXmlBin& from) +{ + INIT_SUCC(ret); + meta_ = from.meta_; + return ret; +} + +int ObXmlBin::get_raw_binary(common::ObString &out, ObIAllocator *allocator) +{ + INIT_SUCC(ret); + ObIAllocator *alloc = allocator == nullptr ? 
allocator_ : allocator; + // with a usable allocator the caller receives a private copy of the binary; + // without one, out simply aliases the internal buffer (the original test was inverted and dereferenced a null allocator) + if (OB_NOT_NULL(alloc)) { + char* buf = static_cast<char*>(alloc->alloc(meta_.len_ + 1)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory.", K(ret), K(meta_.len_)); + } else { + out.assign_buffer(buf, meta_.len_ + 1); + if (OB_FAIL(out.write(meta_.data_, meta_.len_))) { + LOG_WARN("failed to write buffer.", K(ret), K(meta_.len_)); + } + } + } else { + out.assign_ptr(meta_.data_, meta_.len_); + } + + return ret; +} + +bool ObXmlBin::is_equal_node(const ObIMulModeBase* other) +{ + bool res = false; + if (OB_ISNULL(other)) { + } else if (other->is_binary()) { + ObIMulModeBase* ref_other = const_cast<ObIMulModeBase*>(other); + ObXmlBin* tmp = static_cast<ObXmlBin*>(ref_other); + res = tmp->meta_ == meta_; + } + return res; +} + +bool ObXmlBin::is_node_before(const ObIMulModeBase* other) +{ + bool res = false; + if (OB_ISNULL(other)) { + } else if (other->is_binary()) { + ObIMulModeBase* ref_other = const_cast<ObIMulModeBase*>(other); + ObXmlBin* tmp = static_cast<ObXmlBin*>(ref_other); + res = tmp->meta_ < meta_; + } + return res; +} +ObXmlBin::iterator ObXmlBin::begin() +{ + INIT_SUCC(ret); + ObIAllocator *allocator = allocator_; + ObXmlBin::iterator iter(this); + + if (OB_FAIL(meta_.parser())) { + LOG_WARN("failed to parse meta header.", K(meta_), K(ret)); + iter.error_code_ = ret; + iter.is_valid_ = false; + } else { + iter.cur_pos_ = 0; + iter.total_ = meta_.count_; + iter.meta_header_ = iter.cur_node_.meta_; + iter.is_valid_ = true; + } + + return iter; +} + +ObXmlBin::iterator ObXmlBin::end() +{ + INIT_SUCC(ret); + ObXmlBin::iterator iter(this); + + if (OB_FAIL(meta_.parser())) { + iter.error_code_ = ret; + iter.is_valid_ = false; + LOG_WARN("failed to parse meta header.", K(meta_), K(ret)); + } else { + iter.total_ = meta_.count_; + iter.cur_pos_ = iter.total_; + iter.meta_header_ = iter.cur_node_.meta_; + iter.is_valid_ = true; + } + + return iter; +} + +void ObXmlBinIterator::set_range(int64_t start, int64_t finish) +{ + cur_pos_ = start; + if
(finish < total_) { + total_ = finish; + } +} + +bool ObXmlBinIterator::end() +{ + return cur_pos_ >= total_; +} + +bool ObXmlBinIterator::begin() +{ + return cur_pos_ <= 0; +} + +ObXmlBin* ObXmlBinIterator::current() +{ + ObXmlBin* res = nullptr; + INIT_SUCC(ret); + if (!is_valid()) { + } else { + res = &cur_node_; + } + + return res; +} + +ObXmlBin* ObXmlBinIterator::operator*() +{ + return operator[](cur_pos_); +} + +ObXmlBin* ObXmlBinIterator::operator->() +{ + return operator*(); +} + +ObXmlBin* ObXmlBinIterator::operator[](int64_t pos) +{ + INIT_SUCC(ret); + ObXmlBin* res = nullptr; + + if (!is_valid()) { + } else { // + cur_node_.meta_ = meta_header_; + if (is_sorted_iter_ && OB_FAIL(cur_node_.set_sorted_at(pos))) { + LOG_WARN("failed to set sorted iter at.", K(cur_node_.meta_), K(pos), K(ret)); + is_valid_ = false; + error_code_ = ret; + } else if (!is_sorted_iter_ && OB_FAIL(cur_node_.set_at(pos))) { + LOG_WARN("failed to set iter at.", K(cur_node_.meta_), K(pos), K(ret)); + is_valid_ = false; + error_code_ = ret; + } else { + res = &cur_node_; + } + } + + return res; +} + +ObXmlBinIterator& ObXmlBinIterator::next() +{ + cur_pos_++; + return *this; +} + +ObXmlBinIterator& ObXmlBinIterator::operator++() +{ + return next(); +} + +ObXmlBinIterator& ObXmlBinIterator::operator--() +{ + cur_pos_--; + return *this; +} + +ObXmlBinIterator ObXmlBinIterator::operator++(int) +{ + ObXmlBinIterator iter(*this); + cur_pos_++; + return iter; +} + +ObXmlBinIterator ObXmlBinIterator::operator--(int) +{ + // post-decrement: return a snapshot of the current position, then step this iterator backwards + // (the original incremented here, making it behave like operator++(int)) + ObXmlBinIterator iter(*this); + cur_pos_--; + return iter; +} + +bool ObXmlBinIterator::operator<(const ObXmlBinIterator& iter) +{ + return (meta_header_ == iter.meta_header_ && cur_pos_ < iter.cur_pos_); +} + +bool ObXmlBinIterator::operator>(const ObXmlBinIterator& iter) +{ + return (meta_header_ == iter.meta_header_ && cur_pos_ > iter.cur_pos_); +} + +ObXmlBinIterator& ObXmlBinIterator::operator-=(int size) +{ + cur_pos_ -= size; + return *this; +} + +ObXmlBinIterator&
ObXmlBinIterator::operator+=(int size) +{ + cur_pos_ += size; + return *this; +} + +ObXmlBinIterator ObXmlBinIterator::operator+(int size) +{ + ObXmlBinIterator iter(*this); + iter.cur_pos_ += size; + return iter; +} + + +ObXmlBinIterator ObXmlBinIterator::operator-(int size) +{ + ObXmlBinIterator iter(*this); + iter.cur_pos_ -= size; + return iter; +} + +int64_t ObXmlBinIterator::operator-(const ObXmlBinIterator& iter) +{ + return cur_pos_ - iter.cur_pos_; +} + +bool ObXmlBinIterator::operator==(const ObXmlBinIterator& rhs) +{ + return (meta_header_ == rhs.meta_header_ && cur_pos_ == rhs.cur_pos_); +} + +bool ObXmlBinIterator::operator!=(const ObXmlBinIterator& rhs) +{ + return !(*this == rhs); +} + +bool ObXmlBinIterator::operator<=(const ObXmlBinIterator& rhs) +{ + return (meta_header_ == rhs.meta_header_ && cur_pos_ <= rhs.cur_pos_); +} + + +int ObXmlBinMerge::init_merge_info(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObIMulModeBase& res) +{ + INIT_SUCC(ret); + // use for xml binary merge, make sure is xml binary + if (origin.data_type() != OB_XML_TYPE || patch.data_type() != OB_XML_TYPE ||origin.is_tree() || patch.is_tree()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should not be binary", K(origin.is_tree()), K(patch.is_tree()), K(ret)); + } else { + ObXmlBin* bin_res = static_cast(&res); + ObXmlBin* bin_origin = static_cast(&origin); + ObXmlBin* bin_patch = static_cast(&patch); + if (OB_ISNULL(bin_res) || OB_ISNULL(bin_origin)|| OB_ISNULL(bin_patch)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (!bin_origin->meta_.parsed_ && OB_FAIL(bin_origin->parse())) { + LOG_WARN("fail to parse", K(ret)); + } else if (!bin_patch->meta_.parsed_ && OB_FAIL(bin_patch->parse())) { + LOG_WARN("fail to parse", K(ret)); + } else if (OB_FAIL(bin_res->buffer_.reserve(estimated_length(false, ctx, origin, patch)))) { + LOG_WARN("fail to reserve", K(ret)); + } else { + int64_t ns_num = patch.attribute_size(); + // if only 
merge attribute, we only resort attribute key + if (patch.size() == 0 && patch.attribute_size() > 0) { + ctx.only_merge_ns_ = 1; + } + ObXmlBin bin_buffer; + ObIMulModeBase* cur = nullptr; + // init delete vector + for (int i = 0; OB_SUCC(ret) && i < ns_num; ++i) { + cur = patch.attribute_at(i, &bin_buffer); + if (OB_ISNULL(cur)) { + } else if (cur->type() != ObMulModeNodeType::M_NAMESPACE) { + ctx.only_merge_ns_ = 0; + } + + if (OB_FAIL(ctx.del_map_.push(false))) { + LOG_WARN("failed to init delete map.", K(ret)); + } + } + ctx.buffer_ = &bin_res->buffer_; + ctx.reuse_del_map_ = 1; + ctx.reserve_ = 0; + ctx.retry_count_ = 0; + ctx.retry_len_ = 0; + } + } + return ret; +} + +// first, delete duplicate ns, then check: +// 1. when there is no valid ns in patch, return false; +// 2. when origin ns didn't defined in patch, and origin have no elemen child, return false; +int ObXmlBinMerge::if_need_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObIMulModeBase& res, bool& need_merge) +{ + INIT_SUCC(ret); + need_merge = true; + if (origin.type() == M_DOCUMENT || origin.type() == M_CONTENT) { + int64_t origin_children = origin.size(); + bool has_element = false; + ObIMulModeBase* cur = nullptr; + ObXmlBin bin_buffer; + for (int64_t i = 0; OB_SUCC(ret) && !has_element && i < origin_children; i++) { + cur = origin.at(i, &bin_buffer); + if (OB_NOT_NULL(cur) && cur->type() == M_ELEMENT) { + has_element = true; + } + } + need_merge = has_element; + } else if (origin.type() == M_ELEMENT && patch.type() == M_ELEMENT) { + int64_t origin_ns_num = origin.attribute_size(); + ObXmlBin bin_buffer; + ObIMulModeBase* cur = nullptr; + ctx.defined_ns_idx_.reset(); + + // check if has duplicate ns definition + for (int64_t i = 0; i < origin_ns_num && OB_SUCC(ret); i++) { + cur = origin.attribute_at(i, &bin_buffer); + ObString tmp_ns_key; + ObString tmp_ns_value; + int ans_idx = -1; + if (OB_ISNULL(cur)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", 
K(ret)); + } else if (cur->type() == ObMulModeNodeType::M_NAMESPACE) { + cur->get_key(tmp_ns_key); + patch.get_ns_value(tmp_ns_key, tmp_ns_value, ans_idx); + if (0 <= ans_idx && !tmp_ns_key.empty() && ans_idx < ctx.del_map_.size()) { + ret = ctx.del_map_.set(ans_idx, true); + } + } + } + + if (OB_FAIL(ret)) { + } else if (ctx.defined_ns_idx_.size() == 0 && ctx.is_all_deleted()) { + need_merge = false; + } else { + // if origin ns defined in patch + ObString ns_prefix = origin.get_prefix(); + ObString tmp_ns_value; + int ans_idx = -1; + ret = patch.get_ns_value(ns_prefix, tmp_ns_value, ans_idx); + if (OB_FAIL(ret)) { + } else if (0 <= ans_idx && ans_idx < ctx.del_map_.size() && !(ctx.del_map_.at(ans_idx))) { + int origin_ans_idx = -1; + ret = ctx.del_map_.set(ans_idx, true); + if (OB_FAIL(ret)) { + } else if (origin.type() == M_ELEMENT + && OB_FAIL(origin.get_ns_value(ns_prefix, tmp_ns_value, origin_ans_idx))) { + LOG_WARN("fail to check ns definition in origin", K(ret)); + } else if (origin_ans_idx == -1) { + ctx.defined_ns_idx_.push(ans_idx); + } + } + if (OB_FAIL(ret)) { + } else { + for (int64_t i = 0; i < origin_ns_num && OB_SUCC(ret); i++) { + cur = origin.attribute_at(i, &bin_buffer); + ObString tmp_ns_key; + ObString tmp_ns_value; + int ans_idx = -1; + if (OB_ISNULL(cur)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (cur->type() == ObMulModeNodeType::M_ATTRIBUTE) { + // check attribute ns + tmp_ns_key = cur->get_prefix(); + if (!tmp_ns_key.empty()) { + ret = patch.get_ns_value(tmp_ns_key, tmp_ns_value, ans_idx); + if (OB_SUCC(ret) && 0 <= ans_idx && ans_idx < ctx.del_map_.size() && !(ctx.del_map_.at(ans_idx))) { + if (OB_SUCC(ctx.del_map_.set(ans_idx, true))) { + ret = ctx.defined_ns_idx_.push(ans_idx); + } + } + } // default attribute do not add default ns + } + } + } + if (OB_FAIL(ret)) { + } else if (ctx.defined_ns_idx_.size() == 0) { + int64_t origin_children = origin.size(); + bool has_element = false; + for 
(int64_t i = 0; OB_SUCC(ret) && !has_element && i < origin_children; i++) { + cur = origin.at(i, &bin_buffer); + if (OB_NOT_NULL(cur) && cur->type() == M_ELEMENT) { + has_element = true; + } + } + need_merge = has_element; + } else { + need_merge = true; + } + } // has valid ns + } else { + // not element, do not need merge ns + need_merge = false; + } + return ret; +} + +bool ObXmlBinMerge::if_need_append_key(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObIMulModeBase& res) +{ + return ctx.defined_ns_idx_.size() > 0; +} + +// for xml, must be append origin as res +// but for json, may be patch or origin +int ObXmlBinMerge::append_res_without_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObIMulModeBase& res) +{ + return append_value_without_merge(ctx, origin, res); +} + +int ObXmlBinMerge::append_value_without_merge(ObBinMergeCtx& ctx, ObIMulModeBase& value, ObIMulModeBase& res) +{ + INIT_SUCC(ret); + ObXmlBin* bin_val = static_cast(&value); + ObXmlBin* bin_res = static_cast(&res); + if (OB_ISNULL(bin_val) || OB_ISNULL(bin_res)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_FAIL(bin_res->buffer_.reserve(bin_val->meta_.len_))) { + LOG_WARN("fail to reserve val", K(ret)); + } else if (OB_FAIL(bin_res->buffer_.append(bin_val->meta_.data_, bin_val->meta_.len_))) { + LOG_WARN("fail to append val", K(ret)); + } + return ret; +} + +int ObXmlBinMergeMeta::init_merge_meta(ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObMulBinHeaderSerializer& header, bool with_patch) +{ + INIT_SUCC(ret); + header_ = &header; + attr_count_ = with_patch ? 
(origin.attribute_size() + ctx.defined_ns_idx_.size()) : origin.attribute_size(); + child_count_ = origin.size(); + int64_t children_count = attr_count_ + child_count_; + header.count_ = children_count; + header_start_ = header_->start(); + int64_t header_len = header_->header_size(); + bool element_header = true; + if (header.type_ == M_DOCUMENT || header.type_ == M_CONTENT) { + new (&doc_header_) ObXmlDocBinHeader(origin.get_version(), + origin.get_encoding(), + origin.get_encoding_flag(), + origin.get_standalone(), + origin.has_xml_decl()); + new(&doc_header_.elem_header_) ObXmlElementBinHeader(origin.get_unparse(), origin.get_prefix()); + header_len += doc_header_.header_size(); + element_header = false; + } else { + new(&ele_header_) ObXmlElementBinHeader(origin.get_unparse(), origin.get_prefix()); + header_len += ele_header_.header_size(); + } + index_start_ = header_->start() + header_len; + index_entry_size_ = header_->count_var_size_; + char* start_ = header.buffer_->ptr() + index_start_; + // offset start + key_entry_start_ = index_start_ + children_count * index_entry_size_; + key_entry_size_ = header_->get_entry_var_size(); + + value_entry_start_ = children_count * (key_entry_size_ * 2) + key_entry_start_; + value_entry_size_ = header_->get_entry_var_size(); + + key_start_ = (value_entry_size_ + sizeof(uint8_t)) *children_count + value_entry_start_; + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(ctx.buffer_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (element_header) { + if (OB_FAIL(ele_header_.serialize(*ctx.buffer_))) { + LOG_WARN("fail to serialize element header", K(ret)); + } + } else if (OB_FAIL(doc_header_.serialize(*ctx.buffer_))) { + LOG_WARN("fail to serialize element header", K(ret)); + } + return ret; +} + +void ObXmlBinMergeMeta::set_index_entry(int64_t origin_index, int64_t sort_index) +{ + int64_t offset = index_start_ + origin_index * index_entry_size_; + char* write_buf = header_->buffer_->ptr() + 
offset; + ObMulModeVar::set_var(sort_index, header_->get_count_var_size_type(), write_buf); +} + +void ObXmlBinMergeMeta::set_key_entry(int64_t entry_idx, int64_t key_offset, int64_t key_len) +{ + int64_t offset = key_entry_start_ + entry_idx * (key_entry_size_ * 2); + char* write_buf = header_->buffer_->ptr() + offset; + ObMulModeVar::set_var(key_offset, header_->get_entry_var_size_type(), write_buf); + write_buf += key_entry_size_; + ObMulModeVar::set_var(key_len, header_->get_entry_var_size_type(), write_buf); +} + +void ObXmlBinMergeMeta::set_value_entry(int64_t entry_idx, uint8_t type, int64_t value_offset) +{ + int64_t offset = value_entry_start_ + entry_idx * (value_entry_size_ + sizeof(uint8_t)); + char* write_buf = header_->buffer_->ptr() + offset; + *reinterpret_cast(write_buf) = type; + ObMulModeVar::set_var(value_offset, header_->get_entry_var_size_type(), write_buf + sizeof(uint8_t)); +} + +void ObXmlBinMergeMeta::set_value_offset(int64_t entry_idx, int64_t value_offset) +{ + int64_t offset = value_entry_start_ + entry_idx * (value_entry_size_ + sizeof(uint8_t)); + char* write_buf = header_->buffer_->ptr() + offset; + ObMulModeVar::set_var(value_offset, header_->get_entry_var_size_type(), write_buf + sizeof(uint8_t)); +} + +int ObXmlBinMerge::reserve_meta(ObMulBinHeaderSerializer& header) +{ + INIT_SUCC(ret); + if (OB_ISNULL(header.buffer_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + int64_t pos = header.buffer_->length(); + uint32_t reserve_size = merge_meta_.key_start_ - merge_meta_.index_start_; + if (OB_FAIL(merge_meta_.header_->buffer_->reserve(reserve_size))) { + LOG_WARN("failed to reserve buffer.", K(ret), K(reserve_size)); + } else { + header.buffer_->set_length(pos + reserve_size); + } + } + return ret; +} + +int ObXmlBinMerge::append_key_without_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObMulBinHeaderSerializer& header, ObIMulModeBase& res) +{ + INIT_SUCC(ret); + if (ctx.only_merge_ns_) { + 
// in this case, we don't need add ns definition + if (OB_FAIL(merge_meta_.init_merge_meta(ctx, origin, header, false))) { + LOG_WARN("fail to init element header", K(ret)); + } else if (OB_FAIL(reserve_meta(header))) { + LOG_WARN("failed to reserve meta.", K(ret)); + } else { + ObXmlBin* bin_origin = static_cast<ObXmlBin*>(&origin); + ObXmlBin* bin_res = static_cast<ObXmlBin*>(&res); + int64_t children_count = origin.attribute_count() + origin.size(); + uint64_t key_start = (bin_origin->meta_.get_value_entry_size() + sizeof(uint8_t)) *children_count + bin_origin->meta_.value_entry_; + uint64_t res_key_offset = merge_meta_.key_start_; + int64_t value_start = 0; + int64_t child_value_start = 0; + uint64_t res_value_offset = 0; + if (OB_FAIL(bin_origin->get_value_start(value_start)) + || OB_FAIL(bin_origin->get_child_value_start(child_value_start))) { + LOG_WARN("failed get origin value start.", K(value_start)); + } else if (value_start < key_start || child_value_start < value_start) { + // value_start == key_start: when there is no k-v child + // child_value_start == value_start: when there is no attribute child + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed get total key len.", K(value_start)); + } else { + res_value_offset = merge_meta_.key_start_ + (value_start - key_start); + } + + // same size of count and entry, copy directly + if (OB_FAIL(ret)) { + } else if (bin_origin->meta_.key_entry_size_ == header.get_entry_var_size() + && bin_origin->meta_.index_entry_size_ == header.get_count_var_size()) { + // copy exactly the index/key-entry/value-entry tables, i.e. the (key_start_ - index_start_) bytes + // that reserve_meta() reserved; one more byte would land past the reserved region + MEMCPY(header.buffer_->ptr() + merge_meta_.index_start_, + bin_origin->meta_.data_ + bin_origin->meta_.index_entry_, + merge_meta_.key_start_ - merge_meta_.index_start_); + } else { + // set sorted index one by one + for (int i = 0; i < bin_origin->meta_.count_ && OB_SUCC(ret); ++i) { + int64_t index_content = 0; + if (OB_FAIL(bin_origin->get_index_content(i, index_content))) { + LOG_WARN("failed get sorted index .", K(value_start)); + } else { + merge_meta_.set_index_entry(i,
index_content); + } + } + } + + // set info one by one + for (int i = 0; i < bin_origin->meta_.count_ && OB_SUCC(ret); ++i) { + int64_t key_len = 0; + int64_t origin_key_offset = 0; + uint8_t type = 0; + int64_t value_offset = 0; + int64_t value_len = 0; + if (OB_FAIL(bin_origin->get_sorted_key_info(i, key_len, origin_key_offset)) + || OB_FAIL(bin_origin->get_value_info(i, type, value_offset, value_len))) { + LOG_WARN("failed get origin info.", K(value_start)); + } else { + merge_meta_.set_key_entry(i, res_key_offset - merge_meta_.header_start_, key_len); + merge_meta_.set_value_entry(i, type, res_value_offset - merge_meta_.header_start_); + res_key_offset += key_len; + res_value_offset += value_len; + } + } + + // if don't append key, because only element value may need merge + // so, copy attribute value directly + if (OB_FAIL(ret)) { + } else if (OB_FAIL(bin_res->buffer_.reserve(child_value_start - key_start))) { + LOG_WARN("fail to reserve", K(ret)); + } else if (OB_FAIL(bin_res->buffer_.append(bin_origin->meta_.data_ + key_start, + child_value_start - key_start))) { + LOG_WARN("fail to append", K(ret)); + } + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supporte to merge other type yet", K(ret)); + } + return ret; +} + +int ObXmlBinMerge::collect_merge_key(ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch, + ObMulBinHeaderSerializer& header, ObArray& attr_vec) +{ + INIT_SUCC(ret); + ObXmlBin* bin_origin = static_cast(&origin); + ObXmlBin* bin_patch = static_cast(&patch); + int defined_ns_size = ctx.defined_ns_idx_.size(); + int64_t new_attr_size = bin_origin->attribute_size() + defined_ns_size; + for (int i = 0; OB_SUCC(ret) && i < new_attr_size; ++i) { + int64_t sorted_index = 0; + int64_t key_offset = 0; + int64_t key_len = 0; + ObBinMergeKeyInfo merge_key_info; + if (i < defined_ns_size) { + int index = ctx.defined_ns_idx_.at(i); + if (OB_FAIL(bin_patch->get_key_info(index, sorted_index, key_offset, key_len))) { + LOG_WARN("failed 
to get key_info.", K(ret)); + } else if (OB_FALSE_IT(merge_key_info = ObBinMergeKeyInfo(bin_patch->meta_.get_data() + key_offset, key_len, sorted_index, i, false))) { + } else if (OB_FAIL(attr_vec.push_back(merge_key_info))) { + LOG_WARN("failed to record key_info.", K(ret)); + } + } else if (OB_FAIL(bin_origin->get_key_info(i - defined_ns_size, sorted_index, key_offset, key_len))) { + LOG_WARN("failed to get key_info.", K(ret)); + } else if (OB_FALSE_IT(merge_key_info = ObBinMergeKeyInfo(bin_origin->meta_.get_data() + key_offset, key_len, sorted_index, i, true))) { + } else if (OB_FAIL(attr_vec.push_back(merge_key_info))) { + LOG_WARN("failed to record key_info.", K(ret)); + } + } + if (OB_SUCC(ret)) { + do_sort(attr_vec); + } + return ret; +} + +int ObXmlBinMerge::append_merge_key(ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch, + ObMulBinHeaderSerializer& header, ObIMulModeBase& res) +{ + INIT_SUCC(ret); + if (ctx.only_merge_ns_) { + ObArray attr_vec; + if (OB_FAIL(merge_meta_.init_merge_meta(ctx, origin, header, true))) { + LOG_WARN("fail to init element header", K(ret)); + } else if (OB_FAIL(reserve_meta(header))) { + LOG_WARN("failed to reserve meta.", K(ret)); + } else if (OB_FAIL(collect_merge_key(ctx, origin, patch, header, attr_vec))) { + LOG_WARN("failed to collect merge key.", K(ret)); + } else { + ObXmlBin* bin_origin = static_cast(&origin); + ObXmlBin* bin_patch = static_cast(&patch); + ObXmlBin* bin_res = static_cast(&res); + uint64_t res_key_offset = merge_meta_.key_start_; + int attr_size = attr_vec.size(); + int origin_attr_size = origin.attribute_size(); + int defined_ns_size = ctx.defined_ns_idx_.size(); + // set attribute key&index info by attr_vec + for (int i = 0; i < attr_size && OB_SUCC(ret); ++i) { + // set index + merge_meta_.set_index_entry(attr_vec[i].text_index_, i); + // set key_entry + merge_meta_.set_key_entry(i, res_key_offset - merge_meta_.header_start_, attr_vec[i].key_len_); + // set key + if 
(OB_FAIL(bin_res->buffer_.reserve(attr_vec[i].key_len_))) { + LOG_WARN("fail to reserve", K(ret)); + } else if (OB_FAIL(bin_res->buffer_.append(attr_vec[i].key_ptr_, attr_vec[i].key_len_))) { + LOG_WARN("fail to append", K(ret)); + } else { + res_key_offset += attr_vec[i].key_len_; + } + } + // set element index info + for (int i = 0; i < origin.size() && OB_SUCC(ret); ++i) { + int64_t index_content = 0; + if (OB_FAIL(bin_origin->get_index_content(origin_attr_size + i, index_content))) { + LOG_WARN("failed get sorted index .", K(ret), K(i)); + } else { + merge_meta_.set_index_entry(attr_size + i, index_content + defined_ns_size); + } + } + // set element key info + for (int i = 0; i < origin.size() && OB_SUCC(ret); ++i) { + int64_t key_len = 0; + int64_t origin_key_offset = 0; + if (OB_FAIL(bin_origin->get_sorted_key_info(origin_attr_size + i, key_len, origin_key_offset))) { + LOG_WARN("failed get origin info.", K(origin_attr_size + i)); + } else if (OB_FAIL(bin_res->buffer_.reserve(key_len))) { + LOG_WARN("fail to reserve", K(ret)); + } else if (OB_FAIL(bin_res->buffer_.append(bin_origin->meta_.get_data() + origin_key_offset, key_len))) { + LOG_WARN("fail to append", K(ret)); + } else { + merge_meta_.set_key_entry(attr_size + i, res_key_offset - merge_meta_.header_start_, key_len); + res_key_offset += key_len; + } + } + + // set value entry + uint64_t res_value_offset = res_key_offset; + for (int i = 0; i < header.count_ && OB_SUCC(ret); ++i) { + uint8_t type = 0; + int64_t value_len = 0; + int64_t value_offset = 0; + int64_t origin_sorted_idx = 0; + // attribute : set type, value_offset and append value + if (i < attr_size) { + origin_sorted_idx = attr_vec[i].origin_index_; + ObXmlBin* bin = attr_vec[i].is_origin_ ? 
bin_origin : bin_patch; + if (OB_FAIL(bin->get_value_info(origin_sorted_idx, type, value_offset, value_len))) { + LOG_WARN("failed get origin info.", K(i)); + } else if (OB_FAIL(bin_res->buffer_.reserve(value_len))) { + LOG_WARN("fail to reserve", K(ret)); + } else if (OB_FAIL(bin_res->buffer_.append(bin->meta_.get_data() + value_offset, value_len))) { + LOG_WARN("fail to append", K(ret)); + } else { + merge_meta_.set_value_entry(i, type, res_value_offset - merge_meta_.header_start_); + res_value_offset += value_len; + } + } else { + // element: only set type + origin_sorted_idx = i - defined_ns_size; + if (OB_FAIL(bin_origin->get_value_info(origin_sorted_idx, type, value_offset, value_len))) { + LOG_WARN("failed get origin info.", K(i)); + } else { + merge_meta_.set_value_entry(i, type, res_value_offset - merge_meta_.header_start_); + res_value_offset += value_len; + } + } + } + // append attribute value + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supporte to merge other type yet", K(ret)); + } + return ret; +} + +void ObXmlBinMerge::do_sort(ObArray& attr_vec) +{ + ObBinMergeKeyCompare cmp; + std::stable_sort(attr_vec.begin(), attr_vec.end(), cmp); +} + +uint64_t ObXmlBinMerge::estimated_count(bool retry, ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch) +{ + return retry ? ctx.retry_count_ : origin.attribute_count() + origin.size() + patch.attribute_count(); +} + +uint64_t ObXmlBinMerge::estimated_length(bool retry, ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch) +{ + ObXmlBin* bin_origin = static_cast(&origin); + ObXmlBin* bin_patch = static_cast(&patch); + return retry ? 
ctx.retry_len_ : ceil((bin_origin->meta_.len_ + bin_patch->meta_.len_) * 1.2); +} +int ObXmlBinMerge::set_value_offset(int idx, uint64_t offset, ObBinMergeCtx& ctx, ObIMulModeBase& res) +{ + INIT_SUCC(ret); + if (idx < merge_meta_.attr_count_) { + } else if (idx < merge_meta_.child_count_ + merge_meta_.attr_count_) { + merge_meta_.set_value_offset(idx, offset); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error idx", K(ret)); + } + return ret; +} +int ObXmlBinMerge::append_value_by_idx(bool is_origin, int index, ObBinMergeCtx& ctx, ObIMulModeBase& origin, + ObIMulModeBase& patch, ObMulBinHeaderSerializer& header, ObIMulModeBase& res) +{ + INIT_SUCC(ret); + if (ctx.only_merge_ns_) { + ObXmlBin* bin_origin = static_cast(&origin); + if (!is_origin) { // ns value, already appended + } else if (index < bin_origin->meta_.count_) { + ObXmlBin bin_buffer; + ObIMulModeBase* cur = nullptr; + ObXmlBinMerge bin_merge; + cur = bin_origin->sorted_at(index, &bin_buffer); + if (OB_ISNULL(cur)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("fail to get value", K(ret)); + } else if (cur->type() != M_ELEMENT) { + if (OB_FAIL(append_value_without_merge(ctx, *cur, res))) { + LOG_WARN("fail to append value", K(ret)); + } + } else if (OB_FAIL(bin_merge.inner_merge(ctx, *cur, patch, res))) { + LOG_WARN("fail to append value", K(ret)); + } + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supporte to merge other type yet", K(ret)); + } + return ret; +} + +} // namespace common +} // namespace oceanbase diff --git a/deps/oblib/src/lib/xml/ob_xml_bin.h b/deps/oblib/src/lib/xml/ob_xml_bin.h new file mode 100644 index 0000000000..153cbd7c84 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xml_bin.h @@ -0,0 +1,978 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface support for the xml bin abstraction. + */ + +#ifndef OCEANBASE_SQL_OB_XML_BIN +#define OCEANBASE_SQL_OB_XML_BIN + +#include "lib/xml/ob_multi_mode_interface.h" +#include "ob_multi_mode_bin.h" +#include "ob_tree_base.h" +#include "ob_xml_tree.h" + +namespace oceanbase { +namespace common { + +/** + * xml binary format as following + * + * | common_header | doc_header | element header | key entry | value_entry | real key | real value | + * +*/ + +/** + * element header: + * |uint16_t flags | prefix_ | standalone(uint16_t) | +*/ +struct ObXmlElementBinHeader { + ObXmlElementBinHeader() + : flags_(0), + prefix_len_size_(0), + prefix_len_(0), + prefix_() {} + + ObXmlElementBinHeader(uint16_t is_unparsed, + const ObString& prefix) + : ObXmlElementBinHeader() + { + prefix_ = prefix; + if (prefix_.empty()) { + is_prefix_ = 0; + } else { + is_prefix_ = 1; + prefix_len_ = prefix_.length(); + prefix_len_size_ = serialization::encoded_length_vi64(prefix.length()); + } + is_unparse_ = is_unparsed; + } + +public: + union { + struct { + // the bit widths must add up to 8 so that the flag bits alias exactly the single byte of flags_ + uint8_t is_unparse_ : 1; + uint8_t is_prefix_ : 1; + uint8_t reserved_ : 6; + }; + + uint8_t flags_; + }; + + uint8_t prefix_len_size_; + uint16_t prefix_len_; + ObString prefix_; + + uint32_t header_size(); + ObString get_prefix() { return prefix_; } + uint16_t get_unparse() { return is_unparse_; } + + int serialize(ObStringBuffer& buffer); + int deserialize(const char* data, int64_t length); +}; + +/** + * header: + * |uint8_t flags | prefix_ | +*/ +struct ObXmlAttrBinHeader { + ObXmlAttrBinHeader(const ObString& prefix, ObMulModeNodeType type) + { + type_ = type; +
prefix_ = prefix;
+    if (prefix_.empty()) {
+      is_prefix_ = 0;
+    } else {
+      is_prefix_ = 1;
+      prefix_len_ = prefix_.length();
+      prefix_len_size_ = serialization::encoded_length_vi64(prefix.length());
+    }
+  }
+
+  // Empty header. Every scalar member is initialized so that a
+  // default-constructed header never exposes indeterminate values;
+  // type_ is value-initialized (zero).
+  ObXmlAttrBinHeader()
+    : flags_(0),
+      prefix_len_size_(0),
+      prefix_len_(0),
+      prefix_(0),
+      type_() {}
+
+  union {
+    uint8_t flags_;
+    struct {
+      uint8_t is_prefix_ : 1;   // set when prefix_ is non-empty
+      uint8_t reserved_ : 7;
+    };
+  };
+
+  uint8_t prefix_len_size_;     // encoded_length_vi64() of the prefix length
+  uint16_t prefix_len_;         // prefix length in bytes
+  ObString prefix_;
+  ObMulModeNodeType type_;
+
+  uint32_t header_size();
+  ObString get_prefix() { return prefix_; }
+  int serialize(ObStringBuffer* buffer);
+  int deserialize(const char* data, int64_t length);
+};
+
+/**
+ * Serializes an attribute node into a buffer, or rebuilds a node from raw
+ * bytes. Which member group is meaningful depends on the constructor used.
+ */
+class ObXmlAttributeSerializer {
+public:
+  ObXmlAttributeSerializer(ObIMulModeBase* root, ObStringBuffer& buffer);
+  ObXmlAttributeSerializer(const char* data, int64_t length, ObMulModeMemCtx* ctx);
+
+  int serialize();
+  int deserialize(ObIMulModeBase*& handle);
+
+private:
+  // for serialize
+  ObIMulModeBase* root_;
+  ObStringBuffer* buffer_;
+  ObXmlAttrBinHeader header_;
+
+  // for deserialize
+  const char* data_;
+  int64_t data_len_;
+  ObIAllocator* allocator_;
+  ObMulModeMemCtx* ctx_;
+};
+
+/**
+ * text header:
+ * | type | value |
+*/
+class ObXmlTextSerializer {
+public:
+  ObXmlTextSerializer(ObIMulModeBase* root, ObStringBuffer& buffer);
+  ObXmlTextSerializer(const char* data, int64_t length, ObMulModeMemCtx* ctx);
+  int serialize();
+  int deserialize(ObIMulModeBase*& handle);
+  int64_t header_size() { return is_extend_type(type_) ? 
sizeof(uint8) * 2 : sizeof(uint8_t); }
+private:
+  // for serialize
+  ObIMulModeBase* root_;
+  ObStringBuffer* buffer_;
+  ObMulModeNodeType type_;
+  // for deserialize
+  const char* data_;
+  int64_t data_len_;
+  ObIAllocator* allocator_;
+  ObMulModeMemCtx* ctx_;
+};
+
+/**
+ * | flags_ | version | is_encoding | element_header |
+*/
+struct ObXmlDocBinHeader {
+  // Empty document header: no flags, no version/encoding strings.
+  ObXmlDocBinHeader()
+    : flags_(0),
+      version_len_(0),
+      encode_len_(0),
+      standalone_(0),
+      version_(),
+      encoding_(),
+      elem_header_(0, "") {}
+
+  // Same as the default header, but flags_ is seeded with sort_flag.
+  ObXmlDocBinHeader(int32_t sort_flag)
+    : flags_(sort_flag),
+      version_len_(0),
+      encode_len_(0),
+      standalone_(0),
+      version_(),
+      encoding_(),
+      elem_header_(0, "") {}
+
+  // Header built from an XML declaration. Every flag bit is derived with an
+  // explicit "> 0" test so a non-zero argument can never be truncated to 0
+  // when it is stored into a 1-bit field.
+  ObXmlDocBinHeader(const ObString& version,
+                    const ObString& encoding,
+                    uint16_t encoding_empty,
+                    uint16_t standalone,
+                    uint16_t is_xml_decl)
+    : flags_(0),
+      standalone_(standalone),
+      version_(version),
+      encoding_(encoding),
+      elem_header_(0, "")
+  {
+    encode_len_ = encoding.length();
+    version_len_ = version.length();
+    is_version_ = version_len_ > 0;
+    is_encoding_ = encode_len_ > 0;
+    is_standalone_ = standalone > 0;
+    is_encoding_empty_ = encoding_empty > 0;
+    is_xml_decl_ = is_xml_decl > 0;
+  }
+
+  union {
+    uint16_t flags_; //
+    struct {
+      uint16_t is_version_ : 1;
+      uint16_t is_encoding_ : 1;
+      uint16_t is_int_dtd_ : 1;
+      uint16_t is_ext_dtd_ : 1;
+
+      uint16_t is_standalone_ : 1;
+      uint16_t is_xml_decl_ : 1;
+      uint16_t is_encoding_empty_ : 1;
+      uint16_t reserved_ : 9;
+    };
+  };
+
+  // If is_prefix, is_standalone_ or is_encoding_ is 0, the corresponding member is not serialized
+  uint8_t version_len_;
+  uint8_t encode_len_;
+  uint16_t standalone_;
+  ObString version_;
+  ObString encoding_;
+
+  ObXmlElementBinHeader elem_header_;
+
+  uint64_t header_size();
+  ObString get_version() { return version_; }
+  ObString get_encoding() { return encoding_; }
+  uint8_t has_xml_decl() { return is_xml_decl_; }
+  uint8_t get_encoding_empty() { return is_encoding_empty_; }
+  uint16_t get_standalone() { return standalone_; }
+
+  int 
serialize(ObStringBuffer& buffer);
+  int deserialize(const char* data, int64_t length);
+};
+
+/**
+ * Element value
+ * | element header | index-array | key_entry | value entry | key | value |
+ * key-entry : | key_offset | key_len |
+ * value-entry | type | value_offset |
+ *
+ * Serializer/deserializer for a container node. The first constructor is
+ * the serialize side (tree -> buffer), the second the deserialize side
+ * (raw bytes -> tree).
+*/
+class ObXmlElementSerializer : public ObMulModeContainerSerializer {
+public:
+  static const int64_t MAX_RETRY_TIME = 2;
+  // root must be ObXmlElement or ObXmlDocument
+  ObXmlElementSerializer(ObIMulModeBase* root, ObStringBuffer* buffer, bool serialize_key = false);
+  ObXmlElementSerializer(const char* data, int64_t length, ObMulModeMemCtx* ctx);
+  int serialize_value(int idx, int64_t depth);
+  int serialize_key(int idx, int64_t depth);
+  int serialize(int64_t depth);
+  int deserialize(ObIMulModeBase*& node);
+  int reserve_meta();
+  void set_key_entry(int64_t entry_idx, int64_t key_offset, int64_t key_len);
+  void set_index_entry(int64_t origin_index, int64_t sort_index);
+
+  // total number of members: attributes plus children
+  int64_t size() { return attr_count_ + child_count_; }
+  int serialize_child_key(const ObString& key, int64_t idx);
+  void set_value_entry(int64_t entry_idx, uint8_t type, int64_t value_offset);
+
+  // Inclusive index range [l_start_, l_last_] (plus a g_start_/g_last_ pair)
+  // over the members of entry_. All indexes are -1 until assigned.
+  struct MemberArray {
+    int64_t g_start_;
+    int64_t g_last_;
+
+    int64_t l_start_;
+    int64_t l_last_;
+
+    ObIMulModeBase* entry_;
+
+    bool is_valid() { return l_start_ != -1; }
+    // NOTE(review): for a default-constructed array (-1, -1) this yields 1,
+    // not 0; presumably callers check is_valid() first - confirm.
+    int64_t size() { return l_last_ - l_start_ + 1; }
+
+    MemberArray()
+      : g_start_(-1),
+        g_last_(-1),
+        l_start_(-1),
+        l_last_(-1),
+        entry_(nullptr) {}
+  };
+
+private:
+  int64_t attr_count_;
+  int64_t child_count_;
+
+  int64_t index_start_;
+  int8_t index_entry_size_;
+  int64_t key_entry_start_;
+  int8_t key_entry_size_;
+  int64_t key_start_;
+
+  MemberArray child_arr_[2];
+
+  // exactly one of the two headers is in use, depending on the root type
+  // (see the constructor comment above) - TODO confirm in the .cpp
+  union {
+    ObXmlDocBinHeader doc_header_;
+    ObXmlElementBinHeader ele_header_;
+  };
+
+  bool serialize_key_;
+
+  // for deserialize
+  const char* data_;
+  int64_t data_len_;
+  ObIAllocator* allocator_;
+  ObMulModeMemCtx* ctx_;
+
+  int64_t serialize_try_time_;
+};
+
+struct 
ObXmlBinIndexMeta {
+  // Reads entry `idx` of an index-entry array; the entry width is given
+  // either as a byte size (var_size) or as a size-type code (var_type).
+  ObXmlBinIndexMeta(const char* index_entry, int64_t idx, int64_t var_size);
+  ObXmlBinIndexMeta(const char* index_entry, int64_t idx, uint8_t var_type);
+  int64_t get_index();
+  int64_t pos_;
+};
+
+// Offset/length pair describing one key inside the binary.
+// Both fields are -1 until the meta has been read or assigned.
+struct ObXmlBinKeyMeta {
+  ObXmlBinKeyMeta() : offset_(-1), len_(-1) {}
+  ObXmlBinKeyMeta(const char* cur_entry, uint8_t var_type);
+  ObXmlBinKeyMeta(const char* cur_entry, int64_t var_size);
+  ObXmlBinKeyMeta(const char* key_entry, int64_t idx, uint8_t var_type);
+  ObXmlBinKeyMeta(const char* key_entry, int64_t idx, int64_t var_size);
+
+  ObXmlBinKeyMeta(int64_t offset, int32_t len);
+  ObXmlBinKeyMeta(const ObXmlBinKeyMeta& meta);
+
+  void read(const char* cur_entry, int64_t var_size);
+  void read(const char* cur_entry, uint8_t var_size);
+
+  int64_t offset_;
+  int64_t len_;
+};
+
+// A key together with the meta entry that locates it.
+struct ObXmlBinKey {
+  ObXmlBinKey() : meta_(), key_() {}
+  ObXmlBinKey(const char* data, int64_t cur_entry, uint8_t var_type);
+  ObXmlBinKey(const char* data, int64_t cur_entry, int64_t var_size);
+  ObXmlBinKey(const char* data, int64_t key_entry, int64_t idx, uint8_t var_type);
+  ObXmlBinKey(const char* data, int64_t key_entry, int64_t idx, int64_t var_size);
+  ObXmlBinKey(const char* data, int64_t offset, int32_t len);
+  ObXmlBinKey(const ObString& key);
+  ObXmlBinKey(const ObXmlBinKey& other);
+
+  ObString get_key() { return key_; }
+
+  ObXmlBinKeyMeta meta_;
+  ObString key_;
+};
+
+// Parsed view of the header of one node inside an xml binary.
+struct ObXmlBinMetaParser {
+  // Unparsed, empty state. type_ is value-initialized (zero) so that the
+  // copy constructor and type() never read an indeterminate enum value.
+  ObXmlBinMetaParser()
+    : data_(nullptr),
+      len_(0), // bin_len (without extend)
+      total_(0), // cur_obj_size
+      count_(0),
+      key_len_(0),
+      version_len_(0),
+      encoding_len_(0),
+      prefix_len_(0),
+      value_len_(0),
+      key_ptr_(nullptr),
+      version_ptr_(nullptr),
+      encoding_ptr_(nullptr),
+      prefix_ptr_(nullptr),
+      value_ptr_(nullptr),
+      index_entry_(0),
+      key_entry_(0),
+      value_entry_(0),
+      type_(),
+      child_pos_(-1),
+      idx_(-1),
+      sort_idx_(-1),
+      parsed_(0),
+      is_empty_(0),
+      is_unparse_(0),
+      has_xml_decl_(0),
+      encoding_val_empty_(0),
+      standalone_(0),
+      
index_entry_size_(0),
+      index_entry_size_type_(0),
+      key_entry_size_(0),
+      key_entry_size_type_(0),
+      value_entry_size_(0),
+      value_entry_size_type_(0) {}
+
+  // Unparsed view over [data, data + len).
+  ObXmlBinMetaParser(const char* data, int64_t len)
+    : ObXmlBinMetaParser()
+  {
+    data_ = data;
+    len_ = len;
+  }
+
+  // Member-wise copy; the pointers are shared, not duplicated.
+  ObXmlBinMetaParser(const ObXmlBinMetaParser& other)
+    : data_(other.data_),
+      len_(other.len_),
+      total_(other.total_),
+      count_(other.count_),
+      key_len_(other.key_len_),
+      version_len_(other.version_len_),
+      encoding_len_(other.encoding_len_),
+      prefix_len_(other.prefix_len_),
+      value_len_(other.value_len_),
+      key_ptr_(other.key_ptr_),
+      version_ptr_(other.version_ptr_),
+      encoding_ptr_(other.encoding_ptr_),
+      prefix_ptr_(other.prefix_ptr_),
+      value_ptr_(other.value_ptr_),
+      index_entry_(other.index_entry_),
+      key_entry_(other.key_entry_),
+      value_entry_(other.value_entry_),
+      type_(other.type_),
+      child_pos_(other.child_pos_),
+      idx_(other.idx_),
+      sort_idx_(other.sort_idx_),
+      parsed_(other.parsed_),
+      is_empty_(other.is_empty_),
+      is_unparse_(other.is_unparse_),
+      has_xml_decl_(other.has_xml_decl_),
+      encoding_val_empty_(other.encoding_val_empty_),
+      standalone_(other.standalone_),
+      index_entry_size_(other.index_entry_size_),
+      index_entry_size_type_(other.index_entry_size_type_),
+      key_entry_size_(other.key_entry_size_),
+      key_entry_size_type_(other.key_entry_size_type_),
+      value_entry_size_(other.value_entry_size_),
+      value_entry_size_type_(other.value_entry_size_type_) {}
+
+
+  // Two parsers are equal when they view the same bytes.
+  bool operator==(const ObXmlBinMetaParser& other) {
+    return data_ == other.data_ && len_ == other.len_;
+  }
+
+  // Ordering by start address of the viewed bytes.
+  bool operator<(const ObXmlBinMetaParser& other) {
+    return data_ < other.data_;
+  }
+  // Copy-assign by re-constructing in place. Self-assignment is skipped:
+  // constructing an object from itself while its storage is being reused
+  // would read members whose values are no longer guaranteed.
+  ObXmlBinMetaParser& operator=(const ObXmlBinMetaParser& other) {
+    if (this != &other) {
+      new (this) ObXmlBinMetaParser(other);
+    }
+    return *this;
+  }
+
+  int parser();
+  int parser(const char* data, int64_t len);
+
+  ObString get_version();
+  ObString get_encoding();
+  uint16_t get_standalone();
+  ObString get_prefix();
+  ObString get_key();
+  ObString get_value();
+  uint8_t get_key_entry_size();
+  uint8_t get_key_entry_size_type();
+  uint8_t get_value_entry_size();
+  uint8_t get_value_entry_size_type();
+
+  int64_t get_key_offset(int64_t index);
+  int64_t get_value_offset(int64_t index);
+  int64_t get_index(int64_t index);
+  bool is_empty() { return is_empty_; }
+  // NOTE(review): the template argument of this const_cast is missing in
+  // this copy of the patch; presumably char* (the return type) - confirm.
+  char* get_data() { return const_cast(data_);}
+
+  // viewed bytes
+  const char* data_;
+  int64_t len_;     // bin_len (without extend)
+  int64_t total_;   // cur_obj_size
+
+  int32_t count_;
+
+  // lengths of the strings located by the *_ptr_ members below
+  uint16_t key_len_;
+  uint8_t version_len_;
+  uint8_t encoding_len_;
+
+  uint32_t prefix_len_;
+  uint32_t value_len_;
+
+  char* key_ptr_;
+  char* version_ptr_;
+  char* encoding_ptr_;
+  char* prefix_ptr_;
+
+  char* value_ptr_;
+  int32_t index_entry_;
+  int32_t key_entry_;
+  int32_t value_entry_;
+
+  ObMulModeNodeType type_;
+  int32_t child_pos_;
+
+  int32_t idx_;
+  int32_t sort_idx_;
+  bool parsed_;
+
+  bool is_empty_;
+  bool is_unparse_;
+  bool has_xml_decl_;
+  bool encoding_val_empty_;
+
+  uint8_t standalone_;
+
+  // each entry array has a size and a size-type member
+  uint8_t index_entry_size_;
+  uint8_t index_entry_size_type_;
+
+  uint8_t key_entry_size_;
+  uint8_t key_entry_size_type_;
+
+  uint8_t value_entry_size_;
+  uint8_t value_entry_size_type_;
+
+  TO_STRING_KV(K(len_),
+               K(total_),
+               K(count_),
+               K(key_len_),
+               K(version_len_),
+               K(encoding_len_),
+               K(prefix_len_),
+               K(value_len_),
+               K(index_entry_),
+               K(key_entry_),
+               K(value_entry_),
+               K(type_),
+               K(idx_),
+               K(standalone_),
+               K(index_entry_size_),
+               K(index_entry_size_type_),
+               K(key_entry_size_),
+               K(key_entry_size_type_),
+               K(value_entry_size_),
+               K(value_entry_size_type_));
+};
+
+// NOTE(review): both typedefs below have lost their template arguments in
+// this copy of the patch; restore them from the upstream header.
+typedef PageArena LibModuleArena;
+typedef common::ObArray ObXmlBinMetaArray;
+
+const int64_t OB_XMLBIN_META_SIZE = (1LL << 10); // 1KB
+
+/**
+ * Read-only node view over an xml binary. Mutating operations of
+ * ObIMulModeBase are rejected with OB_NOT_SUPPORTED (see the end of the class).
+ */
+class ObXmlBin : public ObIMulModeBase {
+public:
+  ObStringBuffer buffer_;
+  ObXmlBinMetaParser meta_;
+  ObMulModeMemCtx* ctx_;
+  // true when buffer_ is used to record the extend area
+  bool buffer_for_extend_;
+public:
+  friend class ObXmlBinMerge;
+  ObXmlBin()
+    : 
ObIMulModeBase(ObNodeMemType::BINARY_TYPE, ObNodeDataType::OB_XML_TYPE),
+      buffer_(),
+      meta_(),
+      ctx_(nullptr),
+      buffer_for_extend_(false) {}
+
+  // Copy the parsed meta of `other` but bind this node to `ctx`.
+  // buffer_ is NOT copied; it starts empty and uses ctx's allocator.
+  ObXmlBin(const ObXmlBin& other, ObMulModeMemCtx* ctx)
+    : ObIMulModeBase(ObNodeMemType::BINARY_TYPE, ObNodeDataType::OB_XML_TYPE),
+      buffer_(),
+      meta_(other.meta_),
+      ctx_(ctx),
+      buffer_for_extend_(false)
+  {
+    if (ctx && ctx->allocator_) {
+      ObIMulModeBase::set_allocator(ctx->allocator_);
+      buffer_.set_allocator(allocator_);
+    }
+  }
+
+  // Copy sharing the memory context of `other`.
+  ObXmlBin(const ObXmlBin& other)
+    : ObXmlBin(other, other.ctx_)
+  {
+  }
+
+  // Empty node bound to `ctx` (a null ctx leaves the node without allocator).
+  ObXmlBin(ObMulModeMemCtx* ctx)
+    : ObXmlBin()
+  {
+    if (OB_NOT_NULL(ctx)) {
+      ctx_ = ctx;
+      buffer_.set_allocator(ctx->allocator_);
+      set_allocator(ctx->allocator_);
+    }
+  }
+
+  // Unparsed view over the bytes of `data`.
+  ObXmlBin(const ObString &data, ObMulModeMemCtx* ctx = nullptr)
+    : ObXmlBin(ctx)
+  {
+    new (&meta_) ObXmlBinMetaParser(data.ptr(), data.length());
+  }
+
+  ObXmlBin(const char* data, size_t len)
+    : ObXmlBin(ObString(len, data), nullptr)
+  {
+  }
+
+  // Copy-assign by re-constructing in place from rhs. Self-assignment is a
+  // no-op: without the guard the object would be rebuilt from itself, which
+  // resets buffer_ to an empty buffer and drops whatever it held.
+  ObXmlBin& operator=(const ObXmlBin& rhs)
+  {
+    if (this != &rhs) {
+      new (this) ObXmlBin(rhs);
+    }
+    return *this;
+  }
+
+  // Drop the private buffer; only done when an allocator is available.
+  void reset() {
+    if (ctx_ && ctx_->allocator_) {
+      buffer_.reset();
+      buffer_for_extend_ = false;
+    }
+  }
+
+  int get_ns_value(ObStack& stk, ObString& value, ObIMulModeBase* extend);
+  int node_ns_value(ObString& prefix, ObString& ns_value);
+
+  ObMulModeMemCtx* get_mem_ctx() { return ctx_; }
+
+  // Stub: always returns 0 and leaves ns_value untouched.
+  int get_ns_value(ObString& ns_value) {
+    return 0;
+  }
+
+  int get_ns_value(const ObString& prefix, ObString& ns_value, int& ans_idx);
+
+  virtual bool is_equal_node(const ObIMulModeBase* other);
+  virtual bool is_node_before(const ObIMulModeBase* other);
+  virtual bool check_extend();
+  virtual bool check_if_defined_ns();
+
+  virtual int get_attribute(ObIArray& res, ObMulModeNodeType filter_type, int32_t flags = 0);
+
+  virtual int get_attribute(ObIMulModeBase*& res, ObMulModeNodeType filter_type, const ObString& key1, const ObString &key2 = ObString());
+
+  int deep_copy(ObXmlBin& from);
+
+  int64_t 
child_size();
+
+  int64_t low_bound(const ObString& key);
+  int64_t up_bound(const ObString& key);
+
+  friend class ObXmlBinIterator;
+
+  typedef class ObXmlBinIterator iterator;
+
+  iterator begin();
+  iterator end();
+
+  int64_t size();
+
+  int64_t count();
+
+  int64_t attribute_size();
+
+  int64_t attribute_count();
+  // total_ of the parsed meta (cur_obj_size)
+  int64_t origin_bin_len() { return meta_.total_;}
+
+  // node type as recorded in the parsed meta
+  ObMulModeNodeType type() const { return meta_.type_; }
+
+  virtual ObNodeMemType get_internal_type() { return ObNodeMemType::BINARY_TYPE; }
+
+  // namespace, attribute, children all together
+  int set_at(int64_t pos);
+
+  int set_sorted_at(int64_t sort_idx);
+
+  // just child
+  int set_child_at(int64_t pos);
+
+  int construct(ObXmlBin*& res, ObIAllocator *allocator_);
+
+  int get_value_start(int64_t &value_start);
+
+  /**
+   * for search
+   * current node key name is consistent with input key string
+  */
+  virtual int compare(const ObString& key, int& res);
+  // seek interface
+
+  /**
+   * under current node
+   * get specified element's value string
+  */
+
+  virtual int get_value(ObIMulModeBase*& value, int64_t index = -1);
+
+
+  virtual int get_value(ObString& value, int64_t index = -1);
+
+  ObString get_prefix();
+
+  ObString get_version();
+
+  // the accessors below expose the boolean meta flags widened to uint16_t
+  uint16_t get_encoding_flag() { return meta_.encoding_val_empty_;}
+
+  uint16_t has_xml_decl() { return meta_.has_xml_decl_;}
+  uint16_t is_unparse() { return meta_.is_unparse_;}
+  // a binary node has no separate attribute handle
+  ObIMulModeBase* get_attribute_handle() { return nullptr; }
+
+  ObString get_encoding();
+
+  uint16_t get_standalone();
+
+  // only updates the parsed meta, not the underlying bytes
+  void set_standalone(uint16_t standalone) { meta_.standalone_ = standalone; }
+
+  bool get_unparse();
+
+  bool get_is_empty();
+
+  bool has_flags(ObMulModeNodeFlag flag);
+
+  /**
+   * under current node
+   * get specified element's key string
+  */
+  int get_key(ObString& res, int64_t index = -1);
+  int get_total_value(ObString& res, int64_t value_start);
+  ObString get_element_buffer();
+  int get_text_value(ObString &value);
+  int get_index_key(ObString& res, 
int64_t &origin_index, int64_t &value_offset, int64_t index = -1);
+  int get_value_entry_type(uint8_t &type, int64_t index = -1);
+
+  /**
+   * get specified element pointer under current node
+  */
+
+  int32_t get_child_start();
+
+  // at / attribute_at / sorted_at take an optional caller-provided node
+  // used as storage for the result
+  ObIMulModeBase* at(int64_t pos, ObIMulModeBase* buffer = nullptr);
+
+  ObIMulModeBase* attribute_at(int64_t pos, ObIMulModeBase* buffer = nullptr);
+  ObIMulModeBase* sorted_at(int64_t pos, ObIMulModeBase* buffer = nullptr);
+
+  // NOTE(review): the element type of the ObIArray parameters in this class
+  // has been stripped in this copy of the patch; restore from upstream.
+  int get_range(int64_t start, int64_t end, ObIArray &res, ObMulModeFilter* filter = nullptr);
+
+  // the index is sorted index
+  int get_index_content(int64_t index, int64_t &index_content);
+  int get_sorted_key_info(int64_t index, int64_t &key_len, int64_t &key_offset);
+  int get_key_info(int64_t text_index, int64_t& sorted_index, int64_t &value_offset, int64_t &key_len);
+  int get_value_info(int64_t index, uint8_t &type, int64_t &value_offset, int64_t &value_len);
+  int get_child_value_start(int64_t &value_start);
+  int get_node_count(ObMulModeNodeType node_type, int &count);
+  int get_children(ObIArray &res, ObMulModeFilter* filter = nullptr);
+
+  int get_after(ObIArray& nodes, ObMulModeFilter* filter = nullptr);
+
+  int get_before(ObIArray& nodes, ObMulModeFilter* filter = nullptr);
+
+  int get_descendant(ObIArray& nodes, scan_type type, ObMulModeFilter* filter = nullptr);
+
+  /**
+   * get all children under current node whose key string is equal to the input key string
+  */
+  int get_children(const ObString& key, ObIArray& res, ObMulModeFilter* filter = nullptr);
+
+  /**
+   * current node's binary
+  */
+  virtual int get_raw_binary(common::ObString &out, ObIAllocator *allocator = NULL);
+
+  /**
+   * for node compare
+   * current json use
+   * (stub for xml binary: always returns 0 and leaves res untouched)
+  */
+  virtual int compare(const ObIMulModeBase &other, int &res) { return 0; }
+
+  /**
+   * serialize tree into binary
+  */
+  int parse_tree(ObIMulModeBase* root, bool set_alter_member = true);
+
+  /**
+   * binary type transform to tree
+  */
+  int to_tree(ObIMulModeBase*& 
root); + + int parse(const char* data, int64_t len); + + int parse(); + int append_extend(ObXmlElement* ele); + int append_extend(char* start, int64_t len); + int remove_extend(); + int merge_extend(ObXmlBin& res); + int get_extend(ObXmlBin& extend); + int get_extend(char*& start, int64_t& len); + // not support modify on binary + int append(ObIMulModeBase* node) { return OB_NOT_SUPPORTED; } + + int insert(int64_t pos, ObIMulModeBase* node) { return OB_NOT_SUPPORTED; } + + int remove(int64_t pos) { return OB_NOT_SUPPORTED; } + + int remove(ObIMulModeBase* node) { return OB_NOT_SUPPORTED; } + + int update(int64_t pos, ObIMulModeBase* new_node) { return OB_NOT_SUPPORTED; } + + int update(ObIMulModeBase* old_node, ObIMulModeBase* new_node) { return OB_NOT_SUPPORTED; } +}; + +class ObXmlBinIterator { +public: + friend class ObXmlBin; + + ObXmlBinIterator() + : is_valid_(false), + is_sorted_iter_(false), + cur_pos_(-1), + total_(-1), + cur_node_(), + ctx_(nullptr) + {} + + // construct + ObXmlBinIterator(ObXmlBin* bin, bool is_sorted_iter = false) + : is_valid_(true), + is_sorted_iter_(is_sorted_iter), + cur_pos_(0), + total_(bin->meta_.count_), + meta_header_(bin->meta_), + cur_node_(*bin), + ctx_(bin->ctx_) + { + cur_node_.meta_.parsed_ = false; + } + + // construct + ObXmlBinIterator(const ObXmlBinIterator& iter) + : is_valid_(iter.is_valid_), + is_sorted_iter_(iter.is_sorted_iter_), + cur_pos_(iter.cur_pos_), + total_(iter.total_), + meta_header_(iter.meta_header_), + ctx_(iter.ctx_) + { + new (&cur_node_) ObXmlBin(iter.cur_node_); + } + + ObXmlBinIterator& operator=(const ObXmlBinIterator& from) { + new(this) ObXmlBinIterator(from); + + return *this; + } + + ObXmlBin* current(); + ObXmlBin* operator*(); + ObXmlBin* operator->(); + ObXmlBin* operator[](int64_t pos); + + bool end(); + bool begin(); + bool is_valid() { return is_valid_; } + int error_code() { return error_code_; } + ObXmlBinIterator& next(); + ObXmlBinIterator& operator++(); + ObXmlBinIterator& 
operator--();
+  ObXmlBinIterator operator++(int);
+  ObXmlBinIterator operator--(int);
+  bool operator<(const ObXmlBinIterator& iter);
+  bool operator>(const ObXmlBinIterator& iter);
+  ObXmlBinIterator operator-(int size);
+  ObXmlBinIterator operator+(int size);
+  ObXmlBinIterator& operator+=(int size);
+  ObXmlBinIterator& operator-=(int size);
+  void set_range(int64_t start, int64_t finish);
+  int64_t operator-(const ObXmlBinIterator& iter);
+  bool operator==(const ObXmlBinIterator& rhs);
+  bool operator!=(const ObXmlBinIterator& rhs);
+  bool operator<=(const ObXmlBinIterator& rhs);
+
+  // Prints the cursor position and member count into buf; returns the
+  // number of bytes written as reported by databuff_printf.
+  int64_t to_string(char *buf, const int64_t buf_len) const {
+    int64_t pos = 0;
+    databuff_printf(buf, buf_len, pos, "cur_pos = %ld, total_=%ld", cur_pos_, total_);
+    return pos;
+  }
+
+protected:
+
+  bool is_valid_;
+  bool is_sorted_iter_;
+  int error_code_;
+  int64_t cur_pos_;
+  int64_t total_;
+
+  ObXmlBinMetaParser meta_header_;
+  ObXmlBin cur_node_;
+  ObMulModeMemCtx* ctx_;
+};
+
+// One key taking part in a binary merge: where the key bytes live, its
+// indexes, and which side (origin or patch) it comes from.
+typedef struct ObBinMergeKeyInfo {
+  ObBinMergeKeyInfo()
+    : key_ptr_(nullptr),
+      key_len_(0),
+      origin_index_(0),
+      text_index_(0),
+      is_origin_(false) {}
+  ObBinMergeKeyInfo(char* key_ptr, int64_t key_len, int64_t origin_index, int64_t text_index, bool is_origin)
+    : key_ptr_(key_ptr),
+      key_len_(key_len),
+      origin_index_(origin_index),
+      text_index_(text_index),
+      is_origin_(is_origin) {}
+  // shallow copy: key_ptr_ is shared, the key bytes are not duplicated
+  ObBinMergeKeyInfo(const ObBinMergeKeyInfo& other)
+    : key_ptr_(other.key_ptr_),
+      key_len_(other.key_len_),
+      origin_index_(other.origin_index_),
+      text_index_(other.text_index_),
+      is_origin_(other.is_origin_) {}
+  char* key_ptr_;
+  int64_t key_len_;
+  int64_t origin_index_; // origin_sorted_index
+  int64_t text_index_;
+  bool is_origin_;
+  TO_STRING_KV(K(key_len_),
+               K(key_ptr_),
+               K(text_index_),
+               K(origin_index_),
+               K(is_origin_));
+} ObBinMergeKeyInfo;
+
+// Strict "less than" on the key bytes, for sorting ObBinMergeKeyInfo.
+typedef struct ObBinMergeKeyCompare {
+  int operator()(const ObBinMergeKeyInfo& left, const ObBinMergeKeyInfo& right) {
+    ObString left_str = 
ObString(left.key_len_, left.key_ptr_);
+    ObString right_str = ObString(right.key_len_, right.key_ptr_);
+    // non-zero exactly when left sorts before right
+    return (left_str.compare(right_str) < 0);
+  }
+} ObBinMergeKeyCompare;
+
+/**
+ * Bookkeeping for the header/entry area of a merge result.
+ * NOTE(review): this class declares no constructor, so the scalar members
+ * are presumably filled in by init_merge_meta() before use - confirm.
+ */
+class ObXmlBinMergeMeta {
+public:
+  int init_merge_meta(ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObMulBinHeaderSerializer& header, bool with_patch);
+  void set_key_entry(int64_t entry_idx, int64_t key_offset, int64_t key_len);
+  void set_index_entry(int64_t origin_index, int64_t sort_index);
+  void set_value_entry(int64_t entry_idx, uint8_t type, int64_t value_offset);
+  void set_value_offset(int64_t entry_idx, int64_t value_offset);
+  ObMulBinHeaderSerializer* header_;
+  int64_t header_start_;
+  int64_t attr_count_;
+  int64_t child_count_;
+
+  int64_t index_start_;
+  int8_t index_entry_size_;
+  int64_t key_entry_start_;
+  int8_t key_entry_size_;
+  int64_t value_entry_start_;
+  int64_t value_entry_size_;
+  int64_t key_start_;
+  ObXmlDocBinHeader doc_header_;
+  ObXmlElementBinHeader ele_header_;
+};
+
+// use for xml binary merge, make sure is xml binary, checked in function: init_merge_info
+class ObXmlBinMerge : public ObMulModeBinMerge {
+protected:
+  friend class ObXmlBin;
+  // the virtual functions below are the xml-binary implementations of the
+  // hooks declared by ObMulModeBinMerge
+  virtual int init_merge_info(ObBinMergeCtx& ctx, ObIMulModeBase& origin,
+                              ObIMulModeBase& patch, ObIMulModeBase& res);
+  virtual int if_need_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin,
+                            ObIMulModeBase& patch, ObIMulModeBase& res, bool& need_merge);
+  virtual bool if_need_append_key(ObBinMergeCtx& ctx, ObIMulModeBase& origin,
+                                  ObIMulModeBase& patch, ObIMulModeBase& res);
+  virtual int append_res_without_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin,
+                                       ObIMulModeBase& patch, ObIMulModeBase& res);
+  virtual int append_value_without_merge(ObBinMergeCtx& ctx, ObIMulModeBase& value, ObIMulModeBase& res);
+  virtual int append_key_without_merge(ObBinMergeCtx& ctx, ObIMulModeBase& origin,
+                                       ObMulBinHeaderSerializer& header, ObIMulModeBase& res);
+  virtual int append_merge_key(ObBinMergeCtx& ctx, 
ObIMulModeBase& origin, ObIMulModeBase& patch,
+                               ObMulBinHeaderSerializer& header, ObIMulModeBase& res);
+  virtual int append_value_by_idx(bool is_origin, int idx, ObBinMergeCtx& ctx, ObIMulModeBase& origin,
+                                  ObIMulModeBase& patch, ObMulBinHeaderSerializer& header, ObIMulModeBase& res);
+  virtual int set_value_offset(int idx, uint64_t offset, ObBinMergeCtx& ctx, ObIMulModeBase& res);
+  virtual uint64_t estimated_length(bool retry, ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch);
+  virtual uint64_t estimated_count(bool retry, ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch);
+  // the merge result always keeps the type of the origin node
+  virtual ObMulModeNodeType get_res_type(const ObMulModeNodeType &origin_type, const ObMulModeNodeType &res_type) { return origin_type;}
+  // NOTE(review): the element type of the ObArray parameters below has been
+  // stripped in this copy of the patch; presumably ObBinMergeKeyInfo - confirm.
+  int collect_merge_key(ObBinMergeCtx& ctx, ObIMulModeBase& origin, ObIMulModeBase& patch,
+                        ObMulBinHeaderSerializer& header, ObArray& attr_vec);
+  int reserve_meta(ObMulBinHeaderSerializer& header);
+  void do_sort(ObArray& attr_vec);
+
+  ObXmlBinMergeMeta merge_meta_;
+};
+
+} // namespace common
+} // namespace oceanbase
+
+#endif  // OCEANBASE_SQL_OB_XML_BIN
\ No newline at end of file
diff --git a/deps/oblib/src/lib/xml/ob_xml_parser.cpp b/deps/oblib/src/lib/xml/ob_xml_parser.cpp
new file mode 100644
index 0000000000..dbb966c9fd
--- /dev/null
+++ b/deps/oblib/src/lib/xml/ob_xml_parser.cpp
@@ -0,0 +1,770 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */ +#define USING_LOG_PREFIX LIB + +#include "lib/xml/ob_xml_parser.h" +#include "lib/xml/ob_xml_util.h" +#include "lib/ob_errno.h" +#include "lib/ob_define.h" +#include "libxml2/libxml/parserInternals.h" +#include +#include + +namespace oceanbase { +namespace common { + +static inline bool is_blank_str(const ObString& text) +{ + bool res = true; + const char* str = text.ptr(); + for (int i = 0; i < text.length() && res == true; ++i) { + if (!isspace(str[i])) { + res = false; + } + } + return res; +} + +// ObXmlParserBase + +ObXmlNode* ObXmlParserBase::get_last_child(ObXmlNode* cur_node) +{ + + ObXmlNode* last_child = nullptr; + if (OB_NOT_NULL(cur_node) && cur_node->size() > 0) { + last_child = cur_node->at(cur_node->size() - 1); + } + return last_child; +} +ObXmlNode* ObXmlParserBase::get_first_child(ObXmlNode* cur_node) +{ + + ObXmlNode* first_child = nullptr; + if (OB_NOT_NULL(cur_node) && cur_node->size() > 0) { + first_child = cur_node->at(0); + } + return first_child; +} + +// if last child of current node is text, then merge +int ObXmlParserBase::add_or_merge_text(const ObString& text) +{ + INIT_SUCC(ret); + ObIAllocator* allocator = nullptr; + ObXmlNode* last_child = nullptr; + ObXmlText* text_node = nullptr; + char* str = nullptr; + + if (OB_UNLIKELY(text.length() <= 0)) { + // empty string, do nothing + } else if (OB_ISNULL(allocator = this->get_allocator())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is null", K(ret)); + } else { + last_child = get_last_child(cur_node_); + if (OB_NOT_NULL(text_node = ObXmlUtil::xml_node_cast(last_child, ObMulModeNodeType::M_TEXT))) { + // merge text node + ObString text_value; + text_node->get_value(text_value); + char* old_str = text_value.ptr(); + int64_t old_len = text_value.length(); + int64_t new_len = old_len + text.length(); + if (OB_ISNULL(str = static_cast(allocator->alloc(new_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret), K(text.length()), K(old_len)); + } else { + 
MEMCPY(str, old_str, old_len);
+        MEMCPY(str + old_len, text.ptr(), text.length());
+        // the old text buffer is released once its bytes have been copied
+        allocator->free(reinterpret_cast(old_str));
+        old_str = nullptr;
+        text_node->set_value(ObString(new_len, str));
+      }
+    } else {
+      // no trailing text node: create one holding a private copy of text
+      if (OB_ISNULL(text_node = OB_NEWx(ObXmlText, allocator, ObMulModeNodeType::M_TEXT, ctx_))) {
+        ret = OB_ALLOCATE_MEMORY_FAILED;
+        LOG_WARN("alloc failed", K(ret));
+      } else if (OB_ISNULL(str = static_cast(allocator->alloc(text.length())))) {
+        ret = OB_ALLOCATE_MEMORY_FAILED;
+        LOG_WARN("alloc failed", K(ret), K(text.length()));
+      } else {
+        MEMCPY(str, text.ptr(), text.length());
+        text_node->set_value(ObString(text.length(), str));
+        if (OB_FAIL(this->add_text_node(text_node))) {
+          LOG_WARN("parser characters failed", K(ret));
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+// Remove the previous sibling (the last child of cur_node_) when it is a
+// blank text node. Does nothing unless is_ignore_space() is set.
+// Consecutive plain text nodes cannot occur while parsing.
+// On removal the node's string buffer is freed, then the node is destroyed
+// and its memory released, in that order.
+int ObXmlParserBase::remove_prev_empty_text()
+{
+  INIT_SUCC(ret);
+  ObXmlText* text_node = nullptr;
+  if (! is_ignore_space()) {
+  } else if (OB_NOT_NULL(text_node =
+                ObXmlUtil::xml_node_cast(get_last_child(cur_node_),
+                                                    ObMulModeNodeType::M_TEXT))) {
+    // remove first alone empty text node if necessary
+    ObString text_value;
+    if (OB_FAIL(text_node->get_value(text_value))) {
+      LOG_WARN("get text value failed", K(ret));
+    } else if (is_blank_str(text_value)) {
+      if (OB_FAIL(cur_node_->remove(cur_node_->size() - 1))) {
+        LOG_WARN("remove last empty text child failed", K(ret));
+      } else {
+        // ignore blank string
+        text_node->set_value(ObString());
+        allocator_->free(reinterpret_cast(text_value.ptr()));
+        text_node->~ObXmlText();
+        allocator_->free(text_node);
+      }
+    }
+  }
+  return ret;
+}
+
+// M_TEXT nodes are merged before they get here (see add_or_merge_text),
+// so two consecutive M_TEXT text nodes cannot occur.
+int ObXmlParserBase::add_text_node(ObXmlText* node)
+{
+  INIT_SUCC(ret);
+  if (OB_ISNULL(cur_node_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("current node is null", K(ret));
+  } else if 
(OB_FAIL(remove_prev_empty_text())) { + LOG_WARN("remove_prev_empty_text fail", K(ret)); + } else if (OB_FAIL(cur_node_->append(node))) { + LOG_WARN("add child failed", K(ret)); + } + return ret; +} + +int ObXmlParserBase::comment(ObXmlText* node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(cur_node_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current node is null", K(ret)); + } else if (OB_FAIL(remove_prev_empty_text())) { + LOG_WARN("remove_prev_empty_text fail", K(ret)); + } else if (OB_FAIL(cur_node_->append(node))) { + LOG_WARN("add child failed", K(ret)); + } + return ret; +} + +int ObXmlParserBase::processing_instruction(ObXmlAttribute* node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(cur_node_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current node is null", K(ret)); + } else if (OB_FAIL(remove_prev_empty_text())) { + LOG_WARN("remove_prev_empty_text fail", K(ret)); + } else if (OB_FAIL(cur_node_->append(node))) { + LOG_WARN("add child failed", K(ret)); + } + return ret; +} + +int ObXmlParserBase::cdata_block(ObXmlText* node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(cur_node_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current node is null", K(ret)); + } else if (OB_FAIL(remove_prev_empty_text())) { + LOG_WARN("remove_prev_empty_text fail", K(ret)); + } else if (OB_FAIL(cur_node_->append(node))) { + LOG_WARN("add child failed", K(ret)); + } + return ret; +} + +int ObXmlParserBase::start_document(ObXmlDocument* node) +{ + INIT_SUCC(ret); + this->document_ = node; + this->cur_node_ = node; + return ret; +} + +int ObXmlParserBase::end_document() +{ + INIT_SUCC(ret); + if (OB_ISNULL(cur_node_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current node is null", K(ret)); + } else if (OB_FAIL(remove_prev_empty_text())) { + LOG_WARN("remove_prev_empty_text fail", K(ret)); + } + return ret; +} + +bool ObXmlParserBase::reach_max_depth() +{ + return depth_ > OB_XML_PARSER_MAX_DEPTH; +} + +int ObXmlParserBase::start_element(ObXmlElement* node) +{ + INIT_SUCC(ret); + if (reach_max_depth()) { 
+ ret = OB_ERR_UNEXPECTED; + LOG_WARN("reach max parse depth", K(ret), K(depth_)); + } else if (OB_ISNULL(cur_node_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current node is null", K(ret)); + } else if (OB_FAIL(remove_prev_empty_text())) { + LOG_WARN("remove_prev_empty_text fail", K(ret)); + } else if (OB_FAIL(cur_node_->append(node))) { + LOG_WARN("add child failed", K(ret)); + } else { + ++depth_; + cur_node_ = node; + } + return ret; +} + +int ObXmlParserBase::end_element() +{ + INIT_SUCC(ret); + if (OB_NOT_NULL(cur_node_)) { + if (cur_node_->size() > 1 && OB_FAIL(remove_prev_empty_text())) { + LOG_WARN("remove_prev_empty_text failed", K(ret)); + } else { + --depth_; + cur_node_ = cur_node_->get_parent(); + } + } + return ret; +} + +// ObXmlParserBase end + + +// ObXmlParserUtils +int ObXmlParserUtils::parse_document_text(ObMulModeMemCtx* ctx, const ObString& xml_text, ObXmlDocument*&node, int64_t option) +{ + INIT_SUCC(ret); + ObXmlParser parser(ctx); + + if (OB_FAIL(parser.parse_document(xml_text))) { + LOG_WARN("fail to parse document", K(ret), K(xml_text)); + } else { + node = parser.document(); + if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node can not be null", K(ret)); + } else if (!(option & OB_XML_PARSE_CONTAINER_LAZY_SORT)) { + if (OB_FAIL(node->alter_member_sort_policy(true))) { + LOG_WARN("fail to sort child element", K(ret)); + } + } + } + return ret; +} + +int ObXmlParserUtils::parse_content_text(ObMulModeMemCtx* ctx, const ObString& xml_text, ObXmlDocument*&node, int64_t option) +{ + INIT_SUCC(ret); + ObXmlParser parser(ctx); + + if (OB_FAIL(parser.parse_content(xml_text))) { + LOG_WARN("fail to parse document", K(ret), K(xml_text)); + } else { + node = parser.document(); + if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node can not be null", K(ret)); + } else if (!(option & OB_XML_PARSE_CONTAINER_LAZY_SORT)) { + if (OB_FAIL(node->alter_member_sort_policy(true))) { + LOG_WARN("fail to sort child element", K(ret)); 
+ } + } + } + return ret; +} + + + +int ObXmlParserUtils::get_xml_escape_char_length(const char c) +{ + int len = 0; + switch (c) { + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_AMP_SYMBOL : { + len = ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_AMP_LEN; //"& + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_LT_SYMBOL : { + len = ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_LT_LEN; //"< + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_GT_SYMBOL: { + len = ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_GT_LEN; //"> + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_QUOT_SYMBOL: { + len = ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_QUOT_LEN; //"" + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_APOS_SYMBOL : { + len = ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_APOS_LEN; //"' + break; + } + default : { + len = 1; + break; + } + } + return len; +} + +int ObXmlParserUtils::get_xml_escape_str_length(const ObString &str) +{ + const char *ptr = str.ptr(); + int len = str.length(); + int res_len = 0; + for (int i = 0; i < len; i++) { + res_len += get_xml_escape_char_length(ptr[i]); + } + return res_len; +} + +int ObXmlParserUtils::escape_xml_text(const ObString &src, ObStringBuffer &dst) +{ + INIT_SUCC(ret); + const char *ptr = src.ptr(); + for (int i = 0; i < src.length() && OB_SUCC(ret); i++) { + switch (*ptr) { + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_AMP_SYMBOL : { + if (OB_FAIL(dst.append(ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_AMP))) { + LOG_WARN("append char failed", K(ret)); + } + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_LT_SYMBOL : { + if (OB_FAIL(dst.append(ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_LT))) { + LOG_WARN("append char failed", K(ret)); + } + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_GT_SYMBOL : { + if (OB_FAIL(dst.append(ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_GT))) { + LOG_WARN("append char failed", K(ret)); + } + break; + } + case 
ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_QUOT_SYMBOL : { + if (OB_FAIL(dst.append(ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_QUOT))) { + LOG_WARN("append char failed", K(ret)); + } + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_APOS_SYMBOL : { + if (OB_FAIL(dst.append(ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_APOS))) { + LOG_WARN("append char failed", K(ret)); + } + break; + } + default : { + if (OB_FAIL(dst.append(ptr, 1))) { + LOG_WARN("append char failed", K(ret)); + } + break; + } + } + ptr++; + } + return ret; +} + +int ObXmlParserUtils::escape_xml_text(const ObString &src, ObString &dst) +{ + INIT_SUCC(ret); + const char *ptr = src.ptr(); + int len = src.length(); + for (int i = 0; i < len && OB_SUCC(ret); i++) { + const char c = ptr[i]; + switch (c) { + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_AMP_SYMBOL : { + if (OB_UNLIKELY(ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_AMP_LEN != dst.write( + ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_AMP, + ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_AMP_LEN))) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("write amp char failed", K(ret), K(c)); + } + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_LT_SYMBOL : { + if (OB_UNLIKELY(ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_LT_LEN != dst.write( + ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_LT, + ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_LT_LEN))) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("write lt char failed", K(ret), K(c)); + } + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_GT_SYMBOL : { + if (OB_UNLIKELY(ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_GT_LEN != dst.write( + ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_GT, + ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_GT_LEN))) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("write gt char failed", K(ret), K(c)); + } + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_QUOT_SYMBOL : { + if (OB_UNLIKELY(ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_QUOT_LEN != dst.write( + 
ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_QUOT, + ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_QUOT_LEN))) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("append quot char failed", K(ret), K(c)); + } + break; + } + case ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_APOS_SYMBOL : { + if (OB_UNLIKELY(ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_APOS_LEN != dst.write( + ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_APOS, + ObXmlParserBase::OB_XML_PREDEFINED_ENTITY_APOS_LEN))) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("write apos char failed", K(ret), K(c)); + } + break; + } + default : { + if (OB_UNLIKELY(1 != dst.write(ptr + i, 1))) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("write normal char failed", K(ret), K(c)); + } + break; + } + } // end switch + } // end for + return ret; +} + +bool ObXmlParserUtils::has_xml_decl(const ObString& xml_text) +{ + const char* str = xml_text.ptr(); + int length = xml_text.length(); + int idx = 0; + bool res = false; + // skip space + while(idx < length && isspace(str[idx])) { + ++idx; + } + if (idx+4 < length + && str[idx] == '<' + && str[idx+1] == '?' 
+ && str[idx+2] == 'x' + && str[idx+3] == 'm' + && str[idx+4] == 'l') { + res = true; + } + return res; +} + + +// XMLDecl ::= '' +#define OB_PARSE_XML_DECL_SKIP_SPACE \ +do{\ + while(idx < length && isspace(str[idx]))++idx; \ +}while(0); + +static int parse_name_value(const char* str, + int& idx, + int length, + const ObString& name, + const char*& value_pos, + int& value_len, + bool& has_value) +{ + INIT_SUCC(ret); + OB_PARSE_XML_DECL_SKIP_SPACE + if (idx + name.length() <= length && MEMCMP(name.ptr(), str+idx, name.length()) == 0) { + idx += name.length(); + if (idx < length && str[idx] == '=') { + idx += 1; + OB_PARSE_XML_DECL_SKIP_SPACE + if (idx < length && str[idx] == '"') { + // "xxx" + int start = ++idx; + while(idx < length && str[idx] != '"')++idx; + if (idx < length && str[idx] == '"') { + value_pos = str + start; + value_len = idx - start; + idx += 1; + } else { + ret = OB_INVALID_ARGUMENT; + LOG_DEBUG("not match", K(idx), K(name)); + } + } else if (idx < length && str[idx] == '\'') { + // 'xxx' + int start = ++idx; + while(idx < length && str[idx] != '\'')++idx; + if (idx < length && str[idx] == '\'') { + value_pos = str + start; + value_len = idx - start; + idx += 1; + } else { + ret = OB_INVALID_ARGUMENT; + LOG_DEBUG("not match", K(idx), K(name)); + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_DEBUG("not match", K(idx), K(name)); + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_DEBUG("not match", K(idx), K(name)); + } + + // has name, but length is zero + if (OB_SUCC(ret)) { + has_value = true; + } + } + return ret; +} + +int ObXmlParserUtils::parse_xml_decl_encoding(const ObString& xml_decl, bool &has_decl, ObString& encoding_str) +{ + INIT_SUCC(ret); + ObString src_version_str; + ObString src_standalone_str; + bool has_xml_decl = false; + bool has_version_value = false; + bool has_encoding_value = false; + bool has_standalone_value = false; + + has_decl = ObXmlParserUtils::has_xml_decl(xml_decl); + if (has_decl && 
OB_FAIL(ObXmlParserUtils::parse_xml_decl(xml_decl, + src_version_str, + has_version_value, + encoding_str, + has_encoding_value, + src_standalone_str, + has_standalone_value))) { + LOG_WARN("parse xml decl failed", K(ret), K(xml_decl)); + } + return ret; +} + +int ObXmlParserUtils::parse_xml_decl(const ObString& xml_decl, + ObString& version, + bool &has_version_value, + ObString& encoding, + bool &has_encoding_value, + ObString& standalone, + bool &has_standalone_value) +{ + INIT_SUCC(ret); + const char* str = xml_decl.ptr(); + int length = xml_decl.length(); + int idx = 0; + const char* version_start = nullptr; + int version_len = 0; + const char* encoding_start = nullptr; + int encoding_len = 0; + const char* standalone_start = nullptr; + int standalone_len = 0; + + // case sensitive, so no need tolower + if (idx + 4 < length + && str[idx] == '<' + && str[idx+1] == '?' + && str[idx+2] == 'x' + && str[idx+3] == 'm' + && str[idx+4] == 'l') { + // ') { + version.assign_ptr(version_start, version_len); + encoding.assign_ptr(encoding_start, encoding_len); + standalone.assign_ptr(standalone_start, standalone_len); + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("not invalid xml decl", K(ret), K(xml_decl)); + } + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("not invalid xml decl", K(ret), K(xml_decl)); + } + return ret; +} + +ObXmlStandaloneType ObXmlParserUtils::get_standalone_type(const ObString& src_standalone_str) { + ObXmlStandaloneType type = OB_XML_STANDALONE_NONE; + if (!src_standalone_str.empty()) { + if (src_standalone_str.compare("yes") == 0) { + type = OB_XML_STANDALONE_YES; + } else if (src_standalone_str.compare("no") == 0) { + type = OB_XML_STANDALONE_NO; + } + } + return type; +} + +/* + * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | + * CombiningChar | Extender + * + * [5NS] NCName ::= (Letter | '_') (NCNameChar)* +*/ +int ObXmlParserUtils::check_local_name_legality(const ObString& localname) +{ + INIT_SUCC(ret); + uint64_t letter_count = 0; + const char* str = localname.ptr(); + unsigned codepoint = 0; + uint64_t last_pos = 0; + + rapidjson::MemoryStream input_stream(str, localname.length()); + while (OB_SUCC(ret) && input_stream.Tell() < localname.length()) { + last_pos = input_stream.Tell(); + bool first_codepoint = (last_pos == 0); + if (!rapidjson::UTF8::Decode(input_stream, &codepoint)) { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("ns is invalid", K(ret), K(localname), K(input_stream.Tell())); + } else { + uint64_t curr_pos = input_stream.Tell(); + if (first_codepoint) { + if ((IS_LETTER(codepoint) || (codepoint == 0x5f))) { // '_' + // do nothing + } else { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("ns is invalid", K(ret), K(localname)); + } + } else if (((IS_LETTER(codepoint)) || (IS_DIGIT(codepoint)) || + (codepoint == 0x2e) || (codepoint == 0x2d) || // '.', '-' + (codepoint == 0x5f) || // '_' + (IS_COMBINING(codepoint)) || + (IS_EXTENDER(codepoint)))) { + // do nothing + } else { + ret = OB_ERR_PARSER_SYNTAX; + LOG_WARN("ns is invalid", K(ret), K(localname)); + } + } + } + return ret; +} + +int ObXmlParserUtils::get_prefix_and_localname(const ObString& qname, ObString& prefix, ObString& localname) +{ + INIT_SUCC(ret); + const char* str = qname.ptr(); + int sep_pos = -1; + for (int i = 0; i < qname.length(); ++i) { + if (str[i] == ':') { + sep_pos = i; + break; + } + } + if (sep_pos > 0) { + prefix.assign_ptr(qname.ptr(), sep_pos); + localname.assign_ptr(qname.ptr() + sep_pos + 1, qname.length() - sep_pos - 1); + } else { + // no ns prefix + localname = qname; + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(check_local_name_legality(localname))) { + LOG_WARN("localname is invalid", K(ret), K(qname), K(sep_pos), K(prefix), K(localname)); + } + return ret; +} + +bool 
ObXmlParserUtils::is_namespace_attribute(ObXmlAttribute* attr) +{ + bool res = false; + if (OB_NOT_NULL(attr)) { + if (attr->get_prefix().compare(ObXmlConstants::XMLNS_STRING) == 0) { + res = true; + } else if (attr->get_prefix().empty() && attr->get_key().compare(ObXmlConstants::XMLNS_STRING) == 0) { + res = true; + } + } + return res; +} + +bool ObXmlParserUtils::is_entity_ref(ObString &input_str, int64_t index, ObString &ref, int64_t &ref_len) +{ + bool res = false; + const char *ptr = input_str.ptr(); + if (index < input_str.length()) { + ObString tmp_str; + // length = 4 + if (index + 3 < input_str.length()) { + tmp_str.assign_ptr(ptr+index, 4); + if (tmp_str.case_compare("<") == 0) { + res = true; + ref = ObString::make_string("<"); + ref_len = 4; + } else if (tmp_str.case_compare(">") == 0) { + res = true; + ref = ObString::make_string(">"); + ref_len = 4; + } + } + // length = 6 + if (!res && (index + 5 < input_str.length())) { + tmp_str.assign_ptr(ptr+index, 6); + if (tmp_str.case_compare(""") == 0) { + res = true; + ref = ObString::make_string("\""); + ref_len = 6; + } else if (tmp_str.case_compare("'") == 0) { + res = true; + ref = ObString::make_string("'"); + ref_len = 6; + } + } + // length = 5 + if (!res && (index + 4 < input_str.length())) { + tmp_str.assign_ptr(ptr+index, 5); + if (tmp_str.case_compare("&") == 0) { + res = true; + ref = ObString::make_string("&"); + ref_len = 5; + } + } + } + return res; +} + +// ObXmlParserUtils end + + +} // namespace common +} // namespace oceanbase diff --git a/deps/oblib/src/lib/xml/ob_xml_parser.h b/deps/oblib/src/lib/xml/ob_xml_parser.h new file mode 100644 index 0000000000..20e7d0d259 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xml_parser.h @@ -0,0 +1,271 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_XML_PARSER_H_ +#define OCEANBASE_XML_PARSER_H_ +#include "lib/xml/ob_xml_tree.h" +#include "lib/string/ob_string_buffer.h" + +struct _xmlParserCtxt; +struct _xmlSAXHandler; + +namespace oceanbase { +namespace common { + +enum ObXmlParserOption { + OB_XML_PARSE_RECOVER = 1<<1, /* continue parse whenever some error occur */ + OB_XML_PARSE_NOT_IGNORE_SPACE = 1<<2, + OB_XML_PARSE_SYNTAX_CHECK = 1<<3, + OB_XML_PARSE_CONTENT_ALLOW_XML_DECL = 1 << 4, + OB_XML_PARSE_CONTAINER_LAZY_SORT = 1 << 5, + OB_XML_PARSE_NOT_ENTITY_REPLACE = 1 << 6, +}; + + +// ObXmlParserBase currently used for constructing xml tree and some common functions. 
+// If having same complex logic, please first add to ObLibXml2SaxParser +class ObXmlParserBase { +public: + static constexpr int OB_XML_PARSER_MAX_DEPTH = 1000; + + static constexpr char OB_XML_PREDEFINED_ENTITY_LT_SYMBOL = '<'; + static constexpr const char* const OB_XML_PREDEFINED_ENTITY_LT = "<"; + static constexpr int OB_XML_PREDEFINED_ENTITY_LT_LEN = 1 + 2 + 1; + static constexpr char OB_XML_PREDEFINED_ENTITY_GT_SYMBOL = '>'; + static constexpr const char* const OB_XML_PREDEFINED_ENTITY_GT = ">"; + static constexpr int OB_XML_PREDEFINED_ENTITY_GT_LEN = 1 + 2 + 1; + static constexpr char OB_XML_PREDEFINED_ENTITY_QUOT_SYMBOL = '"'; + static constexpr const char* const OB_XML_PREDEFINED_ENTITY_QUOT = """; + static constexpr int OB_XML_PREDEFINED_ENTITY_QUOT_LEN = 1 + 4 + 1; + static constexpr char OB_XML_PREDEFINED_ENTITY_APOS_SYMBOL = '\''; + static constexpr const char* const OB_XML_PREDEFINED_ENTITY_APOS = "'"; + static constexpr int OB_XML_PREDEFINED_ENTITY_APOS_LEN = 1 + 4 + 1; + static constexpr char OB_XML_PREDEFINED_ENTITY_AMP_SYMBOL = '&'; + static constexpr const char* const OB_XML_PREDEFINED_ENTITY_AMP = "&"; + static constexpr int OB_XML_PREDEFINED_ENTITY_AMP_LEN = 1 + 3 + 1; + + ObXmlParserBase(ObMulModeMemCtx* ctx): + allocator_(ctx->allocator_), + document_(nullptr), + cur_node_(nullptr), + depth_(0), + ctx_(ctx) + { + } + virtual ~ObXmlParserBase() {} + + ObIAllocator* get_allocator() {return allocator_;} + virtual int parse_document(const ObString& xml_text) = 0; + virtual int parse_content(const ObString& xml_text) = 0; + + virtual int start_document(ObXmlDocument* node); + + virtual int end_document(); + virtual int start_element(ObXmlElement* node); + virtual int end_element(); + + int add_or_merge_text(const ObString& text); + int remove_prev_empty_text(); + virtual int add_text_node(ObXmlText* node); + + virtual int comment(ObXmlText* node); + + virtual int processing_instruction(ObXmlAttribute* node); + virtual int cdata_block(ObXmlText* 
node); + + ObXmlDocument* document() {return document_;} + void set_cur_node(ObXmlNode* node) {cur_node_ = node;} + ObXmlNode* cur_node() {return cur_node_;} + + bool reach_max_depth(); + + void set_only_syntax_check() { + options_ = options_ | OB_XML_PARSE_SYNTAX_CHECK; + } + bool is_only_syntax_check() { + return (options_ & OB_XML_PARSE_SYNTAX_CHECK) != 0; + } + + bool is_ignore_space() { + return (options_ & OB_XML_PARSE_NOT_IGNORE_SPACE) == 0; + } + void set_not_ignore_space() { + options_ = options_ | OB_XML_PARSE_NOT_IGNORE_SPACE; + } + + bool is_recover_mode() { + return (options_ & OB_XML_PARSE_RECOVER) != 0; + } + void set_recover_mode() { + options_ = options_ | OB_XML_PARSE_RECOVER; + } + + void set_member_sort_policy() { + options_ |= OB_XML_PARSE_CONTAINER_LAZY_SORT; + } + + bool is_member_sort_lazy() { + return (options_ & OB_XML_PARSE_CONTAINER_LAZY_SORT) != 0 ; + } + + bool is_content_allow_xml_decl() { + return (options_ & OB_XML_PARSE_CONTENT_ALLOW_XML_DECL) != 0; + } + void set_content_allow_xml_decl() { + options_ = options_ | OB_XML_PARSE_CONTENT_ALLOW_XML_DECL; + } + + bool is_entity_replace() { + return (options_ & OB_XML_PARSE_NOT_ENTITY_REPLACE) == 0; + } + void set_not_entity_replace() { + options_ = options_ | OB_XML_PARSE_NOT_ENTITY_REPLACE; + } + + bool is_document_parse() {return document_ != nullptr && ObMulModeNodeType::M_DOCUMENT == document_->type();} + bool is_content_parse() {return document_ == nullptr || ObMulModeNodeType::M_CONTENT == document_->type();} + +protected: + ObXmlNode* get_last_child(ObXmlNode* cur_node); + ObXmlNode* get_first_child(ObXmlNode* cur_node); + +protected: + ObIAllocator* allocator_ = nullptr; + ObXmlDocument* document_ = nullptr; + ObXmlNode* cur_node_ = nullptr; + int depth_ = 0; + int64_t options_ = 0; + ObMulModeMemCtx* ctx_ = nullptr; +}; + + +// should use ObXmlParser, not this class +// the impl of ObLibXml2SaxParser is in ob_libxml2_sax_handler.cpp +// to aviod importing to much libxml2 header 
files +class ObLibXml2SaxParser : public ObXmlParserBase { +public: + + ObLibXml2SaxParser(ObMulModeMemCtx* ctx): + ObXmlParserBase(ctx), + ctxt_(nullptr), + ns_cnt_stack_(), + ns_stack_() {} + virtual ~ObLibXml2SaxParser(); + + virtual int parse_document(const ObString& xml_text); + virtual int parse_content(const ObString& xml_text); + + int add_text_node(ObMulModeNodeType type, const char* value, int len); + + int start_document(); + int start_element(const char* name, const char** attrs); + int end_element(); + int characters(const char *ch, int len); + int processing_instruction(const ObString& target, const ObString& value); + + // for error handling + void stop_parse(int code); + void set_stop_parse(bool val) {stop_parse_ = val;} + bool is_stop_parse() {return stop_parse_;} + int on_error(int code); + void set_errno(int code) {errno_ = code;} + int get_last_errno() {return errno_;} + + _xmlParserCtxt* get_libxml2_ctxt() {return ctxt_;} + +private: + int init(const ObString& xml_text, bool skip_start_blank); + int init_parse_context(); + int init_xml_text(const ObString& xml_text, bool skip_start_blank); + int check(); + + int alloc_text_node(ObMulModeNodeType type, + const char* src_value, + int len, + ObXmlText*& node); + int escape_xml_text(const ObString &src_attr_value, ObString &dst_attr_value); + int construct_text_value(const ObString &src_attr_value, ObString &attr_value); + + // helper method + int set_xml_decl(const ObString& xml_decl_str); + int set_element_name(ObXmlElement& element, const char* src_name); + int add_element_attr(ObXmlElement& element, const char* src_attr_name, const char* src_attr_value); + + // for namespace + int push_namespace(ObXmlAttribute* ns); + int pop_namespace(); + int get_namespace(const ObString& name, bool use_default_ns, ObXmlAttribute*& ns); + int set_element_namespace(ObXmlElement& element); + + int to_ob_xml_errno(int code); + + int get_parse_byte_num(); + bool is_parsed_all_input(); + +private: + int errno_ = 
0; + bool stop_parse_ = false; + + ObString xml_text_; + + _xmlParserCtxt* ctxt_ = nullptr; + _xmlSAXHandler* old_sax_ = nullptr; + + ObArray ns_cnt_stack_; + ObArray ns_stack_; + DISALLOW_COPY_AND_ASSIGN(ObLibXml2SaxParser); +}; +// use ObXmlParser for parser +typedef ObLibXml2SaxParser ObXmlParser; + + +class ObXmlParserUtils { +public: + + // just for simple situation, if use complex, use ObXmlParser directly + // and may return null when xml_text is empty or whitespace + static int parse_document_text(ObMulModeMemCtx* ctx, const ObString& xml_text, ObXmlDocument*&node, int64_t option = 0); + static int parse_content_text(ObMulModeMemCtx* ctx, const ObString& xml_text, ObXmlDocument*&node, int64_t option = 0); + // has decl + static bool has_xml_decl(const ObString& xml_text); + static int parse_xml_decl_encoding(const ObString& xml_decl, bool &has_decl, ObString& encoding_str); + static int parse_xml_decl(const ObString& xml_decl, + ObString& version, + bool &has_version_value, + ObString& encoding, + bool &has_encoding_value, + ObString& standalone, + bool &has_standalone_value); + + static ObXmlStandaloneType get_standalone_type(const ObString& src_standalone_str); + + + // escape character to predefine entity + // just append to dst, don't clear + static int escape_xml_text(const ObString &src, ObStringBuffer &dst); + // use ObString as buffer, caller need ensure dst have enough memory, + // or return size overflow error + static int escape_xml_text(const ObString &src, ObString &dst); + static int get_xml_escape_char_length(const char c); + static int get_xml_escape_str_length(const ObString &str); + + // just set ptr, no content copy + static int check_local_name_legality(const ObString& localname); + static int get_prefix_and_localname(const ObString& qname, ObString& prefix, ObString& localname); + static bool is_namespace_attribute(ObXmlAttribute* attr); + static bool is_entity_ref(ObString &input_str, int64_t index, ObString &ref, int64_t &ref_len); 
+}; + +} // namespace common +} // namespace oceanbase + +#endif //OCEANBASE_XML_PARSER_H_ \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_xml_tree.cpp b/deps/oblib/src/lib/xml/ob_xml_tree.cpp new file mode 100644 index 0000000000..e023d507e6 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xml_tree.cpp @@ -0,0 +1,1288 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface support for the xml tree abstraction. + */ +#define USING_LOG_PREFIX LIB +#include "lib/utility/ob_hang_fatal_error.h" +#include "common/ob_smart_call.h" +#include "lib/ob_errno.h" +#include "lib/xml/ob_xml_tree.h" +#include "lib/xml/ob_xml_bin.h" +#include "lib/xml/ob_xml_util.h" +#include "lib/xml/ob_xml_parser.h" + +namespace oceanbase { +namespace common { + +ObXmlElement::ObXmlElement(ObMulModeNodeType type, ObMulModeMemCtx *ctx) + : ObXmlNode(type, ctx), + prefix_(), + attributes_(nullptr), + name_spaces_(nullptr), + flags_(0), + is_init_(false) +{} + +ObXmlElement::ObXmlElement(ObMulModeNodeType type, ObMulModeMemCtx *ctx, const ObString& tag) + : ObXmlNode(type, ctx), + prefix_(), + tag_info_(tag), + attributes_(nullptr), + name_spaces_(nullptr), + flags_(0), + is_init_(false) +{} + +int ObXmlElement::init() +{ + INIT_SUCC(ret); + attributes_ = static_cast (get_allocator()->alloc(sizeof(ObXmlNode))); + if (OB_ISNULL(attributes_)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at ObJsonDecimal", K(ret)); + } else { + attributes_ = new 
(attributes_) ObXmlNode(ObMulModeNodeType::M_ATTRIBUTE, ctx_); + attributes_->set_parent(this); + attributes_->set_flags(is_lazy_sort() ? MEMBER_LAZY_SORTED: 0); + is_init_ = true; + } + return ret; +} +int ObXmlNode::append(ObIMulModeBase* node) +{ + ObXmlNode* n_node = static_cast(node); + int ret = ObLibContainerNode::append(n_node); + if (OB_SUCC(ret)) { + update_serialize_size(node->get_serialize_size()); + } + return ret; +} + +int ObXmlNode::insert(int64_t pos, ObIMulModeBase* node) +{ + ObXmlNode* n_node = static_cast(node); + int ret = ObLibContainerNode::insert(pos, n_node); + if (OB_SUCC(ret)) { + update_serialize_size(node->get_serialize_size()); + } + return ret; +} + +int ObXmlNode::remove(int64_t pos) +{ + int64_t delta_size = 0; + if (size() > pos && pos >= 0) { + delta_size = -1 * (static_cast(at(pos)))->get_serialize_size(); + } + + int ret = ObLibContainerNode::remove(pos); + if (OB_SUCC(ret)) { + update_serialize_size(delta_size); + } + return ret; +} + +int ObXmlNode::remove(ObIMulModeBase* node) +{ + ObXmlNode* n_node = static_cast(node); + int ret = ObLibContainerNode::remove(n_node); + if (OB_SUCC(ret)) { + update_serialize_size(-1 * n_node->get_serialize_size()); + } + return ret; +} + +int ObXmlNode::get_value(ObIMulModeBase*& value, int64_t index) +{ + INIT_SUCC(ret); + ObIMulModeBase* tmp = nullptr; + if (index == -1) { + value = this; + } else if (OB_ISNULL(tmp = at(index))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get specified child", K(ret), K(index), K(size())); + } else { + value = tmp; + } + return ret; +} + +// tree base update use as leaf node : text +int ObXmlNode::update(int64_t pos, ObIMulModeBase* new_node) +{ + INIT_SUCC(ret); + ObXmlNode* n_node = NULL; + if (OB_ISNULL(new_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("new node input is null", K(ret)); + } else { + int64_t delta_size = 0; + if (size() > pos && pos >= 0) { + delta_size = at(pos)->get_serialize_size(); + } + + n_node = static_cast(new_node); + if 
(OB_FAIL(ObLibContainerNode::update(pos, n_node))) { + LOG_WARN("fail to update new node in pos", K(ret), K(pos)); + } else { + delta_size -= n_node->get_serialize_size(); + update_serialize_size(delta_size); + } + } + return ret; +} + +// new node replace old node in element child +int ObXmlNode::update(ObIMulModeBase* old_node, ObIMulModeBase* new_node) +{ + INIT_SUCC(ret); + ObXmlNode* o_node = NULL; + ObXmlNode* n_node = NULL; + if (OB_ISNULL(old_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node input is null", K(ret)); + } else if (OB_ISNULL(new_node)) { + if (OB_FAIL(remove(o_node))) { + LOG_WARN("fail remove old node", K(ret)); + } + } else { + o_node = static_cast(old_node); + n_node = static_cast(new_node); + if (OB_FAIL(ObLibContainerNode::update(o_node, n_node))) { + LOG_WARN("fail to update new node in pos", K(ret)); + } else { + int64_t delta_size = o_node->get_serialize_size() - n_node->get_serialize_size(); + update_serialize_size(delta_size); + } + } + return ret; +} + +bool ObXmlNode::is_equal_node(const ObIMulModeBase* other) +{ + bool res = false; + + if (OB_ISNULL(other)) { + } else if (other->is_tree()) { + res = static_cast(this) == other; + } + + return res; +} + +bool ObXmlNode::is_node_before(const ObIMulModeBase* other) +{ + bool res = false; + + if (OB_ISNULL(other)) { + } else if (other->is_tree()) { + res = static_cast(this) < other; + } + + return res; +} +bool ObXmlElement::is_element(ObString tag) +{ + return 0 == tag_info_.compare(tag); +} + +bool ObXmlAttribute::is_pi(ObString target) +{ + return 0 == name_.compare(target); +} + +int ObXmlNode::set_flag_by_descandant() +{ + INIT_SUCC(ret); + + ObLibContainerNode* current = this; + ObLibContainerNode::tree_iterator iter(current, PRE_ORDER, ctx_->allocator_); + + if (!(type() == M_ELEMENT || type() == M_DOCUMENT || type() == M_CONTENT || type() == M_UNPARSED || type() == M_UNPARESED_DOC)) { + } else if (OB_FAIL(iter.start())) { + LOG_WARN("fail to prepare scan iterator", K(ret)); 
+ } else { + ObLibContainerNode* tmp = nullptr; + + while (OB_SUCC(iter.next(tmp))) { + ObXmlNode* xnode = static_cast(tmp); + + if (xnode->type() == M_ELEMENT) { + ObXmlElement* tmp = static_cast(xnode); + if (tmp->is_unparse()) { + (static_cast(this))->set_unparse(1); + break; + } + } + } + + if (ret == OB_ITER_END || ret == OB_SUCCESS) { + ret = OB_SUCCESS; + } else { + LOG_WARN("fail scan liberty tree", K(ret), K(type())); + } + } + + return ret; +} + +bool ObXmlElement::has_attribute(const ObString& ns_value, const ObString& name) // name if exist +{ + return NULL != get_attribute_by_name(ns_value, name); +} + +bool ObXmlElement::has_attribute_with_ns(ObXmlAttribute *ns) +{ + bool is_found = false; + for (int64_t i = 0; !is_found && i < attribute_size(); i++) { + ObXmlAttribute *t_attr = static_cast(attributes_->at(i)); + if (t_attr->type() == ObMulModeNodeType::M_ATTRIBUTE + && ns == t_attr->get_ns()) { + is_found = true; + } + } + return is_found; +} + +// only check attribute and ns name, do not check attribute with ns +int ObXmlElement::get_attribute_pos(ObMulModeNodeType xml_type, const ObString& name, int64_t &pos) +{ + int ret = OB_SUCCESS; + bool is_found = false; + if (xml_type != M_ATTRIBUTE && xml_type != M_NAMESPACE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid xml node type", K(ret)); + } else { + for (int64_t i = 0; !is_found && i < attribute_size(); i++) { + if (attributes_->at(i)->type() == xml_type && + 0 == name.case_compare(attributes_->at(i)->get_key())) { + pos = i; + is_found = true; + } + } + if (!is_found) { + ret = OB_SEARCH_NOT_FOUND; + } + } + + return ret; +} + +ObXmlAttribute* ObXmlElement::get_attribute_by_name(const ObString& ns_value, const ObString& name) // get attr by name +{ + ObXmlAttribute* res_node = NULL; + ObXmlAttribute* t_attr = NULL; + for (int64_t i = 0; i < attribute_size(); i++) { + if (attributes_->at(i)->type() == ObMulModeNodeType::M_ATTRIBUTE + && 0 == name.compare(attributes_->at(i)->get_key())) { + 
t_attr = dynamic_cast(attributes_->at(i)); + if (!ns_value.empty() + && ((OB_ISNULL(t_attr->get_ns()) + && !(t_attr->get_prefix().compare(ObXmlConstants::XML_STRING) == 0 + && ns_value.compare(ObXmlConstants::XML_NAMESPACE_SPECIFICATION_URI) == 0)) // ns_value not null but attr without ns + || (OB_NOT_NULL(t_attr->get_ns()) // attr with ns but ns mismatch + && 0 != ns_value.compare(t_attr->get_ns()->get_value())))) { // ns mismatch do nothing + } else if (ns_value.empty() && OB_NOT_NULL(t_attr->get_ns())) { // input prefix is null but attr ns has value + } else { + res_node = t_attr; + break; + } + } + } + return res_node; +} + +ObXmlAttribute* ObXmlElement::get_ns_by_name(const ObString& name) +{ + ObXmlAttribute* res_node = NULL; + for (int64_t i = 0; i < attribute_size(); i++) { + if (attributes_->at(i)->type() == ObMulModeNodeType::M_NAMESPACE + && 0 == name.compare(attributes_->at(i)->get_key())) { + res_node = dynamic_cast(attributes_->at(i)); + break; + } + } + return res_node; +} + +int ObXmlElement::get_namespace_default(ObIArray &value) +{ + INIT_SUCC(ret); + ObArray t_value; + if (OB_FAIL(get_namespace_list(t_value))) { + LOG_WARN("fail to get all ns from attribute", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < t_value.size(); i++) { + if (0 == dynamic_cast(dynamic_cast(t_value.at(i)))->get_key().compare(ObXmlConstants::XMLNS_STRING)) { + ret = value.push_back(t_value.at(i)); + } + } + } + return ret; +} + +bool ObXmlElement::is_invalid_namespace(ObXmlAttribute* ns) +{ + return get_ns()->type() == ObMulModeNodeType::M_NAMESPACE + && (0 == ns->get_key().compare(get_ns()->get_key())) + && (0 == ns->get_value().compare(get_ns()->get_value())); +} + +int ObXmlElement::get_value(ObString& value, int64_t index) +{ + int ret = OB_SUCCESS; + UNUSED(index); + UNUSED(value); + ret = OB_NOT_SUPPORTED; + LOG_WARN("element has no value", K(ret)); + return ret; +} + +int ObXmlNode::get_range(int64_t start_idx, int64_t last_idx, ObIArray &res, 
ObMulModeFilter* filter) +{ + INIT_SUCC(ret); + + IndexRange range = ObLibContainerNode::get_effective_range(start_idx, last_idx); + + iterator first = begin() + range.first; + iterator finish = end(); + iterator last = begin() + range.second; + for (; OB_SUCC(ret) && first < finish && first <= last ; ++first) { + ObIMulModeBase* tmp = *first; + bool filtered = false; + if (OB_ISNULL(filter)) { + filtered = true; + } else if (OB_FAIL((*filter)(tmp, filtered))) { + LOG_WARN("fail to filter xnode", K(ret)); + } + if (OB_SUCC(ret) && filtered && OB_FAIL(res.push_back(tmp))) { + LOG_WARN("fail to store scan result", K(ret)); + } + } + + return ret; +} + +int ObXmlNode::get_children(ObIArray &res, ObMulModeFilter* filter) +{ + return get_range(-1, static_cast(-1), res, filter); +} + +int ObXmlNode::get_node_count(ObMulModeNodeType node_type, int &count) +{ + INIT_SUCC(ret); + count = 0; + for (int i = 0; i < size(); i++) { + if (at(i)->type() == node_type) ++count; + } + return ret; +} + +int ObXmlNode::get_descendant(ObIArray& res, scan_type type, ObMulModeFilter* filter) +{ + INIT_SUCC(ret); + + ObLibContainerNode* current = this; + ObLibContainerNode::tree_iterator iter(current, type, ctx_->allocator_); + + if (OB_FAIL(iter.start())) { + LOG_WARN("fail to prepare scan iterator", K(ret)); + } else { + ObLibContainerNode* tmp = nullptr; + while (OB_SUCC(iter.next(tmp))) { + bool filtered = false; + ObXmlNode* xnode = static_cast(tmp); + if (OB_ISNULL(filter)) { + filtered = true; + } else if (OB_FAIL((*filter)(xnode, filtered))) { + LOG_WARN("fail to filter xnode", K(ret)); + } + if (OB_SUCC(ret) && filtered && OB_FAIL(res.push_back(xnode))) { + LOG_WARN("fail to store scan result", K(ret)); + } + } + + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + } else { + LOG_WARN("fail scan liberty tree", K(ret), K(type)); + } + } + + return ret; +} + +int ObXmlNode::get_children(const ObString& key, ObIArray& res, ObMulModeFilter* filter) +{ + INIT_SUCC(ret); + IterRange range; 
+ if (OB_FAIL(ObLibContainerNode::get_children(key, range))) { + LOG_WARN("fail to get range child", K(ret)); + } else if (!range.first.end()) { + iterator start(range.first); + iterator last(range.second); + iterator finish = iterator(ObLibContainerNode::sorted_end()); + for (; OB_SUCC(ret) && start <= last && start < finish; start++) { + bool filtered = false; + if (OB_ISNULL(filter)) { + filtered = true; // do not need filter + } else if (OB_FAIL((*filter)(*start, filtered))) { + LOG_WARN("fail to filter xnode", K(ret)); + } + if (OB_SUCC(ret) && filtered && OB_FAIL(res.push_back(*start))) { + LOG_WARN("fail to store scan result", K(ret)); + } + } + } + return ret; +} + +int ObXmlNode::get_before(ObIArray &res, ObMulModeFilter* filter) +{ + INIT_SUCC(ret); + int64_t pos = get_index(); + + return get_range(-1, pos - 1, res, filter); +} + +int ObXmlNode::get_after(ObIArray &res, ObMulModeFilter* filter) +{ + INIT_SUCC(ret); + + int64_t pos = get_index(); + + return get_range(pos + 1, static_cast(-1), res, filter); +} + +void ObXmlNode::update_serialize_size(int64_t size) +{ + serialize_size_ += size; + ObXmlNode* parent = get_parent(); + + while (parent) { + parent->set_delta_serialize_size(size); + parent = parent->get_parent(); + }; +} + +int64_t ObXmlElement::get_serialize_size() +{ + int64_t res = 0; + if (serialize_size_ <= 0) { + serialize_size_ = 0; + iterator finish = end(); + int64_t children_num = 0; + for (iterator iter = begin(); iter < finish; iter++, children_num++) { + ObXmlNode* cur = static_cast(*iter); + res += cur->get_serialize_size(); + } + + if (OB_NOT_NULL(attributes_)) { + iterator finish = attributes_->end(); + for (iterator iter = attributes_->begin(); iter < finish; iter++, children_num++) { + ObXmlNode* cur = static_cast(*iter); + res += cur->get_serialize_size(); + } + } + // binary common header + res += sizeof(uint8_t) * 2 + 2 * sizeof(uint64_t); + + res += sizeof(uint16_t); + if (prefix_.length() > 0) { + res += sizeof(uint16_t) + 
prefix_.length(); + } + + // node key stirng key entry + res += get_key().length(); + res += (sizeof(uint32_t) * 4 + sizeof(uint8_t)) * children_num; + serialize_size_ = res; + } else { + res = serialize_size_; + } + + return res; +} + +int ObXmlNode::get_raw_binary(common::ObString &out, ObIAllocator *allocator) +{ + INIT_SUCC(ret); + ObXmlBin bin(ctx_); + + ObIAllocator* used_allocator = allocator == nullptr ? ObIMulModeBase::allocator_ : allocator; + + if (OB_ISNULL(used_allocator)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid allocator null param", K(ret)); + } else if (OB_FAIL(bin.parse_tree(this))) { + LOG_WARN("failed to serialize to bin", K(ret)); + } else if (OB_FAIL(bin.get_raw_binary(out, used_allocator))) { + LOG_WARN("failed to get bin", K(ret)); + } + return ret; +} + +int ObXmlElement::get_key(ObString& res, int64_t index) +{ + INIT_SUCC(ret); + UNUSED(index); + res.assign_ptr(tag_info_.ptr(), tag_info_.length()); + return ret; +} + +int ObXmlElement::add_element(ObXmlNode* xnode, bool ns_check, int pos) +{ + INIT_SUCC(ret); + if (OB_ISNULL(xnode)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xml node is null", K(ret)); + } else if (pos > count() || pos < -1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("pos is invalid", K(ret)); + } else { + xnode->set_parent(this); + if (ns_check && OB_FAIL(Validate_XML_Tree_Legitimacy(xnode, OperaType::APPEND, ValidType::ALL))) { + LOG_WARN("add element failed", K(ret)); + } else if (pos == -1 && OB_FAIL(append(xnode))) { + LOG_WARN("element fail to add xnode in the end", K(ret)); + } else if (pos >= 0 && OB_FAIL(insert(pos, xnode))) { + LOG_WARN("element fail to insert xnode in pos", K(ret)); + } else { + set_empty(0); + update_serialize_size(xnode->get_serialize_size()); + } + } + return ret; +} + +int ObXmlElement::update_element(ObXmlNode* xnode, int pos, bool ns_check) +{ + INIT_SUCC(ret); + if (OB_ISNULL(xnode)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update element is null", K(ret)); + } else { + 
xnode->set_parent(this); + if (ns_check && OB_FAIL(Validate_XML_Tree_Legitimacy(xnode, OperaType::APPEND, ValidType::ALL))) { + LOG_WARN("update element failed", K(ret)); + } else if (OB_FAIL(update(pos, xnode))) { + LOG_WARN("update xml node with pos failed", K(ret)); + } + } + return ret; +} + +int ObXmlElement::remove_element(ObXmlNode* xnode) +{ + INIT_SUCC(ret); + if (OB_ISNULL(xnode)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("remove element is null", K(ret)); + } else if (OB_FAIL(remove(xnode))) { + LOG_WARN("remove xml element with pos failed", K(ret)); + } + return ret; +} + +int ObXmlElement::compare(const ObString& key, int& res) // 0, 1,-1 +{ + INIT_SUCC(ret); + res = key.compare(tag_info_); + return ret; +} + +int ObXmlElement::get_value(ObIArray &value, const ObString& key_name) +{ + INIT_SUCC(ret); + int child_size = size(); + for (int i = 0; OB_SUCC(ret) && i < child_size; i ++) { + if (OB_FAIL(value.push_back(static_cast(static_cast(this)->at(i))))) { + LOG_WARN("add child failed", K(ret)); + } + } + int attr_size = attribute_size(); + for (int i = 0; OB_SUCC(ret) && i < attr_size; i ++) { + if (type() != ObMulModeNodeType::M_ATTRIBUTE) { + } else if (OB_FAIL(value.push_back(attributes_->at(i)))) { + LOG_WARN("add attribute failed", K(ret), K(i)); + } + } + return ret; +} + +int ObXmlElement::get_element_list(ObIArray &value) +{ + INIT_SUCC(ret); + if (OB_FAIL(ObXmlNode::get_children(value))) { + LOG_WARN("get children element failed", K(ret)); + } + return ret; +} + +int ObXmlElement::get_element_by_name(const ObString& prefix, const ObString& name, ObIArray &value) // get element by name +{ + INIT_SUCC(ret); + ObArray t_value; + ObXmlNode* t_node = NULL; + if (OB_FAIL(ObXmlNode::get_children(t_value))) { + LOG_WARN("get children element failed", K(ret), K(prefix), K(name)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < t_value.size(); i++) { + t_node = dynamic_cast(t_value.at(i)); + if (t_node->type() == ObMulModeNodeType::M_ELEMENT + && 0 
== name.compare(t_node->get_key())) { + if (!prefix.empty() && 0 != prefix.compare(dynamic_cast(t_node)->get_prefix())) { // ns mismatch do nothing + } else if (prefix.empty() && !dynamic_cast(t_node)->get_prefix().empty()) { // input prefix is null but attr prefix has value + } else { + ret = value.push_back(t_value.at(i)); + break; + } + } + } + } + return ret; +} + +int ObXmlElement::add_attribute(ObXmlNode* xnode, bool ns_check, int pos) +{ + INIT_SUCC(ret); + if (OB_ISNULL(xnode)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xml node is null", K(ret)); + } else if (!is_init_) { // init attribute if first use, + attributes_ = static_cast (get_allocator()->alloc(sizeof(ObXmlNode))); + if (OB_ISNULL(attributes_)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at ObJsonDecimal", K(ret)); + } else { + attributes_ = new (attributes_) ObXmlNode(ObMulModeNodeType::M_ATTRIBUTE, ctx_); + attributes_->set_parent(this); + is_init_ = true; + } + } + if (OB_FAIL(ret)) { + } else if (pos > attributes_->count() || pos < -1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("pos is invalid", K(ret)); + } else { + xnode->set_parent(this); + if (ns_check && OB_FAIL(Validate_XML_Tree_Legitimacy(xnode, OperaType::APPEND, ValidType::ALL))) { + LOG_WARN("update namespace failed", K(ret)); + } else if (pos == -1 && OB_FAIL(attributes_->append(xnode))) { + LOG_WARN("attribute fail to add xnode in the end", K(ret)); + } else if (pos >= 0 && OB_FAIL(attributes_->insert(pos, xnode))) { + LOG_WARN("attribute fail to insert xnode in pos", K(ret)); + } + } + return ret; +} + +int ObXmlElement::get_valid_ns_from_parent(NsMap &ns_map, ObXmlNode* cur_node) +{ + INIT_SUCC(ret); + ObXmlNode* t_node = cur_node; + while(OB_SUCC(ret) && OB_NOT_NULL(t_node->get_parent())) { + t_node = t_node->get_parent(); + ObXmlElement *t_element = static_cast(t_node); + for (int i = 0; OB_SUCC(ret) && i < t_element->attribute_size(); i ++) { + if (!is_init_) { + } else if (is_init_ && 
OB_ISNULL(t_element->attributes_->at(i))) { + LOG_WARN("node in pos is null", K(ret), K(i)); + } else if (t_element->attributes_->at(i)->type() == ObMulModeNodeType::M_NAMESPACE + && OB_ISNULL(ns_map.get(t_element->attributes_->at(i)->get_key()))) { + ret = add_update_ns_map(ns_map, t_element->attributes_->at(i)->get_key(), t_element->attributes_->at(i)); + } + } + } + return ret; +} + +int ObXmlElement::Validate_XML_Tree_Legitimacy(ObXmlNode* node, int8_t operator_data, int8_t valid_type) +{ + INIT_SUCC(ret); + ObXmlElement::NsMap ns_map; + ObXmlElement::NsArray ns_array; + if (OB_ISNULL(node)) { // report error + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node input is null", K(ret)); + } else if (OB_FAIL(ns_map.create(64, "XML_PARENT_NS"))) { + LOG_WARN("ns map create failed", K(ret)); + } else if (OB_FAIL(get_valid_ns_from_parent(ns_map, node))) { + LOG_WARN("get ns from parent failed", K(ret)); + } else { + for (NsMap::iterator it = ns_map.begin(); OB_SUCC(ret) && it != ns_map.end(); it++) { + if (OB_FAIL(ns_array.push_back(it->second))) { + LOG_WARN("fail to add parent ns to array", K(ret), K(it->first), K(it->second)); + } + } + if (OB_SUCC(ret) && OB_FAIL(check_node_valid_with_ns(ns_array, node, operator_data, valid_type))) { + // check node and child valid + LOG_WARN("failed to check node valid", K(ret)); + } + } + return ret; +} + +// find namespace in array +ObXmlAttribute* ObXmlElement::get_ns_value_from_array(NsArray& ns_array, const ObString& prefix) +{ + ObXmlAttribute* res = NULL; + int64_t size_arr = ns_array.size(); + for (int64_t i = (size_arr - 1); i >= 0; i--) { + if (0 == prefix.compare(ns_array.at(i)->get_key())) { + res = ns_array.at(i); + break; + } + } + return res; +} + +int ObXmlElement::append_unparse_text(const ObString &str) +{ + INIT_SUCC(ret); + ObXmlElement* new_element = NULL; + ObXmlText* new_text = NULL; + bool need_com = false; + char* str_buf = NULL; + size_t str_len = str.length(); + // need_com record whether last element is 
unparse node + need_com = this->size() > 1 && this->at(this->size() - 1)->type() == M_ELEMENT + && dynamic_cast(this->at(this->size() - 1))->is_unparse(); + if (need_com) { + new_text = dynamic_cast(this->at(this->size() - 1)->at(0)); + if (OB_ISNULL(new_text)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get unparse text node", K(ret), K(this->size() - 1)); + } else { + str_len += new_text->get_length(); + } + } + if (OB_FAIL(ret)) { + } else if (str_len > 0 && OB_ISNULL(str_buf = static_cast(get_allocator()->alloc(str_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(str_len)); + } else { + ObString res_str(str_len, 0, str_buf); + if (need_com) { + if (new_text->get_text().length() != res_str.write(new_text->get_text().ptr(), new_text->get_text().length())) { + LOG_WARN("fail to get unparse text from unparse node", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (str.length() != res_str.write(str.ptr(), str.length())) { + LOG_WARN("fail to get text from expr", K(ret), K(str)); + } else { + new_text = NULL; + if (OB_ISNULL(new_text = OB_NEWx(ObXmlText, get_allocator(), ObMulModeNodeType::M_TEXT, ctx_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else { + new_text->set_text(res_str); + if (need_com) { // need combine + if (OB_FAIL(this->at(this->size() - 1)->update((int64_t)0, new_text))) { + LOG_WARN("fail to update unparse node", K(ret)); + } + } else { + if (OB_ISNULL(new_element = OB_NEWx(ObXmlElement, get_allocator(), ObMulModeNodeType::M_ELEMENT, ctx_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else { + new_element->set_unparse(1); + set_unparse(1); + if (OB_FAIL(new_element->add_element(new_text))) { + LOG_WARN("fail to add well form text", K(ret)); + } else if (OB_FAIL(this->add_element(new_element))) { + LOG_WARN("fail to add well form element node", K(ret)); + } + } + } + } + } + } + return ret; +} + +// remove namespace in array +int 
ObXmlElement::remove_ns_value_from_array(NsArray& ns_array, ObXmlAttribute* node) +{ + INIT_SUCC(ret); + ObXmlAttribute* res = NULL; + int64_t pos = -1; + int64_t size_arr = ns_array.size(); + for (int64_t i = (size_arr - 1); i >= 0; i--) { + if (0 == node->get_key().compare(ns_array.at(i)->get_key()) + && 0 == node->get_value().compare(ns_array.at(i)->get_value())) { + pos = i; + break; + } + } + if (pos >= 0 && OB_FAIL(ns_array.remove(pos))) { + LOG_WARN("fail to remove namespace from array", K(ret)); + } + return ret; +} + +// TODO down to top +int ObXmlElement::check_node_valid_with_ns(NsArray& ns_array, ObXmlNode* cur_node, int8_t operator_data, int8_t valid_type) +{ + INIT_SUCC(ret); + if (OB_ISNULL(cur_node)) { + } else { + int64_t ns_count = 0; + ObXmlAttribute* t_attr = NULL; + switch(cur_node->type()) { + case ObMulModeNodeType::M_DOCUMENT : + case ObMulModeNodeType::M_CONTENT : + case ObMulModeNodeType::M_ELEMENT : { + ObXmlElement *cur_element = static_cast(cur_node); + // add ns in cur node + for (int i = 0; OB_SUCC(ret) && i < cur_element->attribute_size(); i ++) { + if (operator_data == OperaType::APPEND + && cur_element->attributes_->at(i)->type() == ObMulModeNodeType::M_NAMESPACE) { + if (OB_FAIL(ns_array.push_back(dynamic_cast(cur_element->attributes_->at(i))))) { + LOG_WARN("fail to add ns to array", K(ret)); + } else { + ns_count ++; + } + } + } + // check ns valid in cur node + if (OB_SUCC(ret) && cur_element->type() == ObMulModeNodeType::M_ELEMENT) { + t_attr = get_ns_value_from_array(ns_array, cur_element->get_prefix()); + if (OB_NOT_NULL(t_attr)) { + cur_element->set_ns(t_attr); + } else if (cur_element->get_prefix().empty()) { + t_attr = get_ns_value_from_array(ns_array, ObXmlConstants::XMLNS_STRING); + if (OB_NOT_NULL(t_attr)) { + cur_element->set_ns(t_attr); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to check element node ns", K(ret), K(cur_element->get_prefix())); + } + } + // check attribute ns valid in cur_node + for 
(int i = 0; OB_SUCC(ret) && i < cur_element->attribute_size(); i ++) { + if (cur_element->attributes_->at(i)->type() == ObMulModeNodeType::M_ATTRIBUTE + && OB_FAIL(SMART_CALL(check_node_valid_with_ns(ns_array, cur_element->attributes_->at(i), operator_data, valid_type)))) { + LOG_WARN("failed to check attribute node", K(ret), K(i)); + } + } + // iterator child element + for (int i = 0; valid_type == ValidType::ALL && OB_SUCC(ret) && i < cur_element->size(); i ++) { + if (OB_FAIL(SMART_CALL(check_node_valid_with_ns(ns_array, cur_element->at(i), operator_data, valid_type)))) { + LOG_WARN("failed to check element child node", K(ret), K(i)); + } + } + // delete ns from cur element + for (int64_t i = 0; i < ns_count; i++) { + ns_array.pop_back(); + } + break; + } + case ObMulModeNodeType::M_ATTRIBUTE : { + ObXmlAttribute *cur_attr = static_cast(cur_node); + t_attr = NULL; + if (cur_attr->get_prefix().empty()) { // default namespace do nothing + } else if (OB_ISNULL(t_attr = get_ns_value_from_array(ns_array, cur_attr->get_prefix()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("can not update this node", K(ret), K(cur_attr->get_prefix())); + } else { + cur_attr->set_ns(t_attr); + } + break; + } + case ObMulModeNodeType::M_NAMESPACE : { + ObXmlAttribute *cur_attr = static_cast(cur_node); + if (operator_data == OperaType::APPEND && OB_FAIL(ns_array.push_back(cur_attr))) { // add ns to map + LOG_WARN("fail to add ns to array", K(ret), K(cur_attr->get_key())); + } else if (operator_data == OperaType::DELETE && OB_FAIL(remove_ns_value_from_array(ns_array, cur_attr))) { // delete ns from array + LOG_WARN("fail to delete ns in array", K(ret), K(cur_attr->get_key())); + } else if (OB_FAIL(SMART_CALL(check_node_valid_with_ns(ns_array, cur_attr->get_parent(), operator_data, valid_type)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to check element from namespace", K(ret)); + } + break; + } + default: + break; + } + } + return ret; +} + +int ObXmlElement::add_update_ns_map(NsMap 
&ns_map, ObString key, ObXmlNode* cur_node, bool overwrite) +{ + INIT_SUCC(ret); + if (OB_NOT_NULL(ns_map.get(key)) && !overwrite) { // not overwrite + // do nothing + } else if (OB_NOT_NULL(ns_map.get(key)) && OB_FAIL(ns_map.erase_refactored(key))) { // overwrite + LOG_WARN("fail to delete ns from map", K(ret), K(key)); + } else if (OB_FAIL(ns_map.set_refactored(key, dynamic_cast(cur_node)))) { + LOG_WARN("fail to add ns from map", K(ret), K(key)); + } + return ret; +} + +int ObXmlElement::update_attribute(ObXmlNode* xnode, int pos, bool ns_check) +{ + INIT_SUCC(ret); + if (OB_ISNULL(xnode)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xml node is null", K(ret)); + } else if (!is_init_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attribute node is null", K(ret)); + } else if (pos >= attributes_->count() || pos < 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("pos is invalid", K(ret)); + } else { + xnode->set_parent(this); + + ObXmlAttribute *cur_attr = static_cast(xnode); + if (ns_check && cur_attr->type() == ObMulModeNodeType::M_NAMESPACE + && OB_FAIL(Validate_XML_Tree_Legitimacy(xnode, OperaType::APPEND, ValidType::ALL))) { + LOG_WARN("fail to upadate namespace", K(ret)); + } else if (ns_check && cur_attr->type() == ObMulModeNodeType::M_ATTRIBUTE + && OB_FAIL(Validate_XML_Tree_Legitimacy(xnode, OperaType::APPEND))) { + LOG_WARN("fail to namespace failed", K(ret)); + } else if (OB_FAIL(attributes_->update(pos, xnode))) { + LOG_WARN("attribute update fail", K(ret)); + } else { + xnode->set_parent(this); + } + } + return ret; +} + +bool ObXmlElement::has_flags(ObMulModeNodeFlag flag) +{ + bool res = false; + if (flag & XML_DECL_FLAG) { + res = has_xml_decl_; + } else if (flag & XML_ENCODING_EMPTY_FLAG) { + res = encoding_val_empty_; + } + + return res; +} + +int64_t ObXmlDocument::get_serialize_size() +{ + int64_t res = 0; + if (serialize_size_ <= 0) { + res = ObXmlElement::get_serialize_size(); + res += sizeof(uint64_t) * 3 + version_.length() + encoding_.length(); + 
serialize_size_ = res; + } else { + res = serialize_size_; + } + + return res; +} + +int ObXmlElement::remove_attribute(int pos) +{ + INIT_SUCC(ret); + if (!is_init_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attribute node is null", K(ret)); + } else if (pos >= attributes_->count() || pos < 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("pos is invalid", K(ret), K(pos)); + } else { + int64_t delta_size = -1 * attributes_->at(pos)->get_serialize_size(); + if (attributes_->at(pos)->type() != ObMulModeNodeType::M_ATTRIBUTE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("remove node is not attribute", K(ret), K(pos)); + } else if (OB_FAIL(attributes_->remove(pos))) { + LOG_WARN("attribute update fail", K(ret)); + } + } + return ret; +} + +int ObXmlElement::remove_namespace(int pos, bool ns_check) +{ + INIT_SUCC(ret); + if (!is_init_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attribute node is null", K(ret)); + } else if (pos >= attributes_->count() || pos < 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("pos is invalid", K(ret), K(pos)); + } else { + if (ns_check && OB_FAIL(Validate_XML_Tree_Legitimacy(attributes_->at(pos), OperaType::DELETE, ValidType::ALL))) { + LOG_WARN("invalid xml tree after remove ns node", K(ret), K(pos)); + } else if (OB_FAIL(attributes_->remove(pos))) { + LOG_WARN("attribute update fail", K(ret)); + } + } + return ret; +} + +int ObXmlElement::get_attribute_list(ObIArray &value) +{ + INIT_SUCC(ret); + ObArray t_value; + if (!is_init_) { + } else if (OB_FAIL((static_cast(attributes_))->get_children(t_value))) { + LOG_WARN("get attribute list failed", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < t_value.size(); i++) { + if (dynamic_cast(t_value.at(i))->type() == ObMulModeNodeType::M_ATTRIBUTE) { + ret = value.push_back(t_value.at(i)); + } + } + } + return ret; +} + +int ObXmlElement::get_namespace_list(ObIArray &value) +{ + INIT_SUCC(ret); + ObArray t_value; + if (!is_init_) { + } else if 
(OB_FAIL((static_cast(attributes_))->get_children(t_value))) { + LOG_WARN("get attribute list failed", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < t_value.size(); i++) { + if (dynamic_cast(t_value.at(i))->type() == ObMulModeNodeType::M_NAMESPACE) { + ret = value.push_back(t_value.at(i)); + } + } + } + return ret; +} + +int ObXmlElement::get_ns_value(const ObString& prefix, ObString& ns_value, int& ans_idx) +{ + INIT_SUCC(ret); + ObXmlNode* handle = nullptr; + if (OB_ISNULL(handle = static_cast(get_attribute_handle()))) { + } else { + iterator iter = handle->begin(); + + for (int i = 0; OB_SUCC(ret) && !iter.end(); ++iter, ++i) { + ObXmlNode* node = static_cast(*iter); + if (node->type() != M_NAMESPACE) { + } else { + ObString tmp_prefix, tmp_ns_value; + node->get_key(tmp_prefix); + node->get_value(tmp_ns_value); + + if (prefix.compare(tmp_prefix) == 0) { + ns_value = tmp_ns_value; + ans_idx = i; + break; + } + } + } + } + + return ret; +} + +void ObXmlElement::set_ns(ObXmlAttribute* xnode) +{ + name_spaces_ = xnode; +} + +ObIMulModeBase* ObXmlElement::attribute_at(int64_t pos, ObIMulModeBase* buffer) +{ + ObXmlAttribute* res = nullptr; + get_attribute(res, pos); + return res; +} + +int ObXmlElement::get_attribute(ObXmlAttribute*& res, int64_t pos) +{ + INIT_SUCC(ret); + if (pos < 0 || pos >= attribute_size()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to get attr, index out of range", K(ret), K(attribute_size()), K(pos)); + } else if (OB_ISNULL(res = dynamic_cast(attributes_->at(pos)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get attr at pos", K(ret), K(pos)); + } + return ret; +} + +int ObXmlElement::get_attribute(ObIArray& res, ObMulModeNodeType filter_type, int32_t flags) +{ + INIT_SUCC(ret); + + if (filter_type == M_NAMESPACE) { + if (flags) { + if (OB_FAIL(get_namespace_default(res))) { + LOG_WARN("failed to get default ns list", K(ret)); + } + } else if (OB_FAIL(get_namespace_list(res))) { + LOG_WARN("failed to get ns list", 
K(ret)); + } + } else if (filter_type == M_ATTRIBUTE) { + if (OB_FAIL(get_attribute_list(res))) { + LOG_WARN("failed to get ns list", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get attr list", K(ret), K(filter_type)); + } + + return ret; +} + +int ObXmlElement::get_attribute(ObIMulModeBase*& res, ObMulModeNodeType filter_type, const ObString& key1, const ObString &key2) +{ + INIT_SUCC(ret); + + if (filter_type == M_NAMESPACE) { + res = get_ns_by_name(key1); + } else if (filter_type == M_ATTRIBUTE) { + res = get_attribute_by_name(key1, key2); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get attr list", K(ret), K(filter_type)); + } + + return ret; +} + +bool ObXmlElement::check_if_defined_ns() +{ + bool ret_bool = false; + ObArray t_value; + if (!is_init_) { + } else { + static_cast(attributes_)->get_children(t_value); + for (int64_t i = 0; i < t_value.size() && !ret_bool; i++) { + if (dynamic_cast(t_value.at(i))->type() == ObMulModeNodeType::M_NAMESPACE) { + ret_bool = true; + } + } + } + return ret_bool; +} + +int ObXmlElement::add_attr_by_str(const ObString& name, + const ObString& value, + ObMulModeNodeType type, + bool ns_check, + int pos) +{ + INIT_SUCC(ret); + ObXmlAttribute* new_node = NULL; + if (OB_ISNULL(new_node = OB_NEWx(ObXmlAttribute, get_allocator(), type, ctx_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else { + new_node->set_xml_key(name); + new_node->set_value(value); + if (OB_FAIL(this->add_attribute(new_node, ns_check, pos))) { + LOG_WARN("fail to add attribute in element", K(ret), K(pos)); + } + } + return ret; +} + +int ObXmlElement::get_ns_value(ObStack& stk, ObString &ns_value, ObIMulModeBase* extend) +{ + INIT_SUCC(ret); + (void)stk; + if (prefix_.compare(ObXmlConstants::XML_STRING) == 0) { + ns_value = ObXmlConstants::XML_NAMESPACE_SPECIFICATION_URI; + } else if (OB_ISNULL(name_spaces_)) { + // do nothing + } else if (OB_FAIL(name_spaces_->get_value(ns_value))) 
{ + LOG_WARN("get namespace failed", K(ret)); + } + return ret; +} + +int ObXmlText::compare(const ObString& key, int& res) +{ + INIT_SUCC(ret); + res = get_text().compare(key); + return ret; +} + +int64_t ObXmlAttribute::get_serialize_size() +{ + int64_t res = 0; + if (serialize_size_ <= 0) { + res += sizeof(uint8_t) * 3 + get_prefix().length() + sizeof(uint16_t) + get_value().length(); + serialize_size_ = res; + } else { + res = serialize_size_; + } + + return res; +} + +int ObXmlAttribute::compare(const ObString& key, int& res) +{ + res = key.compare(name_); + return OB_SUCCESS; +} + +int ObXmlAttribute::get_key(ObString& res, int64_t index) { + UNUSED(index); + res.assign_ptr(name_.ptr(), name_.length()); + return OB_SUCCESS; +} + +int ObXmlAttribute::get_value(ObString& value, int64_t decrease_index_after) { + UNUSED(decrease_index_after); + value.assign_ptr(value_.ptr(), value_.length()); + return OB_SUCCESS; +} + +int ObXmlAttribute::get_ns_value(ObStack& stk, ObString &ns_value, ObIMulModeBase* extend) +{ + INIT_SUCC(ret); + (void)stk; + if (prefix_.compare(ObXmlConstants::XML_STRING) == 0) { + ns_value = ObXmlConstants::XML_NAMESPACE_SPECIFICATION_URI; + } else if (OB_ISNULL(ns_)) { + // do nothing + } else if (OB_FAIL(ns_->get_value(ns_value))) { + LOG_WARN("get namespace failed", K(ret)); + } + return ret; +} + +// text get_key() is for sort child +int ObXmlText::get_key(ObString& res, int64_t index) { + UNUSED(index); + res = ""; + return OB_SUCCESS; +} + +int64_t ObXmlText::get_serialize_size() +{ + int64_t res = 0; + if (serialize_size_ == 0) { + res += sizeof(uint8_t) * 2 + sizeof(uint64_t) + text_.length(); + serialize_size_ = res; + } else { + res = serialize_size_; + } + + return res; +} + +int ObXmlText::get_value(ObString& value, int64_t index) { + UNUSED(index); + value.assign_ptr(text_.ptr(), text_.length()); + return OB_SUCCESS; +}; + +} // namespace common +} // namespace oceanbase diff --git a/deps/oblib/src/lib/xml/ob_xml_tree.h 
b/deps/oblib/src/lib/xml/ob_xml_tree.h new file mode 100644 index 0000000000..50aaf86974 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xml_tree.h @@ -0,0 +1,531 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface support for the xml tree abstraction. + */ + +#ifndef OCEANBASE_SQL_OB_XML_TREE +#define OCEANBASE_SQL_OB_XML_TREE + +#include "lib/xml/ob_multi_mode_interface.h" +#include "ob_tree_base.h" +#include "lib/container/ob_array_iterator.h" + +namespace oceanbase { +namespace common { + +class ObXmlNode; +class ObXmlAttribute; + +enum ObXmlStandaloneType { + OB_XML_STANDALONE_NONE = 0, + OB_XML_STANDALONE_YES, + OB_XML_STANDALONE_NO, + + OB_XML_STANDALONE_OTHER, +}; + +// valid type for ns check : one floor or all tree +enum ValidType : int8_t { + FLOOR, + ALL +}; + +// flag for operator type : add node or delete node +enum OperaType : int8_t { + APPEND, + DELETE +}; + +#pragma pack(4) +class ObXmlNode : public ObIMulModeBase, public ObLibContainerNode { +public: + ObXmlNode(ObMulModeNodeType type, ObMulModeMemCtx *ctx) + : ObIMulModeBase(ObNodeMemType::TREE_TYPE, ObNodeDataType::OB_XML_TYPE, ctx->allocator_), + ObLibContainerNode(ObNodeDataType::OB_XML_TYPE, ctx), + xml_type_(type), + serialize_size_(0) + {} + + ObXmlNode(ObMulModeNodeType type) + : ObIMulModeBase(ObNodeMemType::TREE_TYPE, ObNodeDataType::OB_XML_TYPE), + ObLibContainerNode(ObNodeDataType::OB_XML_TYPE), + xml_type_(type), + serialize_size_(0) + {} + ObXmlNode(const ObXmlNode& src) + : 
ObXmlNode(src.type(), src.ctx_) + {} + + virtual ~ObXmlNode() {} + + class iterator : public ObLibContainerNode::iterator { +public: + iterator() + : ObLibContainerNode::iterator() {} + + iterator(const iterator& from) + : ObLibContainerNode::iterator(from) {} + + iterator(const ObLibContainerNode::iterator& from) + : ObLibContainerNode::iterator(from) {} + + + ObIMulModeBase* operator*() { + ObLibContainerNode* tmp = ObLibContainerNode::iterator::operator*(); + ObXmlNode* res = static_cast(tmp); + return res; + } + + ObIMulModeBase* operator[](int64_t pos) { + ObLibContainerNode* tmp = ObLibContainerNode::iterator::operator[](pos); + ObXmlNode* res = static_cast(tmp); + return res; + } + + bool end() { return ObLibContainerNode::iterator::end(); } + iterator next() { return iterator(ObLibContainerNode::iterator::next()); }; + iterator operator++() { return iterator(ObLibContainerNode::iterator::operator++()); } + iterator operator--() { return iterator(ObLibContainerNode::iterator::operator--()); } + iterator operator++(int) { return iterator(ObLibContainerNode::iterator::operator++(0)); } + iterator operator--(int) { return iterator(ObLibContainerNode::iterator::operator--(0)); } + void set_range(int64_t start, int64_t finish) { ObLibContainerNode::iterator::set_range(start, finish); } + + bool operator<(const iterator& iter) { + const ObLibContainerNode::iterator* p = &iter; + return ObLibContainerNode::iterator::operator<(*p); + } + + bool operator>(const iterator& iter) { + const ObLibContainerNode::iterator* p = &iter; + return ObLibContainerNode::iterator::operator>(*p); + } + + bool operator<=(const iterator& iter) { + const ObLibContainerNode::iterator* p = &iter; + return ObLibContainerNode::iterator::operator<=(*p); + } + + iterator operator-(int size) { return iterator(ObLibContainerNode::iterator::operator-(size)); } + iterator operator+(int size) { return iterator(ObLibContainerNode::iterator::operator+(size)); } + iterator operator+=(int size) { 
return iterator(ObLibContainerNode::iterator::operator+=(size)); } + iterator operator-=(int size) { return iterator(ObLibContainerNode::iterator::operator-=(size)); } + + int64_t operator-(const iterator& iter) { + const ObLibContainerNode::iterator* p = &iter; + return ObLibContainerNode::iterator::operator-(*p); + } + + bool operator==(const iterator& iter) { + const ObLibContainerNode::iterator* p = &iter; + return ObLibContainerNode::iterator::operator==(*p); + } + + bool operator!=(const iterator& iter) { + const ObLibContainerNode::iterator* p = &iter; + return ObLibContainerNode::iterator::operator!=(*p); + } + }; + + iterator begin() { return iterator(ObLibContainerNode::begin()); } + iterator end() { return iterator(ObLibContainerNode::end()); } + + iterator sorted_begin() { return iterator(ObLibContainerNode::sorted_begin()); } + iterator sorted_end() { return iterator(ObLibContainerNode::sorted_end()); } + + void set_flags(uint32_t flags) { ObLibContainerNode::set_flags(flags); } + + int get_range(int64_t start, int64_t end, ObIArray &res, ObMulModeFilter* filter = nullptr); + + int get_before(ObIArray &res, ObMulModeFilter* filter = nullptr); + int get_after(ObIArray &res, ObMulModeFilter* filter = nullptr); + int get_children(ObIArray &res, ObMulModeFilter* filter = nullptr) override; + int get_children(const ObString& key, ObIArray& res, ObMulModeFilter* filter = nullptr) override; + int get_node_count(ObMulModeNodeType node_type, int &count); + int get_descendant(ObIArray& res, scan_type type, ObMulModeFilter* filter = nullptr); + int64_t to_string(char *buf, const int64_t buf_len) { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "is_tree = %d", 1); + return pos; + } + + virtual ObXmlNode* get_parent() {return static_cast(ObLibContainerNode::get_parent());} + virtual void set_parent(ObXmlNode* parent) { ObLibContainerNode::set_parent(parent); } + virtual int node_type() { return OB_XML_TYPE; } + // get xml node type + ObMulModeNodeType 
type() const { return xml_type_; } + void set_xml_type(ObMulModeNodeType xml_type) { xml_type_ = xml_type; } + + // get children size + int64_t size() { return ObLibContainerNode::size(); } + int64_t count() { return size(); } + virtual int set_xml_key(ObString& res) { return OB_SUCCESS; } + virtual int get_key(ObString& res, int64_t index = -1) { return 0; }; + virtual ObString get_key() { return ""; } + virtual void set_value(ObIMulModeBase*& value) {} + virtual void set_value(const ObString &value) {} + virtual int get_value(ObString& value, int64_t index = -1) { return 0; } + int get_value(ObIMulModeBase*& value, int64_t index = -1); + // get all child node (child, attribute, ns) + virtual int get_value(ObIArray &value, const ObString& key_name) { return 0; } + // compare + virtual int compare(const ObIMulModeBase &other, int &res) { return 0; } + + virtual int set_flag_by_descandant(); + + virtual ObMulModeMemCtx* get_mem_ctx() { return ObLibContainerNode::get_mem_ctx(); } + + // key compare + virtual int compare(const ObString& key, int& res) { return 0; } + ObObjType field_type() const { return ObNullType;} + // Add text order to add new nodes + virtual int append(ObIMulModeBase* node); + // Add members at fixed positions + virtual int insert(int64_t pos, ObIMulModeBase* node); + // drop node + virtual int remove(int64_t pos); + // delete the specified node + virtual int remove(ObIMulModeBase* node); + // replace node + virtual int update(int64_t pos, ObIMulModeBase* new_node); + // replace old node + virtual int update(ObIMulModeBase* old_node, ObIMulModeBase* new_node); + // find child with pos + virtual ObXmlNode* at(int64_t pos, ObIMulModeBase* buffer = nullptr) { return static_cast(ObLibContainerNode::member(pos)); } + virtual ObIMulModeBase* attribute_at(int64_t pos, ObIMulModeBase* buffer = nullptr) { return nullptr; } + virtual int64_t attribute_size() { return 0; } + virtual int64_t attribute_count() { return 0; } + // path need + // node 
judgeprotected + + // serialize + virtual int get_ns_value(ObStack& stk, ObString &ns_value, ObIMulModeBase* extend) { ns_value = ObString(); return OB_SUCCESS; } + virtual int get_ns_value(const ObString& prefix, ObString& ns_value, int& ans_idx) { ns_value = ObString(); return OB_SUCCESS; } + virtual int64_t get_serialize_size() { return serialize_size_; } + virtual void update_serialize_size(int64_t size); + virtual int get_attribute(ObIArray& res, ObMulModeNodeType filter_type, int32_t flags = 0) { return OB_NOT_SUPPORTED; } + + virtual int get_attribute(ObIMulModeBase*& res, ObMulModeNodeType filter_type, const ObString& key1, const ObString &key2 = ObString()) { return OB_NOT_SUPPORTED; } + int get_raw_binary(common::ObString &out, ObIAllocator *allocator); + bool is_xml_doc_over_depth(uint64_t depth); + + void set_delta_serialize_size(int64_t size) { serialize_size_ += size; } + + virtual bool has_flags(ObMulModeNodeFlag flag) { return false; } + virtual ObString get_version() { return ObString(); } + virtual ObString get_encoding() { return ObString(); } + virtual ObString get_prefix() { return ObString(); } + virtual uint16_t get_encoding_flag() { return 0; } + virtual uint16_t has_xml_decl() { return 0; } + virtual uint16_t is_unparse() { return 0; } + virtual ObIMulModeBase* get_attribute_handle() { return nullptr; } + uint16_t get_standalone() { return 0; } + bool get_unparse() { return 0; } + bool get_is_empty() { return false; } + virtual void set_standalone(uint16_t standalone) {} + virtual bool is_equal_node(const ObIMulModeBase* other); + virtual bool is_node_before(const ObIMulModeBase* other); + virtual bool check_extend() { return false; } + virtual bool check_if_defined_ns() { return false; } +protected: + + ObMulModeNodeType xml_type_; + int64_t serialize_size_; +}; +#pragma pack() + +#pragma pack(4) +class ObXmlElement : public ObXmlNode +{ +public: + ObXmlElement(ObMulModeNodeType type, ObMulModeMemCtx *ctx); + ObXmlElement(ObMulModeNodeType 
type, ObMulModeMemCtx *ctx, const ObString& tag); + virtual ~ObXmlElement() {} + + int init(); + int64_t to_string(char *buf, const int64_t buf_len) const + { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "xml_type = %d", type()); + return pos; + } + virtual int64_t get_serialize_size(); + // use key as tag data. + int get_key(ObString& res, int64_t index = -1); + void set_xml_key(ObString str) { tag_info_.assign_ptr(str.ptr(), str.length()); } + ObString get_key() {return tag_info_;} + uint64_t get_attribute_node_size() { return attributes_ == nullptr ? 0 : attributes_->size(); } + ObIMulModeBase* get_attribute_handle() { return attributes_; } + int get_value(ObString& value, int64_t index = -1); + int get_value(ObIArray &value, const ObString& key_name); // child , attr , ns。 + + virtual int compare(const ObString& key, int& res); + // attribute + // path seek + bool is_element(ObString tag); + bool has_attribute() { return attribute_size() > 0; }; // size > 0 + bool has_attribute(const ObString& ns_value, const ObString& name); // name if exist + bool has_attribute_with_ns(ObXmlAttribute *ns); // find if has the namespace of attribute is the given ns + int get_attribute_pos(ObMulModeNodeType xml_type, const ObString& name, int64_t &pos); // return attribute pos + ObXmlAttribute* get_attribute_by_name(const ObString& ns_value, const ObString& name); // get attr by name + ObXmlAttribute* get_ns_by_name(const ObString& name); // get namespace by name + bool is_invalid_namespace(ObXmlAttribute* ns); // whether valid namespace + int get_namespace_default(ObIArray &value); // get all default ns + int get_namespace_list(ObIArray &value); // get all namespace + + int get_attribute(ObXmlAttribute*& res, int64_t pos); + virtual int64_t attribute_count() { return attributes_ == nullptr ? 0 : attributes_->size(); } + ObIMulModeBase* attribute_at(int64_t pos, ObIMulModeBase* buffer = nullptr); + int64_t attribute_size() {return is_init_ ? 
attributes_->size() : 0;} + int add_attribute(ObXmlNode* xnode, bool ns_check = false, int pos = -1); + int add_attr_by_str(const ObString& name, + const ObString& value, + ObMulModeNodeType type = ObMulModeNodeType::M_NAMESPACE, + bool ns_check = false, + int pos = -1); + int update_attribute(ObXmlNode* xnode, int pos, bool ns_check = false); + int remove_attribute(int pos); + int remove_namespace(int pos, bool ns_check = false); + int get_attribute_list(ObIArray &value); // + int add_element(ObXmlNode* xnode, bool ns_check = false, int pos = -1); + int append_unparse_text(const ObString &str); + int update_element(ObXmlNode* xnode, int pos, bool ns_check = false); + int update_element(ObXmlNode* xnode, const ObString& name, bool ns_check = false); + int remove_element(ObXmlNode* xnode); + int get_element_list(ObIArray &value); // + int get_element_by_name(const ObString& ns_value, const ObString& name, ObIArray &value); + // namespace + void set_ns(ObXmlAttribute* xnode); // set namespace + int get_ns_value(ObStack& stk, ObString &ns_value, ObIMulModeBase* extend); // get value of namespace + int get_ns_value(const ObString& prefix, ObString& ns_value, int& ans_idx); + ObXmlAttribute* get_ns() { return name_spaces_;} // get namespace + // flag & prefix & tag + void set_prefix(const ObString &prefix) { prefix_.assign_ptr(prefix.ptr(), prefix.length()); } + virtual ObString get_prefix() { return prefix_; } + void set_standalone(uint16_t standalone) { standalone_ = standalone; } + uint16_t get_standalone() { return standalone_; } + void set_has_xml_decl(uint16_t has_xml_decl) { has_xml_decl_ = has_xml_decl; } + virtual uint16_t has_xml_decl() { return has_xml_decl_; } + void set_empty(uint16_t empty) {is_empty_ = empty;} + uint16_t is_empty() {return is_empty_;} + void set_unparse(uint16_t unparse) {is_unparse_ = unparse;} + virtual uint16_t is_unparse() {return is_unparse_;} + void set_encoding_flag(uint16_t encoding_val_empty) {encoding_val_empty_ = 
encoding_val_empty;} + virtual uint16_t get_encoding_flag() {return encoding_val_empty_;} + typedef common::hash::ObHashMap NsMap; + typedef ObArray NsArray; + + int Validate_XML_Tree_Legitimacy(ObXmlNode* node, int8_t operator_data, int8_t valid_type = 0); + int check_node_valid_with_ns(NsArray& ns_array, ObXmlNode* cur_node, int8_t operator_data, int8_t valid_type); + int get_valid_ns_from_parent(NsMap &ns_map, ObXmlNode* cur_node); + int add_update_ns_map(NsMap &ns_map, ObString key, ObXmlNode* cur_node, bool overwrite = false); + ObXmlAttribute* get_ns_value_from_array(NsArray& ns_array, const ObString& prefix); + int remove_ns_value_from_array(NsArray& ns_array, ObXmlAttribute* node); + + virtual bool has_flags(ObMulModeNodeFlag flag); + virtual bool get_is_empty() { return is_empty_; } + virtual bool get_unparse() { return is_unparse_; } + + virtual int get_attribute(ObIArray& res, ObMulModeNodeType filter_type, int32_t flags = 0); + virtual int get_attribute(ObIMulModeBase*& res, ObMulModeNodeType filter_type, const ObString& key1, const ObString &key2 = ObString()); + virtual bool check_if_defined_ns(); +private: + // namespace prefix + ObString prefix_; + // tag info + ObString tag_info_; + // attr + ObXmlNode* attributes_; // include namespace + // namespace + ObXmlAttribute* name_spaces_; // point to namespace in attr + // parse flag + union { + uint16 flags_; + struct { + uint16_t standalone_ : 4; // : default 0, yes 1, no 2, other 3; + uint16_t has_xml_decl_: 1; // no 0, yes 1 + uint16_t is_empty_: 1; // empty + uint16_t is_unparse_: 1; // well format element + uint16_t encoding_val_empty_: 1; // has encoding clause but encoding value is null + uint16_t reserved_: 8; + }; + }; + bool is_init_; + + DISALLOW_COPY_AND_ASSIGN(ObXmlElement); +}; +#pragma pack() + +#pragma pack(4) +// document or content +class ObXmlDocument : public ObXmlElement +{ +public: + ObXmlDocument(ObMulModeNodeType type, ObMulModeMemCtx *ctx) + : ObXmlElement(type, ctx) + {} + 
virtual ~ObXmlDocument() {} + int64_t to_string(char *buf, const int64_t buf_len) const + { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "xml_type = %d", type()); + return pos; + } + + void set_version(ObString version) {version_.assign_ptr(version.ptr(), version.length());} + void set_encoding(ObString encoding) {encoding_.assign_ptr(encoding.ptr(), encoding.length());} + virtual ObString get_version() { return version_ ; } + virtual ObString get_encoding() { return encoding_; } + + void set_inSubset(ObXmlNode* intSubset) { intSubset_ = intSubset; } + void set_extSubset(ObXmlNode* extSubset) { extSubset_ = extSubset; } + ObXmlNode* get_inSubset() { return intSubset_; } + ObXmlNode* get_extSubset() { return extSubset_; } + + int64_t get_serialize_size(); + +protected: + // xml prolog + // + ObString version_; + ObString encoding_; + ObXmlNode* intSubset_; // int DTD + ObXmlNode* extSubset_; // ext DTD + DISALLOW_COPY_AND_ASSIGN(ObXmlDocument); +}; + +// attribute & namespace & PI +class ObXmlAttribute : public ObXmlNode +{ + public: + ObXmlAttribute() + : ObXmlNode(M_MAX_TYPE, nullptr), + prefix_(), + ns_(nullptr), + attr_decl_(NULL), + only_key_(false) + {} + ObXmlAttribute(ObMulModeNodeType type, ObMulModeMemCtx *ctx) + : ObXmlNode(type, ctx), + prefix_(), + ns_(nullptr), + attr_decl_(NULL), + only_key_(false) + {} + ObXmlAttribute(ObMulModeNodeType type, ObMulModeMemCtx *ctx, const ObString& key, const ObString& value) + : ObXmlNode(type, ctx), + prefix_(), + ns_(nullptr), + name_(key), + value_(value), + attr_decl_(NULL), + only_key_(false) + {} + ObXmlAttribute(const ObXmlAttribute& src) + : ObXmlAttribute(src.type(), src.ctx_, src.name_, src.value_) + {} + virtual ~ObXmlAttribute() {} + int64_t to_string(char *buf, const int64_t buf_len) const + { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "xml_type = %d", type()); + return pos; + } + + int eval_crc_value(ObXmlNode& xnode); // cal crc_value + int get_ns_value(ObStack& stk, ObString 
&ns_value, ObIMulModeBase* extend); + bool is_pi(ObString target); + + ObXmlNode* get_parent() { return xml_type_ == M_INSTRUCT ? ObXmlNode::get_parent() : ObXmlNode::get_parent()->get_parent(); } + + void set_xml_key(const ObString &new_key) {name_.assign_ptr(new_key.ptr(), new_key.length());} + int get_key(ObString& res, int64_t index = -1); + ObString get_key() { return name_; } + void set_value(const ObString &value) {value_.assign_ptr(value.ptr(), value.length());} + int get_value(ObString& value, int64_t index = -1); + ObString& get_value() { return value_; } + void set_only_key() { only_key_ = true; } + bool get_only_key() { return only_key_; } + + void set_attr_decl(ObXmlNode *attr_decl) {attr_decl_ = attr_decl;} + void get_attr_decl(const ObXmlNode *&attr_decl) {attr_decl = attr_decl_;} + + void set_prefix(const ObString &prefix) {prefix_.assign_ptr(prefix.ptr(), prefix.length());} + void get_prefix(ObString &prefix) {prefix.assign_ptr(prefix_.ptr(), prefix_.length());} + ObString get_prefix() { return prefix_;} + void set_ns(ObXmlAttribute* ns) {ns_ = ns;} + ObXmlAttribute* get_ns() { return ns_;} + // ObXmlNode *clone(ObIAllocator* allocator) const; + virtual int compare(const ObString& key, int& res); + int64_t get_serialize_size(); +protected: + // namespace prefix + ObString prefix_; + // namespace point (attribute type) + ObXmlAttribute* ns_; + ObString name_; // key + ObString value_; // value + ObXmlNode *attr_decl_; // point to ns + bool only_key_; // only for mysql +}; +#pragma pack() + +#pragma pack(4) +// text / cdata / comment +class ObXmlText : public ObXmlNode { +public: + explicit ObXmlText(ObMulModeNodeType type, ObMulModeMemCtx *ctx) + : ObXmlNode(type, ctx), + text_(), + length_(0), + is_space_(false) + {} + + explicit ObXmlText(ObMulModeNodeType type) + : ObXmlNode(type), + text_(), + length_(0), + is_space_(false) + {} + + virtual ~ObXmlText() {} + int64_t to_string(char *buf, const int64_t buf_len) const + { + int64_t pos = 0; + 
databuff_printf(buf, buf_len, pos, "xml_type = %d", type()); + return pos; + } + void set_value(const ObString &value) {text_.assign_ptr(value.ptr(), value.length()); length_ = value.length();} + virtual int get_key(ObString& res, int64_t index = -1); + // text without key + virtual ObString get_key() { return ""; } + virtual int get_value(ObString& value, int64_t index = -1); + virtual int compare(const ObString& key, int& res) override; + + // virtual int get_value(ObIArray &value, const ObString& key_name); + const ObString get_text() { return text_; } // get TEXT + size_t get_length() { return length_; } // get length + void set_text(const ObString text) {text_.assign_ptr(text.ptr(), text.length()); length_ = text.length();} // set text and length + void set_length(int length) { length_ = length; } // update length。 + bool is_space() { return is_space_; } + void set_is_space(bool is_space) { is_space_ = is_space; } + int64_t get_serialize_size(); +protected: + ObString text_; + int64_t length_; + bool is_space_; // TODO xml tree to string content \n affect of space : last node is text then not add new line, ignore space; + DISALLOW_COPY_AND_ASSIGN(ObXmlText); +}; +#pragma pack() + +} // namespace common +} // namespace oceanbase + +#endif // OCEANBASE_SQL_OB_JSON_TREE diff --git a/deps/oblib/src/lib/xml/ob_xml_util.cpp b/deps/oblib/src/lib/xml/ob_xml_util.cpp new file mode 100644 index 0000000000..d5599e3918 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xml_util.cpp @@ -0,0 +1,1548 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PubL v2 for more details. + * This file contains interface implement for the xml util abstraction. + */ +#define USING_LOG_PREFIX SQL_ENG + +#include "lib/xml/ob_xml_util.h" +#include "lib/xml/ob_multi_mode_interface.h" +#include "lib/xml/ob_xml_parser.h" +#include "lib/xml/ob_xml_tree.h" +#include "lib/xml/ob_xml_bin.h" +#include "lib/alloc/malloc_hook.h" + +namespace oceanbase { +namespace common { + +const char *ObXmlUtil::get_charset_name(ObCollationType collation_type) +{ + return get_charset_name(ObCharset::charset_type_by_coll(collation_type)); +} + +const char *ObXmlUtil::get_charset_name(ObCharsetType charset_type) +{ + const char *ret_name = "invalid_type"; + switch(charset_type) { + case CHARSET_BINARY: { + ret_name = "BINARY"; + break; + } + case CHARSET_UTF8MB4: { + ret_name = "UTF-8"; + break; + } + case CHARSET_GBK: { + ret_name = "GBK"; + break; + } + case CHARSET_UTF16: { + ret_name = "UTF-16"; + break; + } + case CHARSET_GB18030: { + ret_name = "GB18030"; + break; + } + case CHARSET_LATIN1: { + ret_name = "LATIN1"; + break; + } + case CHARSET_GB18030_2022: { + ret_name = "GB18030_2022"; + break; + } + default: { + break; + } + } + return ret_name; +} + +ObCharsetType ObXmlUtil::check_support_charset(const ObString& cs_name) +{ + ObCharsetType charset_type = CHARSET_INVALID; + if (cs_name.case_compare("utf-8") == 0) { + charset_type = CHARSET_UTF8MB4; + } else if (cs_name.case_compare("utf-16") == 0) { + charset_type = CHARSET_UTF16; + } else if (cs_name.case_compare("gbk") == 0) { + charset_type = CHARSET_GBK; + } else if (cs_name.case_compare("gb18030") == 0) { + charset_type = CHARSET_GB18030; + } else if (cs_name.case_compare("latin1") == 0) { + charset_type = CHARSET_LATIN1; + } else if (cs_name.case_compare("gb18030_2022") == 0) { + charset_type = CHARSET_GB18030_2022; + } + return charset_type; +} + +bool ObXmlUtil::is_container_tc(ObMulModeNodeType type) +{ + return (type == ObMulModeNodeType::M_DOCUMENT || + type == 
ObMulModeNodeType::M_CONTENT || + type == ObMulModeNodeType::M_UNPARSED || + type == ObMulModeNodeType::M_UNPARESED_DOC || + type == ObMulModeNodeType::M_ELEMENT); +} + +bool ObXmlUtil::is_node(ObMulModeNodeType type) +{ + return type == ObMulModeNodeType::M_INSTRUCT + || type == ObMulModeNodeType::M_ELEMENT + || type == ObMulModeNodeType::M_TEXT + || type == ObMulModeNodeType::M_CDATA + || type == ObMulModeNodeType::M_COMMENT; +} + +bool ObXmlUtil::is_text(ObMulModeNodeType type) +{ + return type == ObMulModeNodeType::M_TEXT + || type == ObMulModeNodeType::M_CDATA; +} + +bool ObXmlUtil::is_element(ObMulModeNodeType type) +{ + return type == ObMulModeNodeType::M_ELEMENT; +} + +bool ObXmlUtil::is_pi(ObMulModeNodeType type) +{ + return type == ObMulModeNodeType::M_INSTRUCT; +} + +bool ObXmlUtil::use_element_serializer(ObMulModeNodeType type) +{ + return is_container_tc(type); +} + +bool ObXmlUtil::use_attribute_serializer(ObMulModeNodeType type) +{ + return (type == ObMulModeNodeType::M_ATTRIBUTE + || type == ObMulModeNodeType::M_NAMESPACE + || type == ObMulModeNodeType::M_INSTRUCT); +} + +bool ObXmlUtil::use_text_serializer(ObMulModeNodeType type) +{ + return (type == ObMulModeNodeType::M_TEXT + || type == ObMulModeNodeType::M_COMMENT + || type == ObMulModeNodeType::M_CDATA); +} + +bool ObXmlUtil::is_comment(ObMulModeNodeType type) +{ + return type == ObMulModeNodeType::M_COMMENT; +} + +int ObXmlUtil::append_newline_and_indent(ObStringBuffer &j_buf, uint64_t level, uint64_t size) +{ + // Append newline and two spaces per indentation level. 
+ INIT_SUCC(ret); + + if (level > OB_XML_PARSER_MAX_DEPTH_) { + ret = OB_ERR_JSON_OUT_OF_DEPTH; // error code need change + LOG_WARN("is_pretty level is too deep", K(ret), K(level)); + } else if (OB_FAIL(j_buf.append("\n"))) { + LOG_WARN("fail to append newline to buffer", K(ret), K(level), K(size)); + } else if (OB_FAIL(j_buf.reserve(level * size))) { + LOG_WARN("fail to reserve memory for buffer", K(ret), K(level), K(size)); + } else { + char str[level * size]; + MEMSET(str, ' ', level * size); + if (OB_FAIL(j_buf.append(str, level * size))) { + LOG_WARN("fail to append space to buffer", K(ret), K(level), K(size)); + } + } + + return ret; +} + +int ObXmlUtil::append_qname(ObStringBuffer &j_buf, const ObString& prefix, const ObString& localname) { + INIT_SUCC(ret); + if (!prefix.empty()) { + if (OB_FAIL(j_buf.append(prefix))) { + LOG_WARN("fail to print prefix in attr", K(ret), K(prefix)); + } else if (OB_FAIL(j_buf.append(":"))) { + LOG_WARN("fail to print : in attr", K(ret)); + } + } + if (OB_SUCC(ret) && !localname.empty() && OB_FAIL(j_buf.append(localname))) { + LOG_WARN("fail to print value in attr", K(ret), K(localname)); + } + return ret; +} + +int ObXmlUtil::create_mulmode_tree_context(ObIAllocator *allocator, ObMulModeMemCtx*& ctx) +{ + INIT_SUCC(ret); + + if (OB_ISNULL(allocator)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to allocate mem ctx, allocator is null", K(ret)); + } else { + ObMulModeMemCtx* mem_ctx = static_cast(allocator->alloc(sizeof(ObMulModeMemCtx))); + if (OB_ISNULL(mem_ctx)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate mem ctx, allocator is null", K(ret)); + } else { + mem_ctx->allocator_ = allocator; + new (&mem_ctx->page_allocator_) ModulePageAllocator(*allocator, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR); + new (&mem_ctx->mode_arena_) LibTreeModuleArena(DEFAULT_PAGE_SIZE, mem_ctx->page_allocator_); + ctx = mem_ctx; + } + } + return ret; +} + +int ObXmlUtil::xml_bin_type(const ObString& data, 
ObMulModeNodeType& type) +{ + INIT_SUCC(ret); + ObMulBinHeaderSerializer desserializer(data.ptr(), data.length()); + if (OB_FAIL(desserializer.deserialize())) { + LOG_WARN("deserialize failed", K(ret), K(data)); + } else { + type = desserializer.type(); + } + return ret; +} + +int ObXmlUtil::xml_bin_header_info(const ObString& data, ObMulModeNodeType& type, int64_t& size) +{ + INIT_SUCC(ret); + ObMulBinHeaderSerializer desserializer(data.ptr(), data.length()); + if (OB_FAIL(desserializer.deserialize())) { + LOG_WARN("deserialize failed", K(ret), K(data)); + } else { + type = desserializer.type(); + size = desserializer.total_; + } + return ret; +} + +int ObMulModeFactory::get_xml_base(ObMulModeMemCtx* ctx, const ObString &buf, + ObNodeMemType in_type, ObNodeMemType expect_type, + ObIMulModeBase *&out, ObMulModeNodeType node_type, + bool is_for_text, bool should_check) +{ + return get_xml_base(ctx, buf.ptr(), buf.length(), in_type, expect_type, out, node_type, is_for_text, should_check); +} + +int ObMulModeFactory::get_xml_tree(ObMulModeMemCtx* ctx, const ObString &str, + ObNodeMemType in_type, ObXmlNode *&out, ObMulModeNodeType parse_type) +{ + INIT_SUCC(ret); + const char *ptr = str.ptr(); + uint64_t length = str.length(); + void *buf = NULL; + + if (OB_ISNULL(ctx) || OB_ISNULL(ctx->allocator_)) { // check allocator + ret = OB_ERR_NULL_VALUE; + LOG_WARN("param allocator is NULL", K(ret), KP(ctx), KP(ctx->allocator_)); + } else if (OB_ISNULL(ptr) || length == 0) { + ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; + LOG_WARN("param is NULL", K(ret), KP(ptr), K(length)); + } else if (in_type != ObNodeMemType::TREE_TYPE && in_type != ObNodeMemType::BINARY_TYPE) { // check in_type + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param in_type is invalid", K(ret), K(in_type)); + } else if (in_type == ObNodeMemType::TREE_TYPE) { + ObXmlDocument *xnode = NULL; + if (OB_FAIL(ObXmlParserUtils::parse_document_text(ctx, str, xnode))) { + LOG_WARN("fail to get xml tree", K(ret), K(length), 
K(in_type)); + } else { + out = xnode; + } + } else if (in_type == ObNodeMemType::BINARY_TYPE) { + ObXmlBin bin(str, ctx); + ObIMulModeBase *xnode = NULL; + if (OB_FAIL(bin.to_tree(xnode))) { + LOG_WARN("fail to change bin to tree", K(ret), K(in_type), K(bin)); + } else { + out = static_cast(xnode); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect type",K(in_type)); + } + + return ret; +} + +int ObMulModeFactory::add_unparsed_text_into_doc(ObMulModeMemCtx* ctx, ObString text, ObXmlDocument *&doc) // TODO ObXmlDocument -> ObXmlNode +{ + INIT_SUCC(ret); + if (OB_ISNULL(doc = OB_NEWx(ObXmlDocument, ctx->allocator_, ObMulModeNodeType::M_UNPARSED, ctx))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to create document", K(ret)); + } else if(OB_FAIL(doc->append_unparse_text(text))) { + LOG_WARN("fail to add unparse text to doc", K(ret)); + } + return ret; +} + +/* + * get_xml_base special for text + * in_type=binary_type, expect_type=binary_type +*/ +int ObMulModeFactory::get_xml_base(ObMulModeMemCtx* ctx, const char *ptr, uint64_t length, + ObNodeMemType in_type, ObNodeMemType expect_type, + ObIMulModeBase *&out, ObMulModeNodeType parse_type, + bool is_for_text, bool should_check) +{ + INIT_SUCC(ret); + void *buf = NULL; + const ObString str(length, ptr); + + if (OB_ISNULL(ctx) || OB_ISNULL(ctx->allocator_)) { // check allocator + ret = OB_ERR_NULL_VALUE; + LOG_WARN("param allocator is NULL", K(ret), KP(ctx), KP(ptr)); + } else if (OB_ISNULL(ptr) || length == 0) { + ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; + LOG_WARN("param is NULL", K(ret), KP(ptr), K(length)); + } else if (in_type != ObNodeMemType::TREE_TYPE && in_type != ObNodeMemType::BINARY_TYPE) { // check in_type + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param in_type is invalid", K(ret), K(in_type)); + } else if (expect_type != ObNodeMemType::TREE_TYPE && expect_type != ObNodeMemType::BINARY_TYPE) { // check expect_type + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param expect_type is invalid", 
K(ret), K(expect_type)); + } else if (in_type == ObNodeMemType::TREE_TYPE) { + ObXmlDocument *xnode = NULL; + if (parse_type == ObMulModeNodeType::M_UNPARSED && OB_FAIL(add_unparsed_text_into_doc(ctx, str, xnode))) { + LOG_WARN("failed to get unparse tree", K(ret), K(length), K(in_type), K(expect_type)); + } else if (parse_type == ObMulModeNodeType::M_CONTENT && OB_FAIL(ObXmlParserUtils::parse_content_text(ctx, str, xnode))) { + LOG_WARN("fail to get xml content tree", K(ret), K(length), K(in_type), K(expect_type)); + } else if (parse_type == ObMulModeNodeType::M_DOCUMENT && OB_FAIL(ObXmlParserUtils::parse_document_text(ctx, str, xnode))) { + LOG_WARN("fail to get xml tree", K(ret), K(length), K(in_type), K(expect_type)); + } else if (expect_type == ObNodeMemType::TREE_TYPE) { + out = xnode; + } else { // expect bin + ObXmlBin *bin = nullptr; + if (OB_ISNULL(bin = OB_NEWx(ObXmlBin, ctx->allocator_, (ctx)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory", K(ret), K(in_type), K(expect_type), K(sizeof(ObXmlBin))); + } else if (OB_FAIL(bin->parse_tree(xnode))) { + LOG_WARN("fail to parse tree", K(ret), K(in_type), K(expect_type)); + } else { + out = bin; + } + } + } else if (in_type == ObNodeMemType::BINARY_TYPE) { + ObXmlBin bin(ctx); + ObXmlBin *bin_new = nullptr; + if (OB_FAIL(bin.parse(ptr, length))) { + LOG_WARN("fail to reset iter", K(ret), K(in_type), K(expect_type)); + } else if (bin.type() == M_UNPARESED_DOC) { + ObStringBuffer* buffer = nullptr; + ObXmlDocument *x_doc = nullptr; + ObString unparsed_text; + if (OB_ISNULL(buffer = OB_NEWx(ObStringBuffer, ctx->allocator_, (ctx->allocator_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate buffer", K(ret), K(in_type), K(expect_type)); + } else if (OB_FAIL(bin.print(*buffer, ObXmlFormatType::NO_FORMAT, 0, 0))) { + LOG_WARN("fail to print xml", K(ret), K(in_type), K(expect_type)); + } else if (OB_FALSE_IT(unparsed_text.assign_ptr(buffer->ptr(), buffer->length()))) { + } 
else if (is_for_text && bin.type() == M_UNPARESED_DOC) { + // special for text + if (OB_FAIL(bin.construct(bin_new, ctx->allocator_))) { + LOG_WARN("fail to dup res", K(ret), K(in_type), K(expect_type)); + } else { + out = bin_new; + } + } else if (OB_FAIL(ObXmlParserUtils::parse_document_text(ctx, unparsed_text, x_doc))) { + if (ret == OB_ERR_PARSER_SYNTAX) { + ret = OB_ERR_XML_PARSE; + LOG_WARN("parse xml plain text as document failed.", K(ret), K(unparsed_text)); + } else { + LOG_WARN("document unparsed unexpected err", K(ret), K(unparsed_text)); + } + } else if (expect_type == ObNodeMemType::BINARY_TYPE) { + ObIMulModeBase* tree = x_doc; + if (OB_ISNULL(bin_new = OB_NEWx(ObXmlBin, ctx->allocator_, (ctx)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory", K(ret), K(in_type), K(expect_type), K(sizeof(ObXmlBin))); + } else if (OB_FAIL(bin_new->parse_tree(tree))) { + LOG_WARN("fail to reset iter", K(ret), K(in_type), K(expect_type)); + } else { + out = bin_new; + } + } else { + out = x_doc; + } + } else if (bin.type() == M_UNPARSED) { + ObStringBuffer* buffer = nullptr; + ObXmlDocument *x_doc = nullptr; + ObString unparsed_text; + if (OB_ISNULL(buffer = OB_NEWx(ObStringBuffer, ctx->allocator_, (ctx->allocator_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate buffer", K(ret), K(in_type), K(expect_type)); + } else if (OB_FAIL(bin.print(*buffer, ObXmlFormatType::NO_FORMAT, 0, 0))) { + LOG_WARN("fail to print xml", K(ret), K(in_type), K(expect_type)); + } else if (OB_FALSE_IT(unparsed_text.assign_ptr(buffer->ptr(), buffer->length()))) { + } else if (OB_FAIL(ObXmlParserUtils::parse_content_text(ctx, unparsed_text, x_doc))) { + LOG_DEBUG("fail to parse unparse", K(ret), K(in_type), K(expect_type)); + if (should_check && ret == OB_ERR_PARSER_SYNTAX) { + ret = OB_ERR_XML_PARSE; + LOG_WARN("unparsed xml parse content failed.", K(ret), K(unparsed_text)); + } else { + ret = OB_SUCCESS; + if (expect_type == BINARY_TYPE) { + if 
(OB_FAIL(bin.construct(bin_new, ctx->allocator_))) { + LOG_WARN("fail to dup res", K(ret), K(in_type), K(expect_type)); + } else { + out = bin_new; + } + } else if (OB_FAIL(bin.to_tree(out))) { + LOG_WARN("fail to tree", K(ret), K(in_type), K(expect_type)); + } + } + } else if (expect_type == BINARY_TYPE) { + if (OB_FAIL(bin.construct(bin_new, ctx->allocator_))) { + LOG_WARN("fail to dup res", K(ret), K(in_type), K(expect_type)); + } else { + out = bin_new; + } + } else { + out = x_doc; + } + } else { + if (expect_type == TREE_TYPE) { + if (OB_FAIL(bin.to_tree(out))) { + LOG_WARN("fail to tree", K(ret), K(in_type), K(expect_type)); + } + } else { + if (OB_FAIL(bin.construct(bin_new, ctx->allocator_))) { + LOG_WARN("fail to dup res", K(ret), K(in_type), K(expect_type)); + } else { + out = bin_new; + } + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect xml type",K(in_type), K(expect_type)); + } + + if (OB_SUCC(ret) && OB_ISNULL(out->get_allocator())) { + out->set_allocator(ctx->allocator_); + } + return ret; +} + + +int ObMulModeFactory::transform(ObMulModeMemCtx* ctx, ObIMulModeBase *src, + ObNodeMemType expect_type, ObIMulModeBase *&out) +{ + INIT_SUCC(ret); + void *buf = NULL; + ObNodeMemType src_type = src->get_internal_type(); + + + if (OB_ISNULL(ctx) || OB_ISNULL(ctx->allocator_) ||OB_ISNULL(src)) { // check allocator + ret = OB_ERR_NULL_VALUE; + LOG_WARN("param allocator is NULL", K(ret), KP(ctx), KP(src)); + } else if (src_type != ObNodeMemType::TREE_TYPE && src_type != ObNodeMemType::BINARY_TYPE) { // check in_type + ret = OB_NOT_SUPPORTED; + LOG_WARN("param src_type is invalid", K(ret), K(src_type)); + } else if (expect_type != ObNodeMemType::TREE_TYPE && expect_type != ObNodeMemType::BINARY_TYPE) { // check expect_type + ret = OB_NOT_SUPPORTED; + LOG_WARN("param expect_type is invali", K(ret), K(expect_type)); + } else if (src_type == ObNodeMemType::TREE_TYPE) { // input:tree + if (expect_type == ObNodeMemType::BINARY_TYPE) { // to bin + if 
(OB_ISNULL(buf = ctx->allocator_->alloc(sizeof(ObXmlBin)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory", K(ret), K(src_type), K(expect_type), K(sizeof(ObXmlBin))); + } else { + ObXmlBin *bin = new (buf) ObXmlBin(ctx); + if (OB_FAIL(bin->parse_tree(src, false))) { + LOG_WARN("fail to parse tree", K(ret), K(src_type), K(expect_type)); + } else { + out = bin; + } + } + } else { // to tree, itself + out = src; + } + } else if (src_type == ObNodeMemType::BINARY_TYPE) { // input:bin + if (expect_type == ObNodeMemType::TREE_TYPE) { // to tree + ObXmlBin *bin = static_cast(src); + if (OB_FAIL(bin->to_tree(out))) { + LOG_WARN("fail to change bin to tree", K(ret), K(src_type), K(expect_type)); + } + } else { // to bin, itself + out = src; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect xml type",K(src_type), K(expect_type)); + } + + return ret; +} + +// don use just for cdc +int ObXmlUtil::xml_bin_to_text( + ObIAllocator &allocator, + const ObString &bin, + ObString &text) { + INIT_SUCC(ret); + // oblib can not dep src/share/rc/ob_tenant_base.h, + // so can not use MTL_ID(), so there just use defualt tenant. + // and this function is used for obcdc, not observer, is fine. 
+ ObArenaAllocator tmp_alloc(ObModIds::OB_LOB_ACCESS_BUFFER, OB_MALLOC_NORMAL_BLOCK_SIZE); + ObStringBuffer *buffer = nullptr; + ObMulModeMemCtx *xml_mem_ctx = nullptr; + ObIMulModeBase *base = nullptr; + ObXmlDocument *doc = nullptr; + + if (bin.empty()) { + } else if (OB_ISNULL(buffer = OB_NEWx(ObStringBuffer, &tmp_alloc, (&tmp_alloc)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to string buffer", K(ret)); + } else if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(&tmp_alloc, xml_mem_ctx))) { + LOG_WARN("fail to create tree memory context", K(ret)); + } else if (OB_FAIL(ObMulModeFactory::get_xml_base(xml_mem_ctx, + bin, + ObNodeMemType::BINARY_TYPE, + ObNodeMemType::BINARY_TYPE, + base))) { + LOG_WARN("fail to get xml base", K(ret), K(bin)); + } else if (OB_FAIL(base->print(*buffer, 0, 0, 0, CS_TYPE_UTF8MB4_GENERAL_CI))) { + LOG_WARN("print_document failed", K(ret)); + } else if (ob_write_string(allocator, buffer->string(), text)) { + LOG_WARN("ob_write_string failed", K(ret), K(*buffer)); + } + return ret; +} + +int ObXmlUtil::to_string(ObIAllocator &allocator, double &in, char *&out) +{ + INIT_SUCC(ret); + ObStringBuffer res_buf(&allocator); + if (std::isnan(in)) { + res_buf.append("NaN"); + } else if (std::isinf(in) && in > 0) { + res_buf.append("Infinity"); + } else if (std::isinf(in) && in < 0) { + res_buf.append("-Infinity"); + } else { + // TODO 科学计数法是否需要考虑 待定 + uint64_t out_len; + const int64_t number_str_size = 256; + double abs_value = fabs(in); + char number_str[number_str_size] = {0}; + // bool force_sci = (abs_value < NOSCI_MIN_DOUBLE) || (abs_value > NOSCI_MAX_DOUBLE); + out_len = ob_gcvt_strict(in, ob_gcvt_arg_type::OB_GCVT_ARG_DOUBLE, number_str_size, + number_str, NULL, FALSE, TRUE, FALSE); + res_buf.append(number_str, out_len); + } + out = res_buf.ptr(); + return ret; +} + +int ObXmlUtil::to_string(ObIAllocator &allocator, bool &in, char *&out) +{ + INIT_SUCC(ret); + ObStringBuffer res_buf(&allocator); + if (in && 
OB_FAIL(res_buf.append("true"))) { + LOG_WARN("append true failed", K(ret)); + } else if (!in && OB_FAIL(res_buf.append("false"))) { + LOG_WARN("append false failed", K(ret)); + } else { + out = res_buf.ptr(); + } + return ret; +} + +int ObXmlUtil::to_string(ObIAllocator &allocator, ObNodeTypeAndContent *in, char *&out) +{ + INIT_SUCC(ret); + if (OB_ISNULL(in) || OB_ISNULL(in->content_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null param", K(ret), K(in)); + } else { + switch(in->type_) { + case ObArgType::PN_BOOLEAN: { + return to_string(allocator, in->content_->boolean_, out); + } + + case ObArgType::PN_DOUBLE: { + return to_string(allocator, in->content_->double_, out); + } + + case ObArgType::PN_STRING: { + if (in->content_->str_.len_ == 0) { + out = nullptr; + } else if (OB_ISNULL(out = static_cast (allocator.alloc(sizeof(char) * in->content_->str_.len_ + 1)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("ArgNodeContent cast to string invalid value", K(ret), K(in->content_->str_.len_)); + } else { + MEMCPY(out, in->content_->str_.name_, in->content_->str_.len_); + out[in->content_->str_.len_] = 0; + } + + break; + } + + default: + ret = OB_OP_NOT_ALLOW; + LOG_WARN("ArgNodeContent cast to boolean invalid value", K(ret), K(in)); + } + } + return ret; +} + +int ObXmlUtil::to_boolean(double &in, bool &out) +{ + INIT_SUCC(ret); + if (in == 0) out = false; + else if (in == 1) out = true; + else { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("int cast to boolean invalid value", K(ret), K(in)); + } + return ret; +} + +int ObXmlUtil::to_boolean(char *in, bool &out) +{ + INIT_SUCC(ret); + if (strcmp(in, "true") == 0) { + out = true; + } else if (strcmp(in, "false") == 0) { + out = false; + } else { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("char* cast to boolean invalid value", K(ret), K(in)); + } + return ret; +} + +int ObXmlUtil::to_boolean(ObNodeTypeAndContent *in, bool &out) +{ + INIT_SUCC(ret); + if (OB_ISNULL(in)) { + ret = OB_BAD_NULL_ERROR; + 
LOG_WARN("unexpected null param", K(ret)); + } else if (OB_ISNULL(in->content_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("in content_ null", K(ret)); + } else { + switch(in->type_) { + case ObArgType::PN_BOOLEAN: + return to_boolean(in->content_->boolean_, out); + + case ObArgType::PN_DOUBLE: + return to_boolean(in->content_->double_, out); + + case ObArgType::PN_STRING: + return to_boolean(&(in->content_->str_), out); + + default: + ret = OB_OP_NOT_ALLOW; + LOG_WARN("ArgNodeContent cast to boolean invalid value", K(ret), K(in)); + } + } + return ret; +} + +int ObXmlUtil::to_boolean(ObPathStr *in, bool &out) +{ + INIT_SUCC(ret); + if (OB_ISNULL(in)) { + out = false; + } else if (in->len_ == 0) { + out = false; + } else { + out = true; + } + return ret; +} + +int ObXmlUtil::check_bool_rule(double &in, bool &out) +{ + out = in == 0 || std::isnan(in) ? false : true; + return OB_SUCCESS; +} + +int ObXmlUtil::check_bool_rule(char *in, bool &out) +{ + out = strlen(in) > 0 ? true : false; + return OB_SUCCESS; +} + +int ObXmlUtil::check_bool_rule(ObPathStr *in, bool &out) +{ + out = in->len_ > 0 ? 
true : false; + return OB_SUCCESS; +} + + +int ObXmlUtil::check_bool_rule(ObNodeTypeAndContent *in, bool &out) +{ + INIT_SUCC(ret); + if (OB_ISNULL(in)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("unexpected null param", K(ret)); + } else if (OB_ISNULL(in->content_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("in content_ null", K(ret)); + } else { + switch(in->type_) { + case ObArgType::PN_BOOLEAN: + return check_bool_rule(in->content_->boolean_, out); + + case ObArgType::PN_DOUBLE: + return check_bool_rule(in->content_->double_, out); + + case ObArgType::PN_STRING: + return check_bool_rule(&(in->content_->str_), out); + + default: + ret = OB_OP_NOT_ALLOW; + LOG_WARN("ArgNodeContent cast to boolean invalid value", K(ret), K(in)); + } + } + return ret; +} + +int ObXmlUtil::to_number(const char *in, const uint64_t length, double &out) +{ + INIT_SUCC(ret); + double ret_val = 0.0; + if (OB_ISNULL(in)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("in is null", K(ret)); + } else { + char *endptr = NULL; + int err = 0; + ret_val = ObCharset::strntodv2(in, length, &endptr, &err); + if (EOVERFLOW == err && (-DBL_MAX == ret_val || DBL_MAX == ret_val)) { + ret = OB_DATA_OUT_OF_RANGE; + LOG_WARN("faild to cast string to double, cause in is out of range", K(ret), K(length), + KP(in), K(ret_val)); + } else { + ObString tmp_str(length, in); + ObString trimed_str = tmp_str.trim(); + // 1. only one of data and endptr is null, it is invalid input. + if ((OB_ISNULL(in) || OB_ISNULL(endptr)) && in != endptr) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null pointer(s)", K(ret), KP(in), KP(endptr)); + } else if (OB_UNLIKELY(in == endptr) || OB_UNLIKELY(EDOM == err)) { // 2. data == endptr include NULL == NULL. + ret = OB_ERR_TRUNCATED_WRONG_VALUE_FOR_FIELD; //1366 + LOG_WARN("wrong value", K(ret), K(length), K(ret_val)); + } else { // 3. so here we are sure that both data and endptr are not NULL. 
+ endptr += ObCharset::scan_str(endptr, in + length, OB_SEQ_SPACES); + if (endptr < in + length) { + ret = OB_ERR_DATA_TRUNCATED; //1265 + LOG_DEBUG("check_convert_str_err", K(length), K(in - endptr)); + } + } + } + } + if (OB_FAIL(ret)) { + ret = OB_INVALID_DATA; + LOG_WARN("invalid double value", KP(in), K(length), K(ret)); + } else { + out = ret_val; + } + return ret; +} + +int ObXmlUtil::to_number(ObPathStr *in, double &out) +{ + INIT_SUCC(ret); + if (OB_ISNULL(in)) { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("ArgNodeContent check bool rule invalid value", K(ret), K(in)); + } else if (OB_FAIL(to_number(in->name_, in->len_, out))) { + LOG_WARN("to number failed", K(ret)); + } + return ret; +} + +int ObXmlUtil::to_number(ObNodeTypeAndContent *in, double &out) +{ + INIT_SUCC(ret); + if (OB_ISNULL(in)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("unexpected null param", K(ret)); + } else if (OB_ISNULL(in->content_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("in content_ null", K(ret)); + } else { + switch(in->type_) { + case ObArgType::PN_BOOLEAN: + return to_number(in->content_->boolean_, out); + + case ObArgType::PN_DOUBLE: + return to_number(in->content_->double_, out); + + case ObArgType::PN_STRING: + return to_number(&(in->content_->str_), out); + + default: + ret = OB_OP_NOT_ALLOW; + LOG_WARN("ArgNodeContent check bool rule invalid value", K(ret), K(in)); + } + } + + return ret; +} + +// special treatment err=OB_OP_NOT_ALLOW, +// TODO errcode rename +int ObXmlUtil::to_number(ObSeekResult *in, double &out) +{ + INIT_SUCC(ret); + ObArray node_array; + if (OB_ISNULL(in)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("to number get in null", K(ret)); + } else if (in->is_scalar_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("compare get scalar unexpected", K(ret)); + } else if (OB_FAIL(get_array_from_mode_base(in->result_.base_, node_array))) { // get children + LOG_WARN("get child array failed", K(ret)); + } else if (node_array.size() < 1) { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("node 
size 0", K(ret)); + } else { + ObString text; + double number = 0; + bool tmp_res = false; + ObIMulModeBase *tmp_node = node_array.at(0); + if (OB_ISNULL(tmp_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("to number get in null", K(ret)); + } else if (ObMulModeNodeType::M_TEXT != tmp_node->type()) { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("first type not text", K(ret)); + } else if (OB_FAIL(tmp_node->get_value(text))) { + LOG_WARN("tmp xml node get value failed", K(ret)); + } else if (OB_FAIL(to_number(text.ptr(), text.length(), out))) { + ret = ret = OB_OP_NOT_ALLOW; // rename error code + LOG_WARN("to number failed", K(ret), K(text)); + } + } + return ret; +} + +int ObXmlUtil::to_number(bool &in, double &out) +{ + out = in ? 1 : 0; + return OB_SUCCESS; +} + + +/* + compare number and number + support all +*/ +int ObXmlUtil::compare(double left, double right, ObFilterType op, bool &res) +{ + INIT_SUCC(ret); + switch(op) { + case ObFilterType::PN_CMP_EQUAL: // = + res = left == right ? true : false; + break; + + case ObFilterType::PN_CMP_UNEQUAL: // != + res = left != right ? true : false; + break; + + case ObFilterType::PN_CMP_GT: // > + res = left > right ? true : false; + break; + + case ObFilterType::PN_CMP_GE: // >= + res = left >= right ? true : false; + break; + + case ObFilterType::PN_CMP_LT: // < + res = left < right ? true : false; + break; + + case ObFilterType::PN_CMP_LE: // <= + res = left <= right ? true : false; + break; + + default: + ret = OB_INVALID_ARGUMENT; + LOG_WARN("compare invalid argument", K(ret), K(left), K(right), K(op)); + break; + } + return ret; +} + +/* + compare: string and string + support: all +*/ +int ObXmlUtil::compare(ObString left, ObString right, ObFilterType op, bool &res) +{ + INIT_SUCC(ret); + switch(op) { + case ObFilterType::PN_CMP_EQUAL: + res = left == right ? true : false; + break; + + case ObFilterType::PN_CMP_UNEQUAL: + res = left != right ? 
true : false; + break; + + case ObFilterType::PN_CMP_GT: // > + res = left > right ? true : false; + break; + + case ObFilterType::PN_CMP_GE: // >= + res = left >= right ? true : false; + break; + + case ObFilterType::PN_CMP_LT: // < + res = left < right ? true : false; + break; + + case ObFilterType::PN_CMP_LE: // <= + res = left <= right ? true : false; + break; + + default: + ret = OB_INVALID_ARGUMENT; + LOG_WARN("compare invalid argument", K(ret), K(op)); + break; + } + + return ret; +} + +/* + compare: boolean and boolean + support: all +*/ +int ObXmlUtil::compare(bool left, bool right, ObFilterType op, bool &res) +{ + INIT_SUCC(ret); + uint32_t tmp_left = left ? 1 : 0; + uint32_t tmp_right = right ? 1 : 0; + switch(op) { + case ObFilterType::PN_CMP_EQUAL: + res = tmp_left == tmp_right ? true : false; + break; + + case ObFilterType::PN_CMP_UNEQUAL: + res = tmp_left != tmp_right ? true : false; + break; + + case ObFilterType::PN_CMP_GT: + res = tmp_left > tmp_right ? true : false; + break; + + case ObFilterType::PN_CMP_GE: + res = tmp_left >= tmp_right ? true : false; + break; + + case ObFilterType::PN_CMP_LT: + res = tmp_left < tmp_right ? true : false; + break; + + case ObFilterType::PN_CMP_LE: + res = tmp_left <= tmp_right ? 
true : false; + break; + + default: + ret = OB_INVALID_ARGUMENT; + LOG_WARN("compare invalid argument", K(ret), K(op)); + break; + } + return ret; +} + +int ObXmlUtil::init_print_ns(ObIAllocator *allocator, ObIMulModeBase *src, ObNsSortedVector& ns_vec, ObNsSortedVector*& ns_vec_point) +{ + INIT_SUCC(ret); + if (OB_NOT_NULL(src) && src->check_extend()) { + if (OB_FAIL(ObXmlUtil::init_extend_ns_vec(allocator, src, ns_vec))) { + LOG_WARN("fail to init ns vector by extend area", K(ret)); + } else { + ns_vec_point = &ns_vec; + } + } else { + ns_vec_point = nullptr; + } + return ret; +} + +int ObXmlUtil::calculate(double left, double right, ObFilterType op, double &res) +{ + INIT_SUCC(ret); + switch (op) { + case ObFilterType::PN_CMP_ADD: + res = left + right; + break; + + case ObFilterType::PN_CMP_SUB: + res = left - right; + break; + + case ObFilterType::PN_CMP_MUL: + res = left * right; + break; + + case ObFilterType::PN_CMP_DIV: + res = left / right; + break; + + case ObFilterType::PN_CMP_MOD: + res = fmod(left, right); + break; + + default: + ret = OB_INVALID_ARGUMENT; + LOG_WARN("calculate invalid argument", K(ret), K(op)); + break; + } + return ret; +} + +int ObXmlUtil::logic_compare(bool left, bool right, ObFilterType op, bool &res) +{ + INIT_SUCC(ret); + switch (op) { + case PN_AND_COND: + res = left && right ? true : false; + break; + + case PN_OR_COND: + res = left || right ? 
true : false; + break; + + default: + ret = OB_INVALID_ARGUMENT; + LOG_WARN("logic compare invalid argument", K(ret), K(op)); + break; + } + return ret; +} + +int ObXmlUtil::dfs_xml_text_node(ObMulModeMemCtx *ctx, ObIMulModeBase *xml_doc, ObString &res) +{ + int ret = OB_SUCCESS; + ObStringBuffer buff(ctx->allocator_); + ObPathExprIter xpath_iter(ctx->allocator_); + ObString xpath_str = ObString::make_string("//text()"); + ObString default_ns; // unused + ObIMulModeBase *result_node = NULL; + if (OB_FAIL(xpath_iter.init(ctx, xpath_str, default_ns, xml_doc, NULL))) { + LOG_WARN("fail to init xpath iterator", K(xpath_str), K(default_ns), K(ret)); + } else if (OB_FAIL(xpath_iter.open())) { + LOG_WARN("fail to open xpath iterator", K(ret)); + } + + while (OB_SUCC(ret)) { + ObString content; + if (OB_FAIL(xpath_iter.get_next_node(result_node))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next xml node", K(ret)); + } + } else if (OB_ISNULL(result_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xpath result node is null", K(ret)); + } else if (result_node->type() != M_TEXT) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid xml node type", K(ret), K(result_node->type())); + } else if (OB_FAIL(result_node->get_value(content))) { + LOG_WARN("fail to get text node content", K(ret)); + } else if (OB_FAIL(buff.append(content))) { + LOG_WARN("fail to append text node content", K(ret), K(content)); + } + } + + if (ret == OB_ITER_END) { + res.assign_ptr(buff.ptr(), buff.length()); + ret = OB_SUCCESS; + } + + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = xpath_iter.close())) { + LOG_WARN("fail to close xpath iter", K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + return ret; +} + +int ObXmlUtil::get_array_from_mode_base(ObIMulModeBase *node, ObIArray &res) +{ + INIT_SUCC(ret); + if (OB_XML_TYPE != node->data_type()) { // filter xml type + ret = OB_ERR_UNEXPECTED; + LOG_WARN("comprare ObIMulModeBase operator not xml type", K(ret), K(node->data_type())); + } else 
if (OB_ISNULL(node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("xml node null", K(ret)); + } else if (!is_container_tc(node->type())) { + if (OB_FAIL(res.push_back(node))) { + LOG_WARN("get child failed", K(ret), K(node->type())); + } + } else if (OB_FAIL(node->get_children(res))) { // get children + LOG_WARN("get child failed", K(ret), K(node)); + } + return ret; +} + +int ObXmlUtil::alloc_arg_node(ObIAllocator *allocator, ObPathArgNode*& node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathArgNode* arg_node = + static_cast (allocator->alloc(sizeof(ObPathArgNode))); + if (OB_ISNULL(arg_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at location_node", K(ret)); + } else { + node = arg_node; + } + } + return ret; +} + +int ObXmlUtil::alloc_filter_node(ObIAllocator *allocator, ObXmlPathFilter*& node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObXmlPathFilter* filter_node = + static_cast (allocator->alloc(sizeof(ObXmlPathFilter))); + if (OB_ISNULL(filter_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at location_node", K(ret)); + } else { + node = filter_node; + } + } + return ret; +} + +int ObXmlUtil::cast_to_string(const ObString &val, ObIAllocator &allocator, ObStringBuffer& result, ObCollationType cs_type) +{ + INIT_SUCC(ret); + ObMulModeNodeType node_type = M_MAX_TYPE; + ParamPrint param_list; + ObIMulModeBase *node = NULL; + ObMulModeMemCtx* mem_ctx = NULL; + ObNsSortedVector* ns_vec_point = nullptr; + ObNsSortedVector ns_vec; + + if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(&allocator, mem_ctx))) { + LOG_WARN("fail to create tree memory context", K(ret)); + } else if (OB_FAIL(ObXmlUtil::xml_bin_type(val, node_type))) { + LOG_WARN("xml bin type failed", K(val)); + } else if 
(OB_FAIL(ObMulModeFactory::get_xml_base(mem_ctx, val, + ObNodeMemType::BINARY_TYPE, + ObNodeMemType::BINARY_TYPE, + node, M_DOCUMENT, true))) { + LOG_WARN("fail to get xml base", K(ret), K(val)); + } else if (OB_FAIL(ObXmlUtil::init_print_ns(&allocator, node, ns_vec, ns_vec_point))) { + LOG_WARN("fail to init ns vector by extend area", K(ret)); + // default size value of print_document is 2 + } else if (OB_FAIL(node->print_document(result, cs_type, node_type == M_UNPARESED_DOC ? ObXmlFormatType::NO_FORMAT : ObXmlFormatType::WITH_FORMAT, 2, ns_vec_point))) { + LOG_WARN("print document failed", K(ret)); + } + + return ret; +} + +bool ObXmlUtil::is_xml_doc_over_depth(uint64_t depth) +{ + return depth > OB_XML_PARSER_MAX_DEPTH_; +} + +int ObXmlUtil::revert_escape_character(ObIAllocator &allocator, ObString &input_str, ObString &output_str) +{ + int ret = OB_SUCCESS; + const char *ptr = input_str.ptr(); + ObStringBuffer buff(&allocator); + int64_t idx = 0; + while(idx < input_str.length() && OB_SUCC(ret)) { + ObString ref; + int64_t ref_len = 0; + if (*(ptr+idx) == '&' && ObXmlParserUtils::is_entity_ref(input_str, idx, ref, ref_len)) { + // append entity ref and increment idx + if (OB_FAIL(buff.append(ref))) { + LOG_WARN("fail to append ref char", K(ret)); + } else { + idx += ref_len; + } + } else if (OB_FAIL(buff.append(ptr+idx, 1))) { + LOG_WARN("fail to append char", K(ret)); + } else { + idx++; + } + } + + if (OB_SUCC(ret)) { + ObString res(buff.length(), buff.ptr()); + if (OB_FAIL(ob_write_string(allocator, res, output_str))) { + LOG_WARN("fail to write string", K(ret), K(res)); + } + } + + return ret; +} + +int ObXmlUtil::init_extend_ns_vec(ObIAllocator *allocator, ObIMulModeBase *src, ObNsSortedVector& ns_vec) +{ + INIT_SUCC(ret); + if (OB_ISNULL(src) || OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObXmlBin* bin = static_cast(src); + ObXmlBin extend_bin; + ns_vec.reset(); + if 
(OB_FAIL(bin->get_extend(extend_bin))) { + LOG_WARN("fail to get extend bin", K(ret)); + } else { + ObNsPairCmp cmp; + ObNsPairUnique unique; + ObXmlBin bin_buffer; + ObNsSortedVector::iterator pos = ns_vec.end(); + ObIMulModeBase* cur = nullptr; + int64_t num_children = extend_bin.attribute_size(); + + for (int64_t i = 0; OB_SUCC(ret) && i < num_children; i ++) { + cur = extend_bin.attribute_at(i, &bin_buffer); + if (OB_ISNULL(cur)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get child from extend", K(ret), K(i)); + } else if (cur->type() != M_NAMESPACE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should be all ns", K(ret), K(i)); + } else { + ObNsPair* tmp_ns = static_cast (allocator->alloc(sizeof(ObNsPair))); + if (OB_ISNULL(tmp_ns)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at seek result", K(ret)); + } else { + tmp_ns = new (tmp_ns) ObNsPair(); + ObString tmp_key; + if (OB_FAIL(cur->get_key(tmp_key))) { + LOG_WARN("failed to get ns", K(ret), K(i)); + } else { + tmp_ns->set_xml_key(tmp_key); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(cur->get_value(tmp_ns->value_))) { + LOG_WARN("failed to get ns", K(ret), K(i)); + } else if (OB_FAIL(ns_vec.insert_unique(tmp_ns, pos, cmp, unique))) { + LOG_WARN("should notduplicated nodes", K(ret), K(i)); + } + } + } + } // end for + } + } + return ret; +} +int ObXmlUtil::delete_dup_ns_definition(ObIMulModeBase *data, ObNsSortedVector& origin_vec, ObVector& delete_vec) +{ + INIT_SUCC(ret); + if (OB_ISNULL(data)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (data->attribute_size() > 0) { + ObNsPairCmp cmp; + ObNsPairUnique unique; + ObNsPair tmp_pair; + ObXmlBin bin_buffer; + ObIMulModeBase* cur = nullptr; + int64_t num_children = data->attribute_size(); + + for (int64_t i = 0; OB_SUCC(ret) && i < num_children; i++) { + ObNsSortedVector::iterator pos = origin_vec.end(); + cur = data->attribute_at(i, &bin_buffer); + if (OB_ISNULL(cur)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("failed to get child from element", K(ret), K(i)); + } else if (cur->type() != ObMulModeNodeType::M_NAMESPACE) { + } else if (OB_FAIL(cur->get_key(tmp_pair.key_)) || OB_FAIL(cur->get_value(tmp_pair.value_))) { + LOG_WARN("failed to get ns", K(ret), K(i)); + } else if (OB_FAIL(origin_vec.find(&tmp_pair, pos, cmp, unique)) || pos == origin_vec.end()) { + if (ret == OB_ENTRY_NOT_EXIST) { // didn't find, not duplicate ns, it's normal + ret = OB_SUCCESS; + } + } else if (OB_FAIL(delete_vec.push_back(*pos))) { // record ns that will be delete + LOG_WARN("failed to record", K(ret), K(i)); + } else if (OB_FAIL(origin_vec.remove(pos))) { // remove duplicate ns + LOG_WARN("failed to remove duplicate", K(ret), K(i)); + } + } + } + return ret; +} +int ObXmlUtil::check_ns_conflict(ObIMulModeBase* cur_parent, + ObIMulModeBase* &last_parent, + ObXmlBin *cur, + common::hash::ObHashMap& ns_map, + bool& conflict) +{ + INIT_SUCC(ret); + conflict = false; + if (OB_ISNULL(cur)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_ISNULL(last_parent) && ns_map.size() == 0) { + // add directly + int ns_num = cur->attribute_size(); + bool end_check = false; + for (int pos = 0; OB_SUCC(ret) && !end_check &&pos < ns_num; ++pos) { + ObXmlBin buff(*cur); + ObXmlBin* tmp = &buff; + if (OB_FAIL(cur->construct(tmp, nullptr))) { + LOG_WARN("failed to dup bin.", K(ret)); + } else if (OB_FAIL(tmp->set_at(pos))) { + LOG_WARN("failed to set at child.", K(ret)); + } else if (tmp->type() == M_NAMESPACE) { + ObString key; + ObString value; + // init ns node + if (OB_FAIL(tmp->get_key(key))) { + LOG_WARN("failed to eval key.", K(ret)); + } else if (OB_FAIL(tmp->get_value(value))) { + LOG_WARN("failed to eval value.", K(ret)); + } else if (OB_FAIL(ns_map.set_refactored(key, value))) { + LOG_WARN("fail to add ns from map", K(ret), K(key)); + } + } else if (tmp->type() == M_ATTRIBUTE) { + } else { + end_check = true; // neither ns nor 
attribute, stop searching + } + } + } else if (last_parent == cur_parent) { + // do nothing, same parent means same ancestor, don't need to check/add ns + } else { + // check conflicts, record new ns definition idx + int ns_num = cur->attribute_size(); + ObArray new_ns_idx; + bool end_check = false; + for (int pos = 0; OB_SUCC(ret) && !end_check && pos < ns_num && !conflict; ++pos) { + ObXmlBin buff(*cur); + ObXmlBin* tmp = &buff; + if (OB_FAIL(cur->construct(tmp, nullptr))) { + LOG_WARN("failed to dup bin.", K(ret)); + } else if (OB_FAIL(tmp->set_at(pos))) { + LOG_WARN("failed to set at child.", K(ret)); + } else if (tmp->type() == M_NAMESPACE) { + ObString key; + ObString value; + ObString* find_val; + // init ns node + if (OB_FAIL(tmp->get_key(key))) { + LOG_WARN("failed to eval key.", K(ret)); + } else if (OB_FAIL(tmp->get_value(value))) { + LOG_WARN("failed to eval value.", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(find_val = ns_map.get(key))) { + if (find_val->compare(value) != 0) { + conflict = true; + } + } else if (OB_FAIL(new_ns_idx.push_back(pos))){ + LOG_WARN("failed to record idx.", K(ret)); + } + } else if (tmp->type() == M_ATTRIBUTE) { + } else { + end_check = true; // neither ns nor attribute, stop searching + } + } + for (int pos = 0; OB_SUCC(ret) && pos < new_ns_idx.size() && !conflict; ++pos) { + ObXmlBin buff(*cur); + ObXmlBin* tmp = &buff; + if (OB_FAIL(cur->construct(tmp, nullptr))) { + LOG_WARN("failed to dup bin.", K(ret)); + } else if (OB_FAIL(tmp->set_at(new_ns_idx[pos]))) { + LOG_WARN("failed to set at child.", K(ret)); + } else if (tmp->type() == M_NAMESPACE) { + ObString key; + ObString value; + // init ns node + if (OB_FAIL(tmp->get_key(key))) { + LOG_WARN("failed to eval key.", K(ret)); + } else if (OB_FAIL(tmp->get_value(value))) { + LOG_WARN("failed to eval value.", K(ret)); + } else if (OB_FAIL(ns_map.set_refactored(key, value))) { + LOG_WARN("fail to add ns from map", K(ret), K(key)); + } + } else { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("must be ns.", K(ret)); + } + } + } + // update parent anyways + last_parent = cur_parent; + return ret; +} +int ObXmlUtil::ns_to_extend(ObMulModeMemCtx* mem_ctx, + common::hash::ObHashMap& ns_map, + ObStringBuffer *buffer) +{ + INIT_SUCC(ret); + if (OB_ISNULL(buffer)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObXmlElement element_ns(ObMulModeNodeType::M_ELEMENT, mem_ctx); + ret = element_ns.init(); + ObArray ns_vec; + common::hash::ObHashMap::iterator ns_map_iter; + for (ns_map_iter = ns_map.begin(); OB_SUCC(ret) && ns_map_iter != ns_map.end(); ns_map_iter++) { + ObXmlAttribute ns_node(ObMulModeNodeType::M_NAMESPACE, mem_ctx); + ObString key; + ObString value; + // init ns node + if (OB_SUCC(ns_vec.push_back(ns_node))) { + ns_vec[ns_vec.size() - 1].set_xml_key(ns_map_iter->first); + ns_vec[ns_vec.size() - 1].set_value(ns_map_iter->second); + if (OB_FAIL(element_ns.add_attribute(&ns_vec[ns_vec.size() - 1]))) { + LOG_WARN("fail to add ns", K(ret)); + } + } + } + // serialize element node as extend area + if (OB_SUCC(ret)) { + ObXmlElementSerializer serializer_element(&element_ns, buffer); + if (OB_FAIL(serializer_element.serialize(0))) { + LOG_WARN("failed to serialize.", K(ret)); + } + } + } + return ret; +} +int ObXmlUtil::add_ns_def_if_necessary(uint32_t format_flag, ObStringBuffer &x_buf, const ObString& origin_prefix, + ObNsSortedVector* element_ns_vec, ObVector& delete_ns_vec) +{ + INIT_SUCC(ret); + ObString prefix; + if (origin_prefix.empty()) { + prefix = "xmlns"; + } else { + prefix = origin_prefix; + } + if (OB_ISNULL(element_ns_vec)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (element_ns_vec->size() > 0) { + ObNsPair tmp_pair(prefix); + ObNsPairCmp cmp; + ObNsPairUnique unique; + ObNsSortedVector::iterator pos = element_ns_vec->end(); + if (OB_FAIL(element_ns_vec->find(&tmp_pair, pos, cmp, unique)) || pos == element_ns_vec->end()) { + if (ret == 
OB_ENTRY_NOT_EXIST) { // didn't find, not duplicate ns, it's normal + ret = OB_SUCCESS; + } + } else if (OB_NOT_NULL(pos)) { + if (OB_FAIL(x_buf.append(" "))) { + LOG_WARN("fail to print space in ns", K(ret)); + } + // append default ns or prefix ns + if (OB_FAIL(ret)) { + } else if ((*pos)->key_.ptr() == nullptr + || (*pos)->key_.length() == 0 + || (*pos)->key_.case_compare("xmlns") == 0) { + if (OB_FAIL(x_buf.append("xmlns"))) { + LOG_WARN("fail to append default ns", K(ret)); + } + } else if (OB_FAIL(ObXmlUtil::append_qname(x_buf, "xmlns", (*pos)->key_))) { + LOG_WARN("fail to print prefix in ns", K(ret)); + } + // append ns value + if (OB_FAIL(ret)) { + } else if (OB_FAIL(x_buf.append("=\""))) { + LOG_WARN("fail to print =\" in ns", K(ret)); + } else if (!(format_flag & NO_ENTITY_ESCAPE)) { + if (OB_FAIL(ObXmlParserUtils::escape_xml_text((*pos)->value_, x_buf))) { + LOG_WARN("fail to print text with escape char", K(ret)); + } + } else if (OB_FAIL(x_buf.append((*pos)->value_))) { + LOG_WARN("fail to print value in ns", K(ret), K((*pos)->value_)); + } + // delete ns definition that already printed + if (OB_FAIL(ret)) { + } else if (OB_FAIL(x_buf.append("\""))) { + LOG_WARN("fail to print \" in ns", K(ret)); + } else if (OB_FAIL(delete_ns_vec.push_back(*pos))) { // record duplicate ns + LOG_WARN("failed to record duplicate", K(ret)); + } else if (OB_FAIL(element_ns_vec->remove(pos))) { // remove duplicate ns + LOG_WARN("failed to remove duplicate", K(ret)); + } + } + } + return ret; +} + +int ObXmlUtil::add_attr_ns_def(ObIMulModeBase *cur, uint32_t format_flag, ObStringBuffer &buf, + ObNsSortedVector* element_ns_vec, ObVector& delete_ns_vec) +{ + INIT_SUCC(ret); + ObXmlBin* bin = nullptr; + if (OB_ISNULL(element_ns_vec) || OB_ISNULL(cur) || OB_ISNULL(bin = static_cast(cur))) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + int attr_num = bin->attribute_size(); + bool end_check = false; + for (int pos = 0; OB_SUCC(ret) && 
!end_check && pos < attr_num; ++pos) { + ObXmlBin buff(*bin); + ObXmlBin* tmp = &buff; + if (OB_FAIL(bin->construct(tmp, nullptr))) { + LOG_WARN("failed to dup bin.", K(ret)); + } else if (OB_FAIL(tmp->set_at(pos))) { + LOG_WARN("failed to set at child.", K(ret)); + } else if (tmp->type() == M_NAMESPACE) { + } else if (tmp->type() == M_ATTRIBUTE) { + ObString prefix = tmp->get_prefix(); + if (prefix.empty()) { + } else if (OB_FAIL(ObXmlUtil::add_ns_def_if_necessary(format_flag, buf, prefix, element_ns_vec, delete_ns_vec))) { + LOG_WARN("failed to add attribute ns.", K(ret)); + } + } else { + end_check = true; // neither ns nor attribute, stop searching + } + } + } + return ret; +} +int ObXmlUtil::restore_ns_vec(ObNsSortedVector* element_ns_vec, ObVector& delete_ns_vec) +{ + INIT_SUCC(ret); + if (OB_ISNULL(element_ns_vec)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObNsPairCmp cmp; + ObNsPairUnique unique; + for (int i = 0; OB_SUCC(ret) && i < delete_ns_vec.size(); ++i) { + ObNsSortedVector::iterator pos = element_ns_vec->end(); + if (OB_FAIL(element_ns_vec->insert_unique(delete_ns_vec[i], pos, cmp, unique))) { + LOG_WARN("failed to restore ns", K(ret), K(i)); + } + } + } + return ret; +} + +} // namespace common +} // namespace oceanbase diff --git a/deps/oblib/src/lib/xml/ob_xml_util.h b/deps/oblib/src/lib/xml/ob_xml_util.h new file mode 100644 index 0000000000..0466f76659 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xml_util.h @@ -0,0 +1,581 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PubL v2 for more details. + * This file contains interface define for the xml util abstraction. + */ + +#ifndef OCEANBASE_SQL_OB_XML_UTIL +#define OCEANBASE_SQL_OB_XML_UTIL + +#include "lib/xml/ob_multi_mode_interface.h" +#include "ob_tree_base.h" +#include "lib/xml/ob_xml_tree.h" +#include "ob_xpath.h" + +namespace oceanbase { +namespace common { + +enum ObXpathArgType { + XC_TYPE_BOOLEAN = 0, + XC_TYPE_NUMBER, + XC_TYPE_STRING, + XC_TYPE_NODE, + XC_TYPE_MAX // invalid type, or count of ObXpathArgType +}; + +enum ObXpathCompareType { + XC_EQ = 0, + XC_NE, + XC_LT, + XC_LE, + XC_GT, + XC_GE, + XC_MAX // invalid type, or count of ObXpathCompareType +}; + +static constexpr int CMP_ARG_TYPE_NUM = static_cast(ObXpathArgType::XC_TYPE_MAX); +static constexpr int XC_TYPE_NUM = static_cast(ObXpathCompareType::XC_MAX); + + +enum ObXmlBinaryType { + DocumentType = 0, + ContentType, + UnparsedType, + MaxType +}; + +struct ObIMulModeBaseCmp { + bool operator()(const ObIMulModeBase* a, const ObIMulModeBase* b) { + bool is_smaller = false; + if (OB_ISNULL(a) && OB_ISNULL(b)) { + is_smaller = true; + } else if (OB_NOT_NULL(a) && OB_NOT_NULL(b)) { // is tree node + ObNodeMemType a_type = a->get_internal_type(); + ObNodeMemType b_type = b->get_internal_type(); + if (a_type == ObNodeMemType::TREE_TYPE && b_type == ObNodeMemType::TREE_TYPE) { + is_smaller = (a < b); + } else { + ObIMulModeBase* ref_a = const_cast(a); + ObIMulModeBase* ref_b = const_cast(b); + is_smaller = (ref_a->is_node_before(ref_b)); + } + } else { + is_smaller = false; + } + return is_smaller; + } +}; + +struct ObIMulModeBaseUnique { + bool operator()(const ObIMulModeBase* a, const ObIMulModeBase* b) { + bool is_eq = false; + if (OB_ISNULL(a) && OB_ISNULL(b)) { + is_eq = true; + } else if (OB_NOT_NULL(a) && OB_NOT_NULL(b)) { // is tree node + ObNodeMemType a_type = a->get_internal_type(); + ObNodeMemType b_type = b->get_internal_type(); + if (a_type == ObNodeMemType::TREE_TYPE && b_type == 
ObNodeMemType::TREE_TYPE) { + is_eq = (a == b); + } else { + if (a->type() == b->type()) { + ObIMulModeBase* ref_a = const_cast(a); + ObIMulModeBase* ref_b = const_cast(b); + is_eq = (ref_a->is_equal_node(ref_b)); + } else { + is_eq = false; + } + } + } else { + is_eq = false; + } + return is_eq; + } +}; + +struct ObXmlKeyCompare { + int operator()(const ObString& left_key, const ObString& right_key) { + return left_key.compare(right_key); + } +}; + +class ObNsPair final +{ +public: + ObNsPair() : key_(nullptr), value_(nullptr) {} + explicit ObNsPair(const ObString &key, ObString value) + : key_(key), + value_(value) + { + } + ObNsPair(const ObString& key) : key_(key), value_(nullptr) {} + explicit ObNsPair(const ObString &key, ObString& value) + : key_(key), + value_(value) + {} + ~ObNsPair() {} + OB_INLINE common::ObString get_key() const { return key_; } + OB_INLINE void set_xml_key(const common::ObString &new_key) + { + key_.assign_ptr(new_key.ptr(), new_key.length()); + } + OB_INLINE common::ObString get_value() const { return value_; } + OB_INLINE void set_value(const common::ObString &new_value) + { + value_.assign_ptr(new_value.ptr(), new_value.length()); + } + bool operator<(const ObNsPair& right) const + { + return key_ < right.key_; + } + bool operator==(const ObNsPair& right) const + { + return key_ == right.key_; + } + bool operator==(long null) const + { + // ptr must be null + return key_.ptr() == nullptr && value_.ptr() == nullptr; + } + int64_t to_string(char *buf, const int64_t buf_len) const + { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "key = %s", key_.ptr()); + return pos; + } + common::ObString key_; + common::ObString value_; +}; +struct ObNsPairCmp { + int operator()(const ObNsPair* left, const ObNsPair* right) { + INIT_SUCC(ret); + if (OB_ISNULL(left) || OB_ISNULL(right)) { + ret = (left < right); + } else { + if (left->key_.length() != right->key_.length()) { + ret = (left->key_.length() < right->key_.length()); + } else { // 
do Lexicographic order when length equals + ret = (left->key_.compare(right->key_) < 0); + } + } + return ret; + } +}; +struct ObNsPairUnique { + // for ns, if key is equal, then definition is duplicate + int operator()(const ObNsPair* left, const ObNsPair* right) { + bool ret_bool = false; + if (OB_ISNULL(left) || OB_ISNULL(right)) { + ret_bool = (left == right); + } else { + ret_bool = (left->key_ == right->key_); + } + return ret_bool; + } +}; + +class ObXmlUtil { +public: + static const char* get_charset_name(ObCollationType collation_type); + static const char* get_charset_name(ObCharsetType charset_type); + static ObCharsetType check_support_charset(const ObString& cs_name); + + static bool is_container_tc(ObMulModeNodeType type); + static bool is_node(ObMulModeNodeType type); + // test type + static bool is_text(ObMulModeNodeType type); + // comment type + static bool is_comment(ObMulModeNodeType type); + // element type + static bool is_element(ObMulModeNodeType type); + // pi type + static bool is_pi(ObMulModeNodeType type); + static bool use_text_serializer(ObMulModeNodeType type); + static bool use_attribute_serializer(ObMulModeNodeType type); + static bool use_element_serializer(ObMulModeNodeType type); + + static int append_newline_and_indent(ObStringBuffer &j_buf, uint64_t level, uint64_t size); + + static int append_qname(ObStringBuffer &j_buf, const ObString& prefix, const ObString& localname); + static int add_ns_def_if_necessary(uint32_t format_flag, ObStringBuffer &buf, const ObString& origin_prefix, + ObNsSortedVector* element_ns_vec, ObVector& delete_ns_vec); + static int add_attr_ns_def(ObIMulModeBase *cur, uint32_t format_flag, ObStringBuffer &buf, + ObNsSortedVector* element_ns_vec, ObVector& delete_ns_vec); + static int restore_ns_vec(ObNsSortedVector* element_ns_vec, ObVector& delete_ns_vec); + static int init_extend_ns_vec(ObIAllocator *allocator, + ObIMulModeBase *src, + ObNsSortedVector& ns_vec); + static int 
delete_dup_ns_definition(ObIMulModeBase *src, + ObNsSortedVector& origin_vec, + ObVector& delete_vec); + static int check_ns_conflict(ObIMulModeBase* cur_parent, + ObIMulModeBase* &last_parent, + ObXmlBin *cur, + common::hash::ObHashMap& ns_map, + bool& conflict); + static int ns_to_extend(ObMulModeMemCtx* mem_ctx, + common::hash::ObHashMap& ns_map, + ObStringBuffer *buffer); + static int create_mulmode_tree_context(ObIAllocator *allocator, ObMulModeMemCtx*& ctx); + static int xml_bin_type(const ObString& data, ObMulModeNodeType& type); + static int xml_bin_header_info(const ObString& data, ObMulModeNodeType& type, int64_t& size); + static int cast_to_string(const ObString &val, ObIAllocator &allocator, ObStringBuffer& result, ObCollationType cs_type); + + // safe cast + // if cast type not match, return null; + // should be carefull when use these functions + template + static XmlNodeClass* xml_node_cast(ObXmlNode* src, ObMulModeNodeType xml_type) { + XmlNodeClass* res = nullptr; + if (OB_NOT_NULL(src) && src->type() == xml_type) { + res = static_cast(src); + } + return res; + } + // cast to string + static int to_string(ObIAllocator &allocator, double &in, char *&out); + static int to_string(ObIAllocator &allocator, bool &in, char *&out); + static int to_string(ObIAllocator &allocator, ObNodeTypeAndContent *in, char *&out); + + // cast to boolean + static int to_boolean(double &in, bool &out); + static int to_boolean(bool &in, bool &out) { out = in; return 0; }; + static int to_boolean(char *in, bool &out); + static int to_boolean(ObNodeTypeAndContent *in, bool &out); + static int to_boolean(ObPathStr *in, bool &out); + + static int check_bool_rule(bool &in, bool &out) { out = in; return 0; }; + static int check_bool_rule(double &in, bool &out); + static int check_bool_rule(char *in, bool &out); + static int check_bool_rule(ObPathStr *in, bool &out); + static int check_bool_rule(ObNodeTypeAndContent *in, bool &out); + + // cast to number + static int 
to_number(const char *in, const uint64_t length, double &out);
+  static int to_number(bool &in, double &out);
+  static int to_number(double &in, double &out) { out = in; return 0; };
+  static int to_number(ObSeekResult *in, double &out);
+  static int to_number(ObPathStr *in, double &out);
+  static int to_number(ObNodeTypeAndContent *in, double &out);
+  static int dfs_xml_text_node(ObMulModeMemCtx *ctx, ObIMulModeBase *xml_doc, ObString &res);
+  static int get_array_from_mode_base(ObIMulModeBase *left, ObIArray &res);
+  static int alloc_arg_node(ObIAllocator *allocator, ObPathArgNode*& node);
+  static int alloc_filter_node(ObIAllocator *allocator, ObXmlPathFilter*& node);
+  static int compare(double left, double right, ObFilterType op, bool &res);
+  static int compare(ObString left, ObString right, ObFilterType op, bool &res);
+  static int compare(bool left, bool right, ObFilterType op, bool &res);
+  static int init_print_ns(ObIAllocator *allocator, ObIMulModeBase *src, ObNsSortedVector& ns_vec, ObNsSortedVector*& vec_point);
+
+  // Callers must handle OB_OP_NOT_ALLOW specially.
+  // calculate: + - * div %
+  static int calculate(double left, double right, ObFilterType op, double &res);
+
+  // logic compare: and/or
+  static int logic_compare(bool left, bool right, ObFilterType op, bool &res);
+
+  // union: |
+  // Only the left operand is evaluated: res is whatever check_bool_rule(left, res) yields,
+  // and that call's return code is passed through. `right` is intentionally unused.
+  template <typename LeftType, typename RightType>
+  static int inner_union(LeftType left, RightType right, bool &res)
+  {
+    INIT_SUCC(ret);
+    UNUSED(right);
+    if (OB_FAIL(ObXmlUtil::check_bool_rule(left, res))) {
+    }
+    return ret;
+  }
+
+  // Returns true when `type` is one of the four argument kinds handled by
+  // arg_type_correspondence(): boolean, double, string or sub-path.
+  // The previous form OR-ed four `!=` tests, which is true for every value,
+  // so the function could never return true.
+  static bool check_xpath_arg_type(ObArgType type)
+  {
+    return type == ObArgType::PN_BOOLEAN
+           || type == ObArgType::PN_DOUBLE
+           || type == ObArgType::PN_STRING
+           || type == ObArgType::PN_SUBPATH;
+  }
+
+  static ObXpathArgType arg_type_correspondence(ObArgType arg_type)
+  {
+    switch (arg_type) {
+      case ObArgType::PN_BOOLEAN:
+        return ObXpathArgType::XC_TYPE_BOOLEAN;
+
+      case ObArgType::PN_DOUBLE:
+        return ObXpathArgType::XC_TYPE_NUMBER;
+
+      case 
ObArgType::PN_STRING: + return ObXpathArgType::XC_TYPE_STRING; + + case ObArgType::PN_SUBPATH: + return ObXpathArgType::XC_TYPE_NODE; + + default: + return ObXpathArgType::XC_TYPE_BOOLEAN; + + } + } + + static ObXpathCompareType filter_type_correspondence(ObFilterType filter_type) { + switch (filter_type) { + case ObFilterType::PN_CMP_EQUAL: + return ObXpathCompareType::XC_EQ; + + case ObFilterType::PN_CMP_UNEQUAL: + return ObXpathCompareType::XC_NE; + + case ObFilterType::PN_CMP_GT: + return ObXpathCompareType::XC_GT; + + case ObFilterType::PN_CMP_GE: + return ObXpathCompareType::XC_GE; + + case ObFilterType::PN_CMP_LE: + return ObXpathCompareType::XC_LE; + + case ObFilterType::PN_CMP_LT: + return ObXpathCompareType::XC_LT; + + default: + return ObXpathCompareType::XC_EQ; + } + } + + /* + compare[all 6]: =, !=, >, >=, <, <= + calculate[all 5]: +, -, *, div, % + union[all 1]: | + logic operation[all 2]: and, or + */ + + // 0-7: = != < <= > >= + // 0-2: bool number string node-set + static constexpr ObXpathArgType compare_cast[CMP_ARG_TYPE_NUM][CMP_ARG_TYPE_NUM][XC_TYPE_NUM] = { + // left bool + { + /*right bool*/ + { + XC_TYPE_BOOLEAN, // = + XC_TYPE_BOOLEAN, // != + XC_TYPE_NUMBER, // > + XC_TYPE_NUMBER, // >= + XC_TYPE_NUMBER, // < + XC_TYPE_NUMBER // <= + }, + + /*right number*/ + { + XC_TYPE_BOOLEAN, // = + XC_TYPE_BOOLEAN, // != + XC_TYPE_NUMBER, // > + XC_TYPE_NUMBER, // >= + XC_TYPE_NUMBER, // < + XC_TYPE_NUMBER // <= + }, + + /*right string*/ + { + XC_TYPE_BOOLEAN, // = + XC_TYPE_BOOLEAN, // != + XC_TYPE_NUMBER, // > + XC_TYPE_NUMBER, // >= + XC_TYPE_NUMBER, // < + XC_TYPE_NUMBER // <= + }, + + /*right node-set*/ + { + XC_TYPE_BOOLEAN, // = + XC_TYPE_BOOLEAN, // != + XC_TYPE_BOOLEAN, // > + XC_TYPE_BOOLEAN, // >= + XC_TYPE_BOOLEAN, // < + XC_TYPE_BOOLEAN // <= + }, + }, + // left number + { + /*right bool*/ + { + XC_TYPE_BOOLEAN, // = + XC_TYPE_BOOLEAN, // != + XC_TYPE_NUMBER, // > + XC_TYPE_NUMBER, // >= + XC_TYPE_NUMBER, // < + XC_TYPE_NUMBER // <= + }, + 
+ /*right number*/ + { + XC_TYPE_NUMBER, // = + XC_TYPE_NUMBER, // != + XC_TYPE_NUMBER, // > + XC_TYPE_NUMBER, // >= + XC_TYPE_NUMBER, // < + XC_TYPE_NUMBER // <= + }, + + /*right string*/ + { + XC_TYPE_NUMBER, // = + XC_TYPE_NUMBER, // != + XC_TYPE_NUMBER, // > + XC_TYPE_NUMBER, // >= + XC_TYPE_NUMBER, // < + XC_TYPE_NUMBER // <= + }, + + /*right node-set*/ + { + XC_TYPE_NUMBER, // = + XC_TYPE_NUMBER, // != + XC_TYPE_NUMBER, // > + XC_TYPE_NUMBER, // >= + XC_TYPE_NUMBER, // < + XC_TYPE_NUMBER // <= + }, + }, + // left string + { + /*right bool*/ + { + XC_TYPE_BOOLEAN, // = + XC_TYPE_BOOLEAN, // != + XC_TYPE_NUMBER, // > + XC_TYPE_NUMBER, // >= + XC_TYPE_NUMBER, // < + XC_TYPE_NUMBER // <= + }, + + /*right number*/ + { + XC_TYPE_STRING, // = + XC_TYPE_STRING, // != + XC_TYPE_NUMBER, // > + XC_TYPE_NUMBER, // >= + XC_TYPE_NUMBER, // < + XC_TYPE_NUMBER // <= + }, + + /*right string*/ + { + XC_TYPE_STRING, // = + XC_TYPE_STRING, // != + XC_TYPE_STRING, // > + XC_TYPE_STRING, // >= + XC_TYPE_STRING, // < + XC_TYPE_STRING // <= + }, + + /*right node-set*/ + { + XC_TYPE_STRING, // = + XC_TYPE_STRING, // != + XC_TYPE_STRING, // > + XC_TYPE_STRING, // >= + XC_TYPE_STRING, // < + XC_TYPE_STRING // <= + } + }, + // left node-set + { + /*right bool*/ + { + XC_TYPE_BOOLEAN, // = + XC_TYPE_BOOLEAN, // != + XC_TYPE_BOOLEAN, // > + XC_TYPE_BOOLEAN, // >= + XC_TYPE_BOOLEAN, // < + XC_TYPE_BOOLEAN // <= + }, + + /*right number*/ + { + XC_TYPE_NUMBER, // = + XC_TYPE_NUMBER, // != + XC_TYPE_NUMBER, // > + XC_TYPE_NUMBER, // >= + XC_TYPE_NUMBER, // < + XC_TYPE_NUMBER // <= + }, + + /*right string*/ + { + XC_TYPE_STRING, // = + XC_TYPE_STRING, // != + XC_TYPE_STRING, // > + XC_TYPE_STRING, // >= + XC_TYPE_STRING, // < + XC_TYPE_STRING // <= + }, + + /*right node-set*/ + { + XC_TYPE_STRING, // = + XC_TYPE_STRING, // != + XC_TYPE_STRING, // > + XC_TYPE_STRING, // >= + XC_TYPE_STRING, // < + XC_TYPE_STRING // <= + } + } + }; + + // don't use, this is just for obcdc + static int 
xml_bin_to_text( + ObIAllocator &allocator, + const ObString &bin, + ObString &text); + + static bool is_xml_doc_over_depth(uint64_t depth); + static int revert_escape_character(ObIAllocator &allocator, ObString &input_str, ObString &output_str); +}; + +class ObMulModeFactory +{ +public: + ObMulModeFactory() {} + ~ObMulModeFactory() {} + + static int get_xml_base(ObMulModeMemCtx* ctx, + const ObString &buf, + ObNodeMemType in_type, + ObNodeMemType expect_type, + ObIMulModeBase *&out, + ObMulModeNodeType parse_type = ObMulModeNodeType::M_DOCUMENT, + bool is_for_text = false, + bool should_check = false); + + static int get_xml_tree(ObMulModeMemCtx* ctx, + const ObString &str, + ObNodeMemType in_type, + ObXmlNode *&out, + ObMulModeNodeType parse_type = ObMulModeNodeType::M_DOCUMENT); + + static int get_xml_base(ObMulModeMemCtx* ctx, + const char *ptr, + uint64_t length, + ObNodeMemType in_type, + ObNodeMemType expect_type, + ObIMulModeBase *&out, + ObMulModeNodeType parse_type = ObMulModeNodeType::M_DOCUMENT, + bool is_for_text = false, + bool should_check = false); + + static int transform(ObMulModeMemCtx* ctx, ObIMulModeBase *src, + ObNodeMemType expect_type, ObIMulModeBase *&out); + static int add_unparsed_text_into_doc(ObMulModeMemCtx* ctx, + ObString text, + ObXmlDocument *&doc); +}; + +} // namespace common +} // namespace oceanbase + +#endif // OCEANBASE_SQL_OB_XML_UTIL diff --git a/deps/oblib/src/lib/xml/ob_xpath.cpp b/deps/oblib/src/lib/xml/ob_xpath.cpp new file mode 100644 index 0000000000..9f75a8aa68 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xpath.cpp @@ -0,0 +1,3474 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation support for the XML path abstraction. + */ + +#define USING_LOG_PREFIX SQL_RESV +#include "lib/xml/ob_xpath.h" +#include "lib/xml/ob_path_parser.h" +#include "lib/xml/ob_xml_util.h" +#include "lib/string/ob_sql_string.h" +#include "lib/ob_errno.h" +#include "lib/string/ob_string.h" +#include "rpc/obmysql/ob_mysql_global.h" // DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE +#include "common/data_buffer.h" +#include +#include + +namespace oceanbase { +namespace common { +int ObPathCtx::init(ObMulModeMemCtx* ctx, ObIMulModeBase *doc_root, ObIMulModeBase *cur_doc, + ObIAllocator *tmp_alloc, bool is_auto_wrap, bool need_record, bool add_ns) +{ + INIT_SUCC(ret); + if (OB_ISNULL(ctx) || OB_ISNULL(doc_root)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ctx_ = ctx; + alloc_ = ctx->allocator_; + tmp_alloc_ = tmp_alloc; + doc_root_ = doc_root; + cur_doc_ = cur_doc; + is_auto_wrap_ = is_auto_wrap ? 1 : 0; + need_record_ = need_record ? 1 : 0; + add_ns_ = add_ns ? 
1 : 0;
+    is_inited_ = 1;
+    defined_ns_ = 0;
+    if (doc_root->data_type() == OB_XML_TYPE) {
+      // Initialise the pool exactly once (same shape as reinit()). The earlier code called
+      // bin_pool_.init() twice back to back and discarded the first return code.
+      if (OB_FAIL(bin_pool_.init(sizeof(ObXmlBin), tmp_alloc_))) {
+        LOG_WARN("fail to init binary pool", K(ret));
+      } else if (OB_FAIL(init_extend())) {
+        LOG_WARN("fail to init extend", K(ret));
+      }
+    } else if (doc_root->data_type() == OB_JSON_TYPE) {
+      // ret = bin_pool_.init(sizeof(ObJsonBin), tmp_alloc_);
+      ret = OB_NOT_SUPPORTED;
+      LOG_WARN("not supported yet", K(ret));
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("can't be path node", K(ret));
+    }
+  }
+  return ret;
+}
+
+// Detaches the extend area from doc_root_ (when check_extend() reports one) and keeps a
+// copy reachable through extend_; otherwise extend_ is set to nullptr.
+// Callers in this file (init / reinit) initialise bin_pool_ before calling this.
+int ObPathCtx::init_extend()
+{
+  INIT_SUCC(ret);
+  if (doc_root_->check_extend()) {
+    ObXmlBin* bin = static_cast<ObXmlBin*>(doc_root_);
+    ObXmlBin extend(doc_root_->get_mem_ctx());
+    // extend_ temporarily aliases the stack object; alloc_new_bin() is expected to repoint it
+    // at a pool-owned copy. NOTE(review): confirm which alloc_new_bin overload is selected
+    // for extend_'s declared type.
+    extend_ = &extend;
+    if (OB_FAIL(bin->get_extend(extend))) {
+      LOG_WARN("fail to get extend", K(ret));
+    } else if (OB_FAIL(bin->remove_extend())) {
+      LOG_WARN("fail to remove extend", K(ret));
+    } else if (OB_FAIL(alloc_new_bin(extend_))){
+      LOG_WARN("fail init extend", K(ret));
+    }
+    if (OB_FAIL(ret)) {
+      // Never leave extend_ pointing at the local above once this function returns.
+      extend_ = nullptr;
+    }
+  } else {
+    extend_ = nullptr; // without extend area
+  }
+  return ret;
+}
+
+// Rebinds the context to a new document: resets the ancestor stack, the bin pool and the
+// namespace counter, then re-initialises the pool and extend area for XML documents.
+// Returns OB_BAD_NULL_ERROR for a null doc, OB_NOT_SUPPORTED for JSON and
+// OB_ERR_UNEXPECTED for any other data type.
+int ObPathCtx::reinit(ObIMulModeBase* doc, ObIAllocator *tmp_alloc)
+{
+  INIT_SUCC(ret);
+  if (OB_ISNULL(doc)) {
+    // Mirror init(): reject a null document instead of dereferencing it below.
+    ret = OB_BAD_NULL_ERROR;
+    LOG_WARN("should not be null", K(ret));
+  } else {
+    cur_doc_ = doc;
+    doc_root_ = doc;
+    tmp_alloc_ = tmp_alloc;
+    ancestor_record_.reset();
+    bin_pool_.reset();
+    defined_ns_ = 0;
+    if (doc->data_type() == OB_XML_TYPE) {
+      if (OB_FAIL(bin_pool_.init(sizeof(ObXmlBin), tmp_alloc_))) {
+        LOG_WARN("fail to init binary pool", K(ret));
+      } else if (OB_FAIL(init_extend())) {
+        LOG_WARN("fail to init extend", K(ret));
+      }
+    } else if (doc->data_type() == OB_JSON_TYPE) {
+      // ret = bin_pool_.init(sizeof(ObJsonBin), tmp_alloc_);
+      ret = OB_NOT_SUPPORTED;
+      LOG_WARN("not supported yet", K(ret));
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("can't be path node", K(ret));
+    }
+  }
+  return ret;
+}
+
+int ObPathCtx::push_ancestor(ObIMulModeBase*& base_node)
+{
+  INIT_SUCC(ret);
+  if 
(OB_ISNULL(base_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (base_node->is_tree()) { + if (OB_FAIL(ancestor_record_.push(base_node))) { + LOG_WARN("should be inited", K(ret)); + } + } else { + if (add_ns_ && base_node->check_if_defined_ns()) { + ++defined_ns_; + } + if (OB_FAIL(alloc_new_bin(base_node))) { + LOG_WARN("allocate xmlbin failed", K(ret)); + } else if (OB_FAIL(ancestor_record_.push(base_node))) { + LOG_WARN("should be inited", K(ret)); + } + } + return ret; +} + +int ObPathCtx::alloc_new_bin(ObIMulModeBase*& base_node) +{ + INIT_SUCC(ret); + if (base_node->is_binary()) { + // is binary + ObXmlBin* bin_node = static_cast(base_node); + ObXmlBin* new_bin = static_cast(bin_pool_.alloc()); + if (OB_ISNULL(new_bin)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate xmlbin failed", K(ret)); + } else { + new_bin = new (new_bin) ObXmlBin(*bin_node, base_node->get_mem_ctx()); + base_node = new_bin; + } + } + return ret; +} + +int ObPathCtx::alloc_new_bin(ObXmlBin*& bin, ObMulModeMemCtx* ctx) +{ + INIT_SUCC(ret); + bin = static_cast(bin_pool_.alloc()); + if (OB_ISNULL(bin)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate xmlbin failed", K(ret)); + } else { + bin = new (bin) ObXmlBin(ctx); + } + return ret; +} + +int ObPathCtx::pop_ancestor() +{ + INIT_SUCC(ret); + if (ancestor_record_.size() <= 0) { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("fail to pop", K(ret)); + } else { + ObIMulModeBase* top = ancestor_record_.top(); + if (OB_ISNULL(top)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (top->is_tree()) { + ancestor_record_.pop(); + } else { + if (add_ns_ && top->check_if_defined_ns()) { + --defined_ns_; + } + ObXmlBin* bin_top = static_cast(top); + ancestor_record_.pop(); + bin_pool_.free(bin_top); + } + } + return ret; +} + +ObIMulModeBase* ObPathCtx::top_ancestor() +{ + return ancestor_record_.top(); +} + +bool ObPathCtx::if_need_record() const +{ + return 
need_record_ == 1; +} + +bool ObPathCtx::is_inited() const +{ + return is_inited_ == 1; +} + +int ObPathLocationNode::init(const ObLocationType& location_type) +{ + INIT_SUCC(ret); + if (location_type > ObLocationType::PN_LOCATION_ERROR + && location_type < ObLocationType::PN_LOCATION_MAX) { + node_type_.node_subtype_ = location_type; + set_prefix_ns_info(false); + set_default_prefix_ns(false); + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to init location node", K(ret)); + } + return ret; +} + +int ObPathLocationNode::init(const ObLocationType& location_type, const ObSeekType& seek_type) +{ + INIT_SUCC(ret); + if (location_type > ObLocationType::PN_LOCATION_ERROR + && location_type < ObLocationType::PN_LOCATION_MAX + && seek_type > ObSeekType::ERROR_SEEK + && seek_type < ObSeekType::MAX_SEEK) { + node_type_.node_subtype_ = location_type; + seek_type_ = seek_type; + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to init location node", K(ret)); + } + return ret; +} + +int ObPathLocationNode::init(const ObLocationType& location_type, const ObPathNodeAxis& axis_type) +{ + INIT_SUCC(ret); + if (location_type > ObLocationType::PN_LOCATION_ERROR + && location_type < ObLocationType::PN_LOCATION_MAX + && axis_type > ObPathNodeAxis::ERROR_AXIS + && axis_type < ObPathNodeAxis::MAX_AXIS) { + node_type_.node_subtype_ = location_type; + node_axis_ = axis_type; + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to init location node", K(ret)); + } + return ret; +} + +int ObPathLocationNode::init(const ObLocationType& location_type, const ObSeekType& seek_type, const ObPathNodeAxis& axis_type) +{ + INIT_SUCC(ret); + if (location_type > ObLocationType::PN_LOCATION_ERROR + && location_type < ObLocationType::PN_LOCATION_MAX + && seek_type > ObSeekType::ERROR_SEEK + && seek_type < ObSeekType::MAX_SEEK + && axis_type > ObPathNodeAxis::ERROR_AXIS + && axis_type < ObPathNodeAxis::MAX_AXIS) { + node_type_.node_subtype_ = location_type; + seek_type_ = seek_type; 
+    node_axis_ = axis_type;
+  } else {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("fail to init location node", K(ret));
+  }
+  return ret;
+}
+
+// Builds a binary filter node `left <op> right`.
+// - Rejects null operands with OB_BAD_NULL_ERROR.
+// - Derives the filter type from filter_char and records whether either operand contains
+//   a relative path / needs caching.
+// - Appends both operands as children, then (outside a predicate) rejects comparisons that
+//   is_illegal_comp_for_filter() flags, with OB_OP_NOT_ALLOW.
+int ObPathFilterNode::init(const ObXpathFilterChar& filter_char, ObPathNode* left, ObPathNode* right, bool pred)
+{
+  INIT_SUCC(ret);
+  ObFilterType type = ObFilterType::PN_FILTER_ERROR;
+  if (OB_ISNULL(left) || OB_ISNULL(right)) {
+    ret = OB_BAD_NULL_ERROR;
+    LOG_WARN("should not be null", K(ret));
+  } else if (OB_FAIL(ObPathUtil::char_to_filter_type(filter_char, type))) {
+    LOG_WARN("fail to get filter type", K(ret));
+  } else {
+    node_type_.set_filter_type(type);
+    if (ObPathParserUtil::is_boolean_ans(type)) {
+      is_boolean_ = true;
+    }
+    in_predication_ = pred;
+    contain_relative_path_ = (ObPathUtil::check_contain_relative_path(left) || ObPathUtil::check_contain_relative_path(right));
+    need_cache_ = (ObPathUtil::check_need_cache(left) || ObPathUtil::check_need_cache(right));
+    if (OB_FAIL(this->append(left))) {
+      LOG_WARN("fail to append arg", K(ret));
+    } else if (OB_FAIL(this->append(right))) {
+      LOG_WARN("fail to append arg", K(ret));
+    } else if (!pred && ObPathParserUtil::is_illegal_comp_for_filter(type, left, right)) {
+      ret = OB_OP_NOT_ALLOW;
+      LOG_WARN("Given XPATH expression not supported", K(ret));
+    }
+  }
+  return ret;
+}
+
+// Appends one more filter child and folds its contain_relative_path_ / need_cache_ flags
+// into this node (a flag, once set, stays set).
+// Returns OB_BAD_NULL_ERROR for a null filter, matching the other initialisers in this file.
+int ObPathFilterOpNode::append_filter(ObPathNode* filter)
+{
+  INIT_SUCC(ret);
+  if (OB_ISNULL(filter)) {
+    // The flag propagation below dereferences filter, so reject null first.
+    ret = OB_BAD_NULL_ERROR;
+    LOG_WARN("should not be null", K(ret));
+  } else {
+    if (!contain_relative_path_ && filter->contain_relative_path_) {
+      contain_relative_path_ = true;
+    }
+    if (!need_cache_ && filter->need_cache_) {
+      need_cache_ = true;
+    }
+    if (OB_FAIL(append(filter))) {
+      LOG_WARN("fail to append filter", K(ret));
+    }
+  }
+  return ret;
+}
+
+// Sets the filter type when it lies strictly between PN_FILTER_ERROR and PN_FILTER_MAX;
+// otherwise returns OB_ERR_WRONG_VALUE_FOR_VAR.
+int ObPathFilterNode::init(ObFilterType type)
+{
+  INIT_SUCC(ret);
+  if (type > ObFilterType::PN_FILTER_ERROR && type < ObFilterType::PN_FILTER_MAX) {
+    node_type_.set_filter_type(type);
+  } else {
+    ret = OB_ERR_WRONG_VALUE_FOR_VAR;
+    // This is the filter initialiser; the old text ("fail to init func") was copied from ObPathFuncNode::init.
+    LOG_WARN("fail to init filter", K(ret));
+  }
+  return ret;
+}
+
+int ObPathFuncNode::init(ObFuncType& 
func_type) +{ + INIT_SUCC(ret); + if (func_type > ObFuncType::PN_FUNC_ERROR && func_type < ObFuncType::PN_FUNC_MAX) { + node_type_.set_func_type(func_type); + min_arg_num_ = func_arg_num[func_type - ObFuncType::PN_ABS][0]; + max_arg_num_ = func_arg_num[func_type - ObFuncType::PN_ABS][1]; + } else { + ret = OB_ERR_WRONG_VALUE_FOR_VAR; + LOG_WARN("fail to init func", K(ret)); + } + return ret; +} + +int ObPathArgNode::init(char* str, uint64_t len, bool pred) +{ + INIT_SUCC(ret); + node_type_.set_arg_type(ObArgType::PN_STRING); + arg_.str_.name_ = str; + arg_.str_.len_ = len; + in_predication_ = pred; + return ret; +} + +int ObPathArgNode::init(double num, bool pred) +{ + INIT_SUCC(ret); + node_type_.node_subtype_ = ObArgType::PN_DOUBLE; + arg_.double_ = num; + in_predication_ = pred; + return ret; +} + +int ObPathArgNode::init(bool boolean, bool pred) +{ + INIT_SUCC(ret); + node_type_.node_subtype_ = ObArgType::PN_BOOLEAN; + arg_.boolean_ = boolean; + in_predication_ = pred; + return ret; +} + +int ObPathArgNode::init(ObPathNode* node, bool pred) +{ + INIT_SUCC(ret); + if (OB_ISNULL(node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + node_type_.node_subtype_ = ObArgType::PN_SUBPATH; + arg_.subpath_ = node; + in_predication_ = pred; + } + return ret; +} + +void ObPathLocationNode::set_nodetest_by_name(ObSeekType seek_type, const char* name, uint64_t len) +{ + seek_type_ = seek_type; + if (seek_type_ == ObSeekType::PROCESSING_INSTRUCTION) { + node_content_.key_.len_ = len; + node_content_.key_.name_ = name; + len > 0 ? 
set_wildcard_info(false) : set_wildcard_info(true); + } else { + set_wildcard_info(true); + } + check_namespace_ = false; +} + +// when there is not nodetest, set seek type by axis +void ObPathLocationNode::set_nodetest_by_axis() +{ + if (node_axis_ != ObPathNodeAxis::NAMESPACE && node_axis_ != ObPathNodeAxis::ATTRIBUTE) { + seek_type_ = ObSeekType::ELEMENT; + } +} + +int ObPathLocationNode::set_check_ns_by_nodetest(ObIAllocator *allocator, ObString& default_ns) +{ + INIT_SUCC(ret); + if (check_namespace_) { // with prefix ns + // make sure is element or attribute + if (seek_type_ != ObSeekType::ELEMENT + && node_axis_ != ObPathNodeAxis::ATTRIBUTE + && node_axis_ != ObPathNodeAxis::NAMESPACE) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("must be element or attribute when there is prefix ns.", K(ret), K(seek_type_)); + } + } else { + // without prefix ns + // if has wildcard , don't need check ns + // else use default ns when is element, use null ns when is attribute + if (!node_content_.has_wildcard_) { + check_namespace_ = true; + // for attribute, if there is no prefix ns, use null as default ns + if (node_axis_ == ObPathNodeAxis::ATTRIBUTE) { + set_ns_info(nullptr, 0); + } else if (node_axis_ == ObPathNodeAxis::NAMESPACE) { + // for namespace, mustn't have prefix ns(checked) + // if there is tag name after namespace axis, find first prefix ns, else find first default ns + set_ns_info(nullptr, 0); + } else if (seek_type_ == ObSeekType::ELEMENT) { + // for element, if there is not prefix ns + // if default ns is null, use null ns, else use default ns anyway + ObString ns_str; + if (default_ns.length() != 0) { + if (OB_FAIL(ob_write_string(*allocator, default_ns, ns_str))) { + LOG_WARN("fail to wirte string", K(ret), K(default_ns)); + } + } + if (OB_SUCC(ret)) set_ns_info(ns_str.ptr(), ns_str.length()); + } + } else { + check_namespace_ = false; + } + } // do not have prefix ns + return ret; +} + +int ObPathRootNode::node_to_string(ObStringBuffer& str) +{ + 
INIT_SUCC(ret); + if (node_type_.is_root()) { + if (node_type_.is_xml_path() && size() == 0) { + if (OB_FAIL(str.append("/"))) { + LOG_WARN("root fail to string", K(ret), K(str)); + } + } /*else if (node_type_.is_json_path()) { + if (OB_FAIL(str.append("$"))) { + LOG_WARN("root fail to string", K(ret), K(str)); + }*/ + if (OB_SUCC(ret) && size() > 0) { + for (int64_t i = 0; i < size() && OB_SUCC(ret); ++i) { + ObPathNode* temp_node = static_cast(member(i)); + if (OB_ISNULL(temp_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret), K(str)); + } else if (OB_FAIL(temp_node->node_to_string(str))) { + LOG_WARN("location node fail to string", K(ret), K(str)); + } + } // end for + } // end child is null + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("must be root node", K(ret), K(str)); + } + return ret; +} + +bool ObPathRootNode::is_abs_subpath() +{ + bool is_absolute = false; + ObPathNode* first_node = static_cast(member(0)); + if (OB_NOT_NULL(first_node) && (first_node->node_type_.is_location())) { + ObPathLocationNode* location = static_cast(first_node); + is_absolute = location->is_absolute_; + } + return is_absolute; +} + +int ObPathLocationNode::node_to_string(ObStringBuffer& str) +{ + INIT_SUCC(ret); + if (node_type_.is_xml_path()) { + if (is_absolute_ && OB_FAIL(str.append("/"))) { + LOG_WARN("fail to append slash", K(ret)); + } else if (node_type_.get_location_type() == ObLocationType::PN_ELLIPSIS && OB_FAIL(str.append("/"))) { + } else if (OB_FAIL(axis_to_string(str))) { + LOG_WARN("fail to append axis", K(ret)); + } else if (OB_FAIL(nodetest_to_string(str))) { + LOG_WARN("fail to append nodetest", K(ret)); + } else if (has_filter_ && size() > 0) { + for (int64_t i = 0; i < size() && OB_SUCC(ret); ++i) { + ObPathNode* temp_node = static_cast(member(i)); + if (OB_ISNULL(temp_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret), K(str)); + } else if (OB_FAIL(str.append("["))) { + LOG_WARN("fail to append [", 
K(ret)); + } else if (OB_FAIL(temp_node->node_to_string(str))) { + LOG_WARN("location node fail to string", K(ret), K(str)); + } else if (OB_FAIL(str.append("]"))) { + LOG_WARN("fail to append ]", K(ret)); + } + } + } // if without filter, do nothing + } // if not xml node + return ret; +} + +int ObPathLocationNode::axis_to_string(ObStringBuffer& str) +{ + INIT_SUCC(ret); + if (node_axis_ != ObPathNodeAxis::CHILD) { + if (OB_FAIL(str.append(axis_str_map[node_axis_ - ObPathNodeAxis::SELF]))) { + LOG_WARN("fail to append axis", K(axis_str_map[node_axis_ - ObPathNodeAxis::SELF]), K(ret)); + } + } + return ret; +} +int ObPathLocationNode::nodetest_to_string(ObStringBuffer& str) +{ + INIT_SUCC(ret); + if (check_namespace_ && OB_NOT_NULL(node_content_.namespace_.name_) && node_content_.namespace_.len_ > 0) { + if (OB_FAIL(str.append(node_content_.namespace_.name_, node_content_.namespace_.len_))) { + LOG_WARN("fail to append axis", K(ret)); + } else if (OB_FAIL(str.append(":"))) { + LOG_WARN("fail to append :", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else { + switch (seek_type_) { + case ObSeekType::NODES: + case ObSeekType::TEXT: + case ObSeekType::COMMENT: { + if (OB_FAIL(str.append(nodetest_str_map[seek_type_ - ObSeekType::NODES]))) { + LOG_WARN("fail to append axis", K(nodetest_str_map[seek_type_ - ObSeekType::NODES]), K(ret)); + } + break; + } + case ObSeekType::PROCESSING_INSTRUCTION: { + if (OB_FAIL(str.append(ObPathItem::PROCESSING_INSTRUCTION))) { + LOG_WARN("fail to append processing_instruction", K(ret)); + } else if (node_content_.key_.len_ > 0 && OB_FAIL(str.append("\""))) { + LOG_WARN("fail to append \"", K(ret)); + } else if (OB_FAIL(str.append(node_content_.key_.name_, node_content_.key_.len_))) { + LOG_WARN("fail to append processing_instruction name", K(node_content_.key_.name_), K(ret)); + } else if (node_content_.key_.len_ > 0 &&OB_FAIL(str.append("\""))) { + LOG_WARN("fail to append \"", K(ret)); + } else if (OB_FAIL(str.append(")"))) { + 
LOG_WARN("fail to append )", K(ret)); + } + break; + } + default: { + if (node_content_.has_wildcard_) { + if (OB_FAIL(str.append("*"))) { + LOG_WARN("fail to append *", K(ret)); + } + } else if (OB_FAIL(str.append(node_content_.key_.name_, node_content_.key_.len_))) { + LOG_WARN("fail to append key name", K(node_content_.key_.name_), K(ret)); + } + break; + } + } + } + return ret; +} + +int ObPathFilterNode::filter_arg_to_string(ObStringBuffer& str, bool is_left) +{ + INIT_SUCC(ret); + int index = (is_left)? 0 : 1; + ObPathNode* temp_node = static_cast(member(index)); + if (OB_ISNULL(temp_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret), K(str)); + } else if (OB_FAIL(temp_node->node_to_string(str))) { + LOG_WARN("location node fail to string", K(ret), K(str)); + } + return ret; +} + +int ObPathFilterNode::filter_type_to_string(ObStringBuffer& str) +{ + INIT_SUCC(ret); + if (node_type_.is_xml_path()) { + ObFilterType xml_filter = node_type_.get_filter_type(); + if (xml_filter >= ObFilterType::PN_CMP_UNION && xml_filter <= ObFilterType::PN_CMP_MOD) { + if (OB_FAIL(str.append(filter_type_str_map[xml_filter - ObFilterType::PN_NOT_COND]))) { + LOG_WARN("fail to append axis", + K(filter_type_str_map[xml_filter - ObFilterType::PN_NOT_COND]), K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Wrong filter type", K(ret), K(xml_filter)); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported yet", K(ret)); + } + return ret; +} + +int ObPathFilterNode::node_to_string(ObStringBuffer& str) +{ + INIT_SUCC(ret); + if (node_type_.is_xml_path()) { + if (size() != 2) { + ret = OB_INVALID_ARGUMENT_NUM; + LOG_WARN("wrong arg num", K(size()), K(ret)); + } else if (OB_FAIL(filter_arg_to_string(str, true))) { + LOG_WARN("left arg fail to str", K(size()), K(ret)); + } else if (OB_FAIL(filter_type_to_string(str))) { + LOG_WARN("filter type fail to str", K(size()), K(ret)); + } else if (OB_FAIL(filter_arg_to_string(str, false))) { + 
LOG_WARN("rigth arg fail to str", K(size()), K(ret)); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported yet", K(ret)); + } + return ret; +} + +int ObPathFilterOpNode::filter_op_arg_to_str(bool is_left, ObStringBuffer& str) +{ + INIT_SUCC(ret); + ObPathNode* node = is_left ? left_ : right_; + if (node->get_node_type().is_root()) { + ObPathRootNode* root = static_cast(node); + if (!root->is_abs_path_ && root->size() > 0) { + ObPathNode* first = static_cast(root->member(0)); + if (first->get_node_type().is_location() + && first->get_node_type().get_location_type() != PN_ELLIPSIS + && OB_FAIL(str.append("/"))) { + LOG_WARN("fail to append /", K(ret)); + } + } + } + return ret; +} + +int ObPathFilterOpNode::node_to_string(ObStringBuffer& str) +{ + INIT_SUCC(ret); + if (node_type_.is_xml_path()) { + if (OB_ISNULL(left_)) { + } else if (OB_FAIL(filter_op_arg_to_str(true, str))) { + } else if (OB_FAIL(left_->node_to_string(str))) { + LOG_WARN("left arg fail to str", K(size()), K(ret)); + } else { + for (int64_t i = 0; i < size() && OB_SUCC(ret); ++i) { + ObPathNode* temp_node = static_cast(member(i)); + if (OB_ISNULL(temp_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret), K(str)); + } else if (OB_FAIL(str.append("["))) { + LOG_WARN("fail to append [", K(ret)); + } else if (OB_FAIL(temp_node->node_to_string(str))) { + LOG_WARN("location node fail to string", K(ret), K(str)); + } else if (OB_FAIL(str.append("]"))) { + LOG_WARN("fail to append ]", K(ret)); + } + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(right_)) { + } else if (OB_FAIL(filter_op_arg_to_str(false, str))) { + } else if (OB_FAIL(right_->node_to_string(str))) { + LOG_WARN("right arg fail to str", K(size()), K(ret)); + } // right to string + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported yet", K(ret)); + } + return ret; +} + +int ObPathFuncNode::node_to_string(ObStringBuffer& str) +{ + INIT_SUCC(ret); + if (node_type_.is_xml_path()) { + ObFuncType 
xml_func = node_type_.get_func_type(); + if (OB_FAIL(str.append(func_str_map[xml_func - ObFuncType::PN_ABS]))) { + LOG_WARN("fail to append function", + K(func_str_map[xml_func - ObFuncType::PN_ABS]), K(ret)); + } else if (OB_FAIL(func_arg_to_string(str))) { + LOG_WARN("arg fail to append )", K(ret)); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported yet", K(ret)); + } + return ret; +} + +int ObPathFuncNode::func_arg_to_string(ObStringBuffer& str) +{ + INIT_SUCC(ret); + if (size() > 0) { + for (int64_t i = 0; i < size() && OB_SUCC(ret); ++i) { + ObPathNode* temp_node = static_cast(member(i)); + if (OB_ISNULL(temp_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret), K(str)); + } else if (OB_FAIL(temp_node->node_to_string(str))) { + LOG_WARN("location node fail to string", K(ret), K(str)); + } else if ( i + 1 < size() && OB_FAIL(str.append(", "))) { + LOG_WARN("fail to append ','", K(ret)); + } + } // end for + }// end child is null + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(str.append(")"))) { + LOG_WARN("fail to append ')'", K(ret)); + } + return ret; +} + +int ObPathArgNode::node_to_string(ObStringBuffer& str) +{ + INIT_SUCC(ret); + if (node_type_.is_xml_path()) { + ObArgType xml_arg = node_type_.get_arg_type(); + switch (xml_arg) { + case ObArgType::PN_STRING: { + if (OB_FAIL(str.append("\""))) { + LOG_WARN("fail to append quote", K(ret)); + } else if (OB_FAIL(str.append(arg_.str_.name_, arg_.str_.len_))) { + LOG_WARN("fail to append literal", K(ret)); + } else if (OB_FAIL(str.append("\""))) { + LOG_WARN("fail to append quote", K(ret)); + } + break; + } + case ObArgType::PN_DOUBLE: { + char buf[DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE] = {0}; + uint64_t length = ob_gcvt(arg_.double_, ob_gcvt_arg_type::OB_GCVT_ARG_DOUBLE, + sizeof(buf) - 1, buf, NULL); + if (length== 0) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("fail to convert double to string", K(ret)); + } else { + ObString num_str(sizeof(buf), static_cast(length), buf); + 
if (OB_FAIL(str.append(num_str))) { + LOG_WARN("fail to set j_buf len", K(ret), K(str.length()), K(num_str)); + } + } + break; + } + case ObArgType::PN_SUBPATH: { + if (OB_ISNULL(arg_.subpath_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret), K(str)); + } else if (OB_FAIL(arg_.subpath_->node_to_string(str))) { + LOG_WARN("fail to append processing_instruction name", K(ret)); + } + break; + } + case ObArgType::PN_BOOLEAN: { + if (arg_.boolean_ == true) { + if (OB_FAIL(str.append("true"))) { + LOG_WARN("fail to append true", K(ret)); + } + } else if (OB_FAIL(str.append("false"))) { + LOG_WARN("fail to append false", K(ret)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Wrong arg type", K(ret), K(xml_arg)); + } + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported yet", K(ret)); + } + return ret; +} + +int ObPathUtil::alloc_seek_result(ObIAllocator *allocator, ObIMulModeBase* base, ObSeekResult*& res) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + res = static_cast (allocator->alloc(sizeof(ObSeekResult))); + if (OB_ISNULL(res)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at seek result", K(ret)); + } else { + res = new (res) ObSeekResult(false); + res->result_.base_ = base; + } + } + return ret; +} + +int ObPathUtil::alloc_seek_result(ObIAllocator *allocator, ObPathArgNode* arg, ObSeekResult*& res) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + res = static_cast (allocator->alloc(sizeof(ObSeekResult))); + if (OB_ISNULL(res)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at seek result", K(ret)); + } else { + res = new (res) ObSeekResult(true); + res->result_.scalar_ = arg; + } + } + return ret; +} + +int ObPathUtil::trans_scalar_to_base(ObIAllocator *allocator, ObPathArgNode* arg, 
ObIMulModeBase*& base)
+{
+  // Wraps a scalar path argument in a newly allocated ObXmlText node and returns it through `base`.
+  // String arguments are used verbatim; any other argument kind is rendered with node_to_string().
+  // `base` is assigned only when ret is still OB_SUCCESS at the end.
+  INIT_SUCC(ret);
+  if (OB_ISNULL(allocator) || OB_ISNULL(arg)) {
+    ret = OB_BAD_NULL_ERROR;
+    LOG_WARN("should not be null", K(ret));
+  } else {
+    ObXmlText* res = static_cast<ObXmlText*> (allocator->alloc(sizeof(ObXmlText)));
+    if (OB_ISNULL(res)) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("allocate row buffer failed at seek result", K(ret));
+    } else {
+      res = new (res) ObXmlText(ObMulModeNodeType::M_TEXT);
+      if (arg->node_type_.get_arg_type() == ObArgType::PN_STRING) {
+        res->set_text(ObString(arg->arg_.str_.len_, arg->arg_.str_.name_));
+      } else {
+        // NOTE(review): the text set below points into buf's storage, and buf is destroyed at the
+        // end of this scope; presumably the allocator keeps that memory alive - confirm.
+        ObStringBuffer buf(allocator);
+        if (OB_FAIL(arg->node_to_string(buf))) {
+          LOG_WARN("fail to string", K(ret));
+        } else {
+          res->set_text(ObString(buf.length(), buf.ptr()));
+        }
+      }
+      if (OB_SUCC(ret)) {
+        // Publish the node for string arguments as well; previously only the non-string path set base.
+        base = res;
+      }
+    } // alloc success
+  }
+  return ret;
+}
+
+// Checks every child of arg_root: a location step that carries a filter yields OB_OP_NOT_ALLOW,
+// a function step yields OB_ERR_PARSER_SYNTAX, and a null child yields OB_BAD_NULL_ERROR.
+// arg_root itself must be a non-empty root node, otherwise OB_ERR_UNEXPECTED is returned.
+int ObPathFuncNode::check_is_all_location_without_filter(ObPathNode* arg_root)
+{
+  INIT_SUCC(ret);
+  if (OB_ISNULL(arg_root) || !arg_root->node_type_.is_root() || arg_root->size() == 0) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("should be root", K(ret));
+  } else {
+    for (int i = 0; i < arg_root->size() && OB_SUCC(ret); ++i) {
+      ObPathNode* node = static_cast<ObPathNode*>(arg_root->member(i));
+      if (OB_ISNULL(node)) {
+        ret = OB_BAD_NULL_ERROR;
+        LOG_WARN("should not be null", K(ret));
+      } else if (node->node_type_.is_location()) {
+        ObPathLocationNode* location = static_cast<ObPathLocationNode*>(node);
+        if (location->has_filter_) {
+          ret = OB_OP_NOT_ALLOW;
+          LOG_WARN("Given XPATH expression not supported", K(ret));
+        } // check if without filter
+      } else if (node->node_type_.is_func()) {
+        ret = OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed
+        LOG_WARN("Function call with invalid number of arguments", K(ret), K(node->node_type_.node_class_));
+      }
+    } // end for
+  }
+  return ret;
+}
+
+// Validates the single argument of count(): a missing argument or a bare location step is
+// rejected with OB_ERR_PARSER_SYNTAX.
+int ObPathFuncNode::check_is_legal_count_arg()
+{
+  INIT_SUCC(ret);
+  // after size check, size must be 1
+  ObPathNode* func_arg = static_cast<ObPathNode*>(member(0));
+  if (OB_ISNULL(func_arg)) {
+    // Logged separately so that the null pointer is never dereferenced by the log arguments.
+    ret = OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed
+    LOG_WARN("Function call with invalid arguments", K(ret));
+  } else if (func_arg->node_type_.is_location()) {
+    ret = OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed
+    LOG_WARN("Function call with invalid arguments", K(ret), K(func_arg->node_type_.node_class_));
+  }
+
+  return ret;
+}
+
+// Sets need_cache_ and contain_relative_path_ according to the function type:
+//   true()/false()            -> both flags false
+//   count()/not()/boolean()   -> both flags copied from the single argument
+//   every other listed type   -> both flags true
+// An unlisted function type yields OB_ERR_UNEXPECTED.
+int ObPathFuncNode::checek_cache_and_abs()
+{
+  INIT_SUCC(ret);
+  switch(get_node_type().get_func_type()) {
+    case ObFuncType::PN_FALSE:
+    case ObFuncType::PN_TRUE: {
+      need_cache_ = false;
+      contain_relative_path_ = false;
+      break;
+    }
+    case ObFuncType::PN_COUNT:
+    case ObFuncType::PN_NOT_FUNC:
+    case ObFuncType::PN_BOOLEAN_FUNC: {
+      if (size() != 1) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("Function call with invalid arguments", K(ret), K(size()));
+      } else {
+        ObPathNode* path = static_cast<ObPathNode*>(member(0));
+        if (OB_ISNULL(path)) {
+          // member(0) is dereferenced below, so reject a null child explicitly
+          ret = OB_BAD_NULL_ERROR;
+          LOG_WARN("should not be null", K(ret));
+        } else {
+          need_cache_ = path->need_cache_;
+          contain_relative_path_ = path->contain_relative_path_;
+        }
+      }
+      break;
+    }
+    case ObFuncType::PN_ABS:
+    case ObFuncType::PN_BOOL_ONLY:
+    case ObFuncType::PN_CEILING:
+    case ObFuncType::PN_DATE_FUNC:
+    case ObFuncType::PN_DOUBLE_FUNC:
+    case ObFuncType::PN_FLOOR:
+    case ObFuncType::PN_LENGTH:
+    case ObFuncType::PN_LOWER:
+    case ObFuncType::PN_NUMBER_FUNC:
+    case ObFuncType::PN_NUM_ONLY:
+    case ObFuncType::PN_SIZE:
+    case ObFuncType::PN_STRING_FUNC:
+    case ObFuncType::PN_STR_ONLY:
+    case ObFuncType::PN_TIMESTAMP:
+    case ObFuncType::PN_TYPE:
+    case ObFuncType::PN_UPPER:
+    case ObFuncType::PN_CONCAT:
+    case ObFuncType::PN_CONTAINS:
+    case ObFuncType::PN_LOCAL_NAME:
+    case ObFuncType::PN_LANG:
+    case ObFuncType::PN_SUM:
+    case ObFuncType::PN_NAME:
+    case ObFuncType::PN_NS_URI:
+    case ObFuncType::PN_NORMALIZE_SPACE:
+    case ObFuncType::PN_SUBSTRING_FUNC:
+    case ObFuncType::PN_POSITION:
+    case ObFuncType::PN_LAST:
+    case ObFuncType::PN_ROUND: {
+      need_cache_ = true;
+      contain_relative_path_ = true;
+      break;
+    }
+    default: {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("axis not supported yet", K(ret));
+      break;
+    }
+  }
+  return ret;
+}
+
+int ObPathFuncNode::check_is_legal_arg()
+{
+  INIT_SUCC(ret);
+  if (min_arg_num_ > 
size() || max_arg_num_ < size()) { // check_arg_num + ret = OB_ERR_PARSER_SYNTAX; // ORA-31011: XML parsing failed + LOG_WARN("Function call with invalid number of arguments", K(ret), K(min_arg_num_), K(max_arg_num_)); + } else { // check arg type + switch (node_type_.get_func_type()) { + case ObFuncType::PN_COUNT: { + if (OB_FAIL(check_is_legal_count_arg())) { + LOG_WARN("Function call with invalid arguments for count", K(ret)); + } + break; + } + default: { + break; + } + } // end of check arg type + } + return ret; +} + +int ObPathNode::node_to_string(ObStringBuffer& str) +{ + return OB_NOT_SUPPORTED; +} + +int ObPathNode::eval_node(ObPathCtx &ctx, ObSeekResult& res) +{ + return OB_NOT_SUPPORTED; +} + +int ObPathRootNode::init_adapt(ObPathCtx &ctx, ObIMulModeBase*& ans) +{ + INIT_SUCC(ret); + ans = (is_abs_path_ = is_abs_subpath()) ? ctx.doc_root_ : ctx.cur_doc_; + if (adapt_.size() == 0) { // alloc and init for the first time + for (int i = 0; i < size() && OB_SUCC(ret); ++i) { + ObPathLocationNode* loc = static_cast(member(i)); + ObSeekIterator* ada = nullptr; + if (OB_ISNULL(ans) || OB_ISNULL(loc)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ans), K(loc)); + } else if (OB_FAIL(ObPathUtil::get_seek_iterator(ctx.alloc_, loc, ada))) { + LOG_WARN("fail to alloc ada", K(ret)); + } else if (OB_FAIL(ada->init(ctx, loc, ans))) { + LOG_WARN("fail to init ada", K(ret)); + } else if (OB_FAIL(adapt_.push_back(ada))) { + LOG_WARN("fail to push ada", K(ret)); + } else { + ++iter_pos_; + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(next_adapt(ctx, ans))) { + LOG_WARN("fail to get next", K(ret)); + } else if (OB_ISNULL(ans)) { + ret = OB_ITER_END; + } + } // end for + } else { + for (int i = 0; i < size() && iter_pos_ < adapt_.size() && OB_SUCC(ret); ++i) { + ObSeekIterator* ada = nullptr; + if (i < adapt_.size()) { + ada = adapt_[i]; + ada->reset(ans); + } else { + ObPathLocationNode* loc = static_cast(member(i)); + ObSeekIterator* ada = nullptr; + if 
(OB_ISNULL(ans) || OB_ISNULL(loc)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ans), K(loc)); + } else if (OB_FAIL(ObPathUtil::get_seek_iterator(ctx.alloc_, loc, ada))) { + LOG_WARN("fail to alloc ada", K(ret)); + } else if (OB_FAIL(ada->init(ctx, loc, ans))) { + LOG_WARN("fail to init ada", K(ret)); + } else if (OB_FAIL(adapt_.push_back(ada))) { + LOG_WARN("fail to push ada", K(ret)); + } + } + if (OB_SUCC(ret)) { + ++iter_pos_; + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(next_adapt(ctx, ans))) { + LOG_WARN("fail to get next", K(ret)); + } else if (OB_ISNULL(ans)) { + ret = OB_ITER_END; + } + } // end for + } + + is_seeked_ = true; + return ret; +} + +int ObPathRootNode::next_adapt(ObPathCtx &ctx, ObIMulModeBase*& ans) +{ + INIT_SUCC(ret); + if (iter_pos_ >= adapt_.size()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("wrong pos", K(ret)); + } else if (iter_pos_ < 0) { + for (int i = 0; i < adapt_.size() && OB_NOT_NULL(adapt_[i]); ++i) { + adapt_[i]->reset(); + } + is_seeked_ = false; + iter_pos_ = -1; + ret = OB_ITER_END; + } else { + ObIMulModeBase* tmp_ans = nullptr; + ObSeekIterator* top = adapt_[iter_pos_]; + if (OB_ISNULL(top)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_SUCC(top->next(ctx, tmp_ans)) && OB_NOT_NULL(tmp_ans)) { + ans = tmp_ans; + } else { + --iter_pos_; + ObIMulModeBase* new_root = nullptr; + if (OB_SUCC(next_adapt(ctx, new_root)) && OB_NOT_NULL(new_root)) { + top->reset(new_root); + ++iter_pos_; + if (OB_SUCC(next_adapt(ctx, tmp_ans)) && OB_NOT_NULL(tmp_ans)) { + ans = tmp_ans; + } else { + ret = OB_ITER_END; + } + } else { // fail to update root, search end + ret = OB_ITER_END; + } + } + } + return ret; +} + +int ObPathRootNode::eval_node(ObPathCtx &ctx, ObSeekResult& res) +{ + INIT_SUCC(ret); + ObIMulModeBase* ans = nullptr; + if (!ctx.is_inited()) { + ret = OB_INIT_FAIL; + LOG_WARN("should be inited", K(ret)); + } else if (!node_type_.is_root()) { + ret = OB_INVALID_ARGUMENT; + 
LOG_WARN("must be root node", K(ret)); + } else if (size() == 0) { // last path node + if (!is_seeked_) { + ans = is_abs_path_ ? ctx.doc_root_ : ctx.cur_doc_; + is_seeked_ = true; + } else { + ret = OB_ITER_END; + is_seeked_ = false; + } + } else if (!is_seeked_ || iter_pos_ == -1) { + if (OB_FAIL(init_adapt(ctx, ans))) { + LOG_WARN("fail to init", K(ret)); + } + } else if (OB_FAIL(next_adapt(ctx, ans))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(ans)) { + ret = OB_ITER_END; + } else { + res.is_scalar_ = false; + res.result_.base_ = ans; + } + return ret; +} + + int ObPathLocationNode::set_seek_info(ObPathSeekInfo& seek_info) + { + INIT_SUCC(ret); + ObLocationType sub_type = ObLocationType(node_type_.node_subtype_); + bool wildcard = get_wildcard_info(); + if (sub_type == ObLocationType::PN_KEY) { + if (node_axis_ == ATTRIBUTE || node_axis_ == NAMESPACE) { + seek_info.type_ = SimpleSeekType::ATTR_KEY; + if (node_content_.key_.len_ > 0) { + seek_info.key_ = get_key_name(); + } else { + seek_info.key_ = ObString(0, nullptr); + } + } else if (wildcard || node_axis_ == DESCENDANT_OR_SELF || node_axis_ == DESCENDANT) { + seek_info.type_ = SimpleSeekType::ALL_KEY_TYPE; + } else { + seek_info.type_ = SimpleSeekType::KEY_TYPE; + seek_info.key_ = ObString(node_content_.key_.len_, node_content_.key_.name_); + } + } else if (sub_type == ObLocationType::PN_ELLIPSIS) { + seek_info.type_ = SimpleSeekType::ALL_KEY_TYPE; + } else if (sub_type == ObLocationType::PN_ARRAY) { + if (wildcard) { + seek_info.type_ = SimpleSeekType::ALL_ARR_TYPE; + } else { + seek_info.type_ = SimpleSeekType::INDEX_TYPE; + // seek_info.index_ = node_content_.index; + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("must be sub location type", K(ret)); + } + return ret; +} + +int ObPathLocationNode::eval_node(ObPathCtx &ctx, ObSeekResult& res) +{ + return OB_NOT_SUPPORTED; // should finish by SeekIterator +} + +int 
ObPathFuncNode::eval_node(ObPathCtx &ctx, ObSeekResult& res) +{ + INIT_SUCC(ret); + switch(node_type_.get_func_type()) { + case ObFuncType::PN_FALSE: + case ObFuncType::PN_TRUE: { + if (OB_FAIL(eval_true_or_false(ctx, node_type_.get_func_type() == ObFuncType::PN_TRUE, res))) { + LOG_WARN("fail to eval position", K(ret)); + } + break; + } + case ObFuncType::PN_COUNT: { + if (OB_FAIL(eval_count(ctx, res))) { + LOG_WARN("fail to eval position", K(ret)); + } + break; + } + case ObFuncType::PN_NOT_FUNC: + case ObFuncType::PN_BOOLEAN_FUNC: + case ObFuncType::PN_ABS: + case ObFuncType::PN_BOOL_ONLY: + case ObFuncType::PN_CEILING: + case ObFuncType::PN_DATE_FUNC: + case ObFuncType::PN_DOUBLE_FUNC: + case ObFuncType::PN_FLOOR: + case ObFuncType::PN_LENGTH: + case ObFuncType::PN_LOWER: + case ObFuncType::PN_NUMBER_FUNC: + case ObFuncType::PN_NUM_ONLY: + case ObFuncType::PN_SIZE: + case ObFuncType::PN_STRING_FUNC: + case ObFuncType::PN_STR_ONLY: + case ObFuncType::PN_TIMESTAMP: + case ObFuncType::PN_TYPE: + case ObFuncType::PN_UPPER: + case ObFuncType::PN_CONCAT: + case ObFuncType::PN_CONTAINS: + case ObFuncType::PN_LOCAL_NAME: + case ObFuncType::PN_LANG: + case ObFuncType::PN_SUM: + case ObFuncType::PN_NAME: + case ObFuncType::PN_NS_URI: + case ObFuncType::PN_NORMALIZE_SPACE: + case ObFuncType::PN_SUBSTRING_FUNC: + case ObFuncType::PN_POSITION: + case ObFuncType::PN_LAST: + case ObFuncType::PN_ROUND: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("axis not supported yet", K(ret)); + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("axis not supported yet", K(ret)); + break; + } + } + return ret; +} + +bool ObPathUtil::is_filter_nodetest(const ObSeekType& seek_type) +{ + bool ret_bool = false; + switch (seek_type) + { + case ObSeekType::COMMENT: + case ObSeekType::PROCESSING_INSTRUCTION:{ + ret_bool = true; + break; + } + default: + break; + } + return ret_bool; +} + +bool ObPathUtil::is_upper_axis(const ObPathNodeAxis& axis) +{ + bool ret_bool = false; + switch (axis) + 
{ + case ObPathNodeAxis::PARENT: + case ObPathNodeAxis::ANCESTOR: + case ObPathNodeAxis::ANCESTOR_OR_SELF: { + ret_bool = true; + break; + } + default: + break; + } + return ret_bool; +} + +bool ObPathUtil::is_down_axis(const ObPathNodeAxis& axis) +{ + bool ret_bool = false; + switch (axis) + { + case ObPathNodeAxis::CHILD: + case ObPathNodeAxis::DESCENDANT: + case ObPathNodeAxis::DESCENDANT_OR_SELF: { + ret_bool = true; + break; + } + default: + break; + } + return ret_bool; +} + +bool ObPathUtil::include_self_axis(const ObPathNodeAxis& axis) +{ + bool ret_bool = false; + switch (axis) + { + case ObPathNodeAxis::SELF: + case ObPathNodeAxis::ANCESTOR_OR_SELF: + case ObPathNodeAxis::DESCENDANT_OR_SELF: { + ret_bool = true; + break; + } + default: + break; + } + return ret_bool; +} + +bool ObPathUtil::check_contain_relative_path(ObPathNode* path) +{ + bool ret_bool = true; + if (path->get_node_type().is_arg()) { + ret_bool = false; + } else { + ret_bool = path->contain_relative_path_; + } + return ret_bool; +} + +bool ObPathUtil::check_need_cache(ObPathNode* path) +{ + return path->need_cache_; +} + +int ObPathUtil::add_dup_if_missing(ObIAllocator* allocator, ObIMulModeBase*& path_res, ObIBaseSortedVector &dup, bool& end_seek) +{ + INIT_SUCC(ret); + ObIMulModeBaseCmp cmp; + ObIMulModeBaseUnique unique; + ObIBaseSortedVector::iterator pos = dup.end(); + if (OB_ISNULL(path_res)) { + } else if (path_res->is_tree()) { + if ((OB_SUCC(dup.insert_unique(path_res, pos, cmp, unique)))) { + end_seek = true; + } else if (ret == OB_CONFLICT_VALUE) { + ret = OB_SUCCESS; // confilict means found duplicated nodes, it is not an error. 
+ } + } else if (OB_FAIL(dup.find(path_res, pos, cmp))) { + if (ret == OB_ENTRY_NOT_EXIST) { + ObXmlBin* old_bin = static_cast(path_res); + ObXmlBin* new_ans = nullptr; + if (OB_FAIL(ObPathUtil::alloc_binary(allocator, new_ans))) { + LOG_WARN("fail to alloc", K(ret)); + } else { + new_ans = new (new_ans) ObXmlBin(*old_bin); + pos = dup.end(); + if (OB_FAIL(dup.insert(new_ans, pos, cmp))) { + } else if (OB_NOT_NULL(new_ans)) { + path_res = new_ans; + end_seek = true; + } + } // alloc binary + } // duplicate bin, do nothing + } + return ret; +} + +int ObPathUtil::add_scalar(ObIAllocator *allocator, ObPathArgNode* ans, ObSeekVector &res) +{ + INIT_SUCC(ret); + if (OB_ISNULL(ans)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObSeekResult* final_ans = nullptr; + if (OB_FAIL(ObPathUtil::alloc_seek_result(allocator, ans, final_ans))) { + LOG_WARN("fail to get final ans", K(ret), K(res.size())); + } else if (OB_FAIL(res.push_back(final_ans))) { + LOG_WARN("fail to push_back value into result", K(ret), K(res.size())); + } + } + return ret; +} + +int ObPathUtil::alloc_num_arg(ObMulModeMemCtx *ctx, ObPathArgNode*& arg, ObParserType parser_type, double num) +{ + INIT_SUCC(ret); + if (OB_ISNULL(ctx) || OB_ISNULL(ctx->allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathArgNode* num_arg = static_cast (ctx->allocator_->alloc(sizeof(ObPathArgNode))); + if (OB_ISNULL(num_arg)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at location_node", K(ret)); + } else { + num_arg = new (num_arg) ObPathArgNode(ctx, parser_type); + if (OB_FAIL(num_arg->init(num, false))) { + LOG_WARN("fail to init", K(ret)); + } else { + arg = num_arg; + } + } + } + return ret; +} + +int ObPathUtil::alloc_boolean_arg(ObMulModeMemCtx *ctx, ObPathArgNode*& arg, ObParserType parser_type, bool ans) +{ + INIT_SUCC(ret); + if (OB_ISNULL(ctx) || OB_ISNULL(ctx->allocator_)) { + ret = 
OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathArgNode* boolean_arg = static_cast (ctx->allocator_->alloc(sizeof(ObPathArgNode))); + if (OB_ISNULL(boolean_arg)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at location_node", K(ret)); + } else { + boolean_arg = new (boolean_arg) ObPathArgNode(ctx, parser_type); + if (OB_FAIL(boolean_arg->init(ans, false))) { + LOG_WARN("fail to init", K(ret)); + } else { + arg = boolean_arg; + } + } + } + return ret; +} + +int ObPathFuncNode::eval_count(ObPathCtx &ctx, ObSeekResult& res) +{ + INIT_SUCC(ret); + if (is_seeked_ == true) { + ret = OB_ITER_END; + } else if (OB_ISNULL(ans_) || contain_relative_path_) { + is_seeked_ = true; + if (size() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error arg", K(ret)); + } else { + ObPathNode* arg = static_cast(member(0)); + if (OB_ISNULL(arg)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("value is NULL", K(ret)); + } else { + int count = 0; + ObSeekResult tmp_res; + while (OB_SUCC(arg->eval_node(ctx, res))) { + ++count; + } + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + ObPathArgNode* ans = nullptr; + if (OB_FAIL(ObPathUtil::alloc_num_arg(ctx.ctx_, ans, node_type_.get_path_type(), count))) { + LOG_WARN("fail to alloc arg", K(ret)); + } else { + res.is_scalar_ = true; + res.result_.scalar_ = ans; + ans_ = ans; + } + } + } + } + } else { + res.is_scalar_ = true; + res.result_.scalar_ = ans_; + is_seeked_ = true; + } + return ret; +} + +int ObPathFuncNode::eval_true_or_false(ObPathCtx &ctx, bool is_true, ObSeekResult& res) +{ + INIT_SUCC(ret); + if (is_seeked_) { + is_seeked_ = false; + ret = OB_ITER_END; + } else if (OB_ISNULL(ans_)) { + is_seeked_ = true; + ObPathArgNode* ans = nullptr; + if (OB_FAIL(ObPathUtil::alloc_boolean_arg(ctx.ctx_, ans, node_type_.get_path_type(), is_true))) { + LOG_WARN("fail to alloc arg", K(ret)); + } else { + res.is_scalar_ = true; + res.result_.scalar_ = ans; + ans_ = ans; + } + } else { + 
res.is_scalar_ = true; + res.result_.scalar_ = ans_; + is_seeked_ = true; + } + return ret; +} + +int ObSeekIterator::init(ObPathCtx &ctx, ObPathLocationNode* location, ObIMulModeBase* ada_root) +{ + INIT_SUCC(ret); + if (OB_ISNULL(location) || OB_ISNULL(ada_root)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("value is NULL", K(ret)); + } else { + ada_root_ = ada_root; + ObXmlPathFilter* filter = nullptr; + if (OB_FAIL(ObXmlUtil::alloc_filter_node(ctx.alloc_, filter))) { + LOG_WARN("fail to alloc arg node", K(ret)); + } else if (OB_FALSE_IT(filter = new (filter) ObXmlPathFilter(location, &ctx))) { + } else if (OB_FALSE_IT(seek_info_.filter_ = filter)) { + } else if (location->get_node_type().get_location_type() == PN_ELLIPSIS) { + if (ObPathUtil::is_upper_axis(location->get_axis())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("can't be upper axis for ellipsis node", K(ret)); + } else if (ObPathUtil::include_self_axis(location->get_axis())) { + axis_ = DESCENDANT_OR_SELF; + } else { + axis_ = DESCENDANT; + } + } else { + axis_ = location->get_axis(); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(location->set_seek_info(seek_info_))) { + LOG_WARN("fail to set seek info", K(ret)); + } + } + return ret; +} + +int ObSeekIterator::next(ObPathCtx &ctx, ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + switch(axis_) { + case ObPathNodeAxis::CHILD: { + if (OB_FAIL(next_child(ctx, res)) && ret != OB_ITER_END) { + LOG_WARN("fail to find child", K(ret)); + } + break; + } + case ObPathNodeAxis::ATTRIBUTE: { + if (OB_FAIL(next_attribute(ctx, res)) && ret != OB_ITER_END) { + LOG_WARN("fail to find attribute", K(ret)); + } + break; + } + case ObPathNodeAxis::NAMESPACE: { + if (OB_FAIL(next_namespace(ctx, res)) && ret != OB_ITER_END) { + LOG_WARN("fail to find attribute", K(ret)); + } + break; + } + case ObPathNodeAxis::PARENT: { + if (OB_FAIL(next_parent(ctx, res)) && ret != OB_ITER_END) { + LOG_WARN("fail to find self", K(ret)); + } + break; + } + case ObPathNodeAxis::SELF: { + if 
(OB_FAIL(next_self(ctx, res)) && ret != OB_ITER_END) { + LOG_WARN("fail to find self", K(ret)); + } + break; + } + default: { + ret = OB_NOT_SUPPORTED; + } + } + return ret; +} + +int ObSeekComplexIterator::next(ObPathCtx &ctx, ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + switch(axis_) { + case ObPathNodeAxis::DESCENDANT_OR_SELF: + case ObPathNodeAxis::DESCENDANT: { + if (OB_FAIL(next_descendant(ctx, DESCENDANT_OR_SELF == axis_, res)) && ret != OB_ITER_END) { + LOG_WARN("fail to find child", K(ret)); + } + break; + } + default: { + ret = OB_NOT_SUPPORTED; + } + } + return ret; +} + +int ObSeekAncestorIterator::next(ObPathCtx &ctx, ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + switch(axis_) { + case ObPathNodeAxis::ANCESTOR_OR_SELF: + case ObPathNodeAxis::ANCESTOR: + { + if (OB_FAIL(next_ancestor(ctx, ANCESTOR_OR_SELF == axis_, res)) && ret != OB_ITER_END) { + LOG_WARN("fail to find child", K(ret)); + } + break; + } + default: { + ret = OB_NOT_SUPPORTED; + } + } + return ret; +} + +void ObSeekIterator::reset(ObIMulModeBase* new_ada_root) +{ + is_seeked_ = false; + ada_root_ = new_ada_root; +} + +int ObSeekIterator::next_attribute(ObPathCtx &ctx, ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + ObIMulModeBase* tmp_res = nullptr; + ObXmlPathFilter* filter = static_cast (seek_info_.filter_); + ObPathLocationNode* path = (filter == nullptr) ? 
nullptr : filter->path_;
+  if (OB_NOT_NULL(path) && (path->get_default_prefix_ns_info()
+      || ObPathUtil::is_filter_nodetest(path->get_seek_type()))) {
+    ret = OB_ITER_END;
+  } else if (!is_seeked_) {
+    if (OB_FAIL(ctx.push_ancestor(ada_root_))) {
+      LOG_WARN("fail to push ancestor", K(ret));
+    } else {
+      iter_.construct(ada_root_, seek_info_);
+      is_seeked_ = true;
+    }
+  }
+  while (OB_SUCC(ret) && OB_ISNULL(tmp_res)) {
+    ret = iter_.attr_next(tmp_res, M_ATTRIBUTE);
+  }
+  if (OB_SUCC(ret)) {
+    res = tmp_res;
+  }
+  return ret;
+}
+
+// Returns the next namespace node of ada_root_ through `res`.
+// Yields OB_ITER_END at once when ada_root_ is null or not an element, or when the step is a
+// wildcard combined with a comment()/processing-instruction() node test.
+// On the first call ada_root_ is pushed as an ancestor and the reader is constructed.
+int ObSeekIterator::next_namespace(ObPathCtx &ctx, ObIMulModeBase*& res)
+{
+  INIT_SUCC(ret);
+  ObIMulModeBase* tmp_res = nullptr;
+  ObXmlPathFilter* filter = static_cast<ObXmlPathFilter*> (seek_info_.filter_);
+  ObPathLocationNode* path = (filter == nullptr) ? nullptr : filter->path_;
+  if (OB_ISNULL(ada_root_) || ada_root_->type() != ObMulModeNodeType::M_ELEMENT
+      // such as: namespace::comment(), return null
+      // path is null when no filter is set, so guard it the same way next_attribute does
+      || (OB_NOT_NULL(path) && path->get_wildcard_info()
+          && ObPathUtil::is_filter_nodetest(path->get_seek_type())) ) {
+    ret = OB_ITER_END;
+  } else if (!is_seeked_) {
+    if (OB_FAIL(ctx.push_ancestor(ada_root_))) {
+      LOG_WARN("fail to push ancestor", K(ret));
+    } else {
+      iter_.construct(ada_root_, seek_info_);
+      is_seeked_ = true;
+    }
+  }
+  while (OB_SUCC(ret) && OB_ISNULL(tmp_res)) {
+    ret = iter_.attr_next(tmp_res, M_NAMESPACE);
+  }
+  if (OB_SUCC(ret)) {
+    res = tmp_res;
+  }
+  return ret;
+}
+
+// Returns the next child of ada_root_ through `res`.
+// On the first call ada_root_ is pushed as an ancestor and the reader is constructed; is_seeked_
+// is set even when the push fails. When the reader reports OB_ITER_END, `res` is cleared and the
+// ancestor is popped; a failing pop appears to replace OB_ITER_END as the return code.
+int ObSeekIterator::next_child(ObPathCtx &ctx, ObIMulModeBase*& res)
+{
+  INIT_SUCC(ret);
+  ObIMulModeBase* tmp_res = nullptr;
+  if (!is_seeked_) {
+    if (OB_FAIL(ctx.push_ancestor(ada_root_))) {
+      LOG_WARN("fail to push parent", K(ret));
+    } else {
+      iter_.construct(ada_root_, seek_info_);
+    }
+    is_seeked_ = true;
+  }
+  while (OB_SUCC(ret) && OB_ISNULL(tmp_res)) {
+    ret = iter_.next(tmp_res);
+  }
+  if (OB_FAIL(ret)) {
+    if (ret == OB_ITER_END) {
+      res = nullptr;
+      if (OB_SUCC(ctx.pop_ancestor())) {
+        ret = OB_ITER_END;
+      }
+    }
+  } else {
+    res = tmp_res;
+  }
+  return ret;
+}
+
+int 
ObSeekIterator::next_parent(ObPathCtx &ctx, ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + if (is_seeked_) { + // parent node must only one, if already seeked, then ITER_END + // push cur node into ancestor_record + if (OB_FAIL(ctx.push_ancestor(ada_root_))) { + LOG_WARN("fail to push parent", K(ret)); + } else { + res = nullptr; + ret = OB_ITER_END; + } + } else if (ctx.ancestor_record_.size() > 0) { // check if have parent + ObIMulModeBase* top = ctx.top_ancestor(); + ada_root_ = top; + bool filtered = false; + is_seeked_ = true; + if (OB_ISNULL(top)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_FAIL(filter_ans(top, filtered))) { + LOG_WARN("fail to filter", K(ret)); + } else if (filtered) { + if (top->is_tree()) { + ret = ctx.pop_ancestor(); + } else { + // record parent + ada_root_ = top; + if (OB_FAIL(ctx.alloc_new_bin(ada_root_))) { + LOG_WARN("fail to record parent", K(ret)); + } else if (OB_FAIL(ctx.pop_ancestor())) { + LOG_WARN("fail to pop parent", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else { + res = ada_root_; + } + } else { + res = nullptr; + ret = OB_ITER_END; + } + } else { + res = nullptr; + ret = OB_ITER_END; + } + return ret; +} + +// just filter cur node +int ObSeekIterator::next_self(ObPathCtx &ctx, ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + if (is_seeked_) { + ret = OB_ITER_END; + } else { + is_seeked_ = true; + bool filtered = false; + ObXmlPathFilter* filter = static_cast (seek_info_.filter_); + ObPathLocationNode* path = (filter == nullptr) ? 
nullptr : filter->path_; + if (OB_ISNULL(filter) || OB_ISNULL(path)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (path->get_wildcard_info() + && path->get_seek_type() == ObSeekType::NODES) { + // if wildcard, do not need filter + filtered = true; + } else if (OB_FAIL((*filter)(ada_root_, filtered))) { + LOG_WARN("fail to filter xnode", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (filtered) { + res = ada_root_; + } else { + res = nullptr; + ret = OB_ITER_END; + } + } + return ret; +} + +int ObSeekComplexIterator::ellipsis_inner_next(ObPathCtx &ctx, ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + if (iter_stack_.size() == 0) { + ret = OB_ITER_END; + } else { + ObMulModeReader& top = iter_stack_.top(); + bool filtered = false; + // check top node, if valid, return ans + // if not valid, seek its child + if (!top.is_eval_cur_) { + if (OB_FAIL(filter_ans(top.cur_, filtered))) { + LOG_WARN("fail to filter", K(ret)); + } else if (filtered) { + res = top.cur_; + } + top.is_eval_cur_ = true; + } else { + ObIMulModeBase* unfilter_res = nullptr; + // checked top node, get its child + if (OB_FAIL(ctx.push_ancestor(top.cur_))) { + LOG_WARN("fail to push", K(ret)); + } else if (OB_FAIL(top.next(unfilter_res)) || OB_ISNULL(unfilter_res)) { + if (ret == OB_ITER_END) { + res = nullptr; + iter_stack_.pop(); + ret = ctx.pop_ancestor(); + } + } else if (get_mul_mode_tc(unfilter_res->type()) == MulModeContainer) { + // check is leaf node, if node leaf node, push + ObMulModeReader child_iter(nullptr); + child_iter.construct(unfilter_res, seek_info_); + child_iter.alter_filter(nullptr); + child_iter.is_eval_cur_ = false; + iter_stack_.push(child_iter); + } else if (OB_FAIL(filter_ans(unfilter_res, filtered))) { // leaf node, do not push, but filter + LOG_WARN("fail to filter", K(ret)); + } else if (filtered) { + res = unfilter_res; + } + } + } + return ret; +} + +int ObSeekComplexIterator::next_descendant(ObPathCtx &ctx, bool include_self, 
ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + ObIMulModeBase* tmp_res = nullptr; + if (!is_seeked_) { + ObMulModeReader iter(nullptr); + iter.construct(ada_root_, seek_info_); + iter.is_eval_cur_ = !include_self; + // first time, seek without filter + iter.alter_filter(nullptr); + iter_stack_.push(iter); + is_seeked_ = true; + } + while (OB_SUCC(ret) && OB_ISNULL(tmp_res)) { + // get valid next + if (OB_FAIL(ellipsis_inner_next(ctx, tmp_res)) && ret != OB_ITER_END) { + LOG_WARN("fail to get next", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(tmp_res)) { + res = tmp_res; + } else { + res = nullptr; + iter_stack_.reset(); + ret = OB_ITER_END; + } + return ret; +} + +int ObSeekAncestorIterator::next_ancestor(ObPathCtx &ctx, bool include_self, ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + ObIMulModeBase* tmp_res = nullptr; + if (!is_seeked_) { + is_seeked_ = true; + if (include_self) { + ret = anc_stack_push(ctx, ada_root_); + } + while (OB_SUCC(ret) && ctx.ancestor_record_.size() > 0) { + // record ancestor + ObIMulModeBase* top = ctx.top_ancestor(); + if (OB_FAIL(anc_stack_push(ctx, top))) { + LOG_WARN("fail to push", K(ret)); + } else { + ctx.pop_ancestor(); + } + } // end while + } + while (OB_SUCC(ret) && OB_ISNULL(tmp_res)) { + if (OB_FAIL(ancestor_inner_next(ctx, tmp_res)) && ret != OB_ITER_END) { + LOG_WARN("fail to get next", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(tmp_res)) { + res = tmp_res; + } else { + res = nullptr; + anc_stack_.reset(); + ret = OB_ITER_END; + } + return ret; +} + +int ObSeekAncestorIterator::anc_stack_push(ObPathCtx &ctx, ObIMulModeBase* push_node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(push_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (push_node->is_tree()) { + if (OB_FAIL(anc_stack_.push(push_node))) { + LOG_WARN("should be inited", K(ret)); + } + } else if (OB_FAIL(ctx.alloc_new_bin(push_node))) { + LOG_WARN("allocate xmlbin failed", K(ret)); + } else 
if (OB_FAIL(anc_stack_.push(push_node))) { + LOG_WARN("should be inited", K(ret)); + } + return ret; +} + +void ObSeekAncestorIterator::anc_stack_pop(ObPathCtx &ctx) +{ + ObIMulModeBase* top = anc_stack_.top(); + if (OB_ISNULL(top)) { + } else if (top->is_tree()) { + anc_stack_.pop(); + } else { + ObXmlBin* bin_top = static_cast(top); + anc_stack_.pop(); + ctx.bin_pool_.free(bin_top); + } +} + +int ObSeekAncestorIterator::ancestor_inner_next(ObPathCtx &ctx, ObIMulModeBase*& res) +{ + // first, return root + // last, return parent + INIT_SUCC(ret); + if (anc_stack_.size() == 0) { + ret = OB_ITER_END; + } else { + ObIMulModeBase* top = anc_stack_.top(); + if (OB_FAIL(ctx.push_ancestor(top))) { + LOG_WARN("fail to push", K(ret)); + } else { + anc_stack_pop(ctx); + bool filtered = false; + if (OB_FAIL(filter_ans(top, filtered))) { + LOG_WARN("fail to filter", K(ret)); + } else if (filtered) { + res = top; + } + } + } + return ret; +} + +int ObSeekIterator::filter_ans(ObIMulModeBase* ans, bool& filtered) +{ + INIT_SUCC(ret); + filtered = false; + ObXmlPathFilter* seek_filter = static_cast (seek_info_.filter_); + if (OB_ISNULL(seek_filter)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_FAIL((*seek_filter)(ans, filtered))) { + LOG_WARN("fail to filter xnode", K(ret)); + } + return ret; +} + +ObDatum *ObPathVarObject::get_value(const common::ObString &key) const +{ + ObDatum *data = NULL; + ObPathVarPair pair(key, NULL); + ObPathKeyCompare cmp; + + ObPathVarArray::const_iterator low_iter = std::lower_bound(object_array_.begin(), + object_array_.end(), + pair, cmp); + if (low_iter != object_array_.end() && low_iter->get_key() == key) { + data = low_iter->get_value(); + } + + return data; +} + +int ObPathVarObject::add(const common::ObString &key, ObDatum *value, bool with_unique_key) +{ + INIT_SUCC(ret); + + if (OB_ISNULL(value)) { // check param + ret = OB_INVALID_ARGUMENT; + LOG_WARN("param value is NULL", K(ret)); + } else { + 
ObPathVarPair pair(key, value); + ObPathKeyCompare cmp; + ObPathVarArray::iterator low_iter = std::lower_bound(object_array_.begin(), + object_array_.end(), pair, cmp); + if (low_iter != object_array_.end() && low_iter->get_key() == key) { // Found and covered + if (with_unique_key) { + ret = OB_ERR_DUPLICATE_KEY; + LOG_WARN("Found duplicate key inserted before!", K(key), K(ret)); + } else { + low_iter->set_value(value); + } + } else if (OB_FAIL(object_array_.push_back(pair))) {// not found, push back, sort + LOG_WARN("fail to push back", K(ret)); + } else { // sort again. + ObPathKeyCompare cmp; + std::sort(object_array_.begin(), object_array_.end(), cmp); + } + } + return ret; +} + +int ObPathExprIter::init(ObMulModeMemCtx *ctx, ObString& path, ObString& default_ns, + ObIMulModeBase* doc, ObPathVarObject* pass_var, bool add_namespace) +{ + INIT_SUCC(ret); + if (OB_ISNULL(ctx) || OB_ISNULL(ctx->allocator_) || OB_ISNULL(doc) || OB_ISNULL(path.ptr())) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret), K(ctx->allocator_), K(doc), K(path.ptr())); + } else { + ctx_ = ctx; + allocator_ = ctx->allocator_; + path_ = path; + doc_ = doc; + default_ns_ = default_ns; + pass_var_ = pass_var; + path_node_ = nullptr; + is_inited_ = true; + is_open_ = false; + add_ns_ = add_namespace; + } + return ret; +} + +int ObPathUtil::get_parser_type(ObIMulModeBase *doc, ObParserType& parser_type) +{ + INIT_SUCC(ret); + switch (doc->data_type()) { + case ObNodeDataType::OB_XML_TYPE: { + parser_type = ObParserType::PARSER_XML_PATH; + break; + } + case ObNodeDataType::OB_JSON_TYPE: { + if (lib::is_oracle_mode()) { + parser_type = ObParserType::PARSER_JSON_PATH_LAX; + } else { + parser_type = ObParserType::PARSER_JSON_PATH_STRICT; + } + } + default: { + ret = OB_INVALID_DATA; + LOG_WARN("Wrong type for seek", K(ret)); + } + } + return ret; +} + +int ObPathUtil::char_to_filter_type(const ObXpathFilterChar& ch, ObFilterType& type) +{ + INIT_SUCC(ret); + if (ch >= 
ObXpathFilterChar::CHAR_UNION && ch <= ObXpathFilterChar::CHAR_MOD) { + type = (ObFilterType)ch; + } else { + type = ObFilterType::PN_FILTER_ERROR; + } + return ret; +} + +int ObPathUtil::pop_char_stack(ObFilterCharPointers& char_stack) +{ + INIT_SUCC(ret); + uint64_t size = char_stack.size(); + if (size > 0) { + if (OB_FAIL(char_stack.remove(size - 1))) { + LOG_WARN("fail to remove char top.", K(ret), K(char_stack[size - 1])); + } + } else { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("should not be null", K(ret)); + } + return ret; +} + +int ObPathUtil::pop_node_stack(ObPathVectorPointers& node_stack, ObPathNode*& top_node) +{ + INIT_SUCC(ret); + uint64_t size = node_stack.size(); + if (size > 0) { + top_node = node_stack[size - 1]; + if (OB_FAIL(node_stack.remove(size - 1))) { + LOG_WARN("fail to remove char top.",K(ret), K(top_node)); + } + } else { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("should not be null", K(ret)); + } + return ret; +} + +int ObPathExprIter::open() +{ + INIT_SUCC(ret); + if (!is_inited_) { + ret = OB_INIT_FAIL; + LOG_WARN("should be inited", K(ret)); + } else { + ObParserType parser_type; + if (OB_FAIL(ObPathUtil::get_parser_type(doc_, parser_type))) { + LOG_WARN("fail to init parser type", K(ret)); + } else { + ObPathParser parser(ctx_, parser_type, path_, default_ns_, pass_var_); + // parse + if (OB_FAIL(parser.parse_path())) { + LOG_WARN("fail to parse", K(ret)); + } else { + path_node_ = parser.get_root(); + ret = path_ctx_.init(ctx_, doc_, doc_, tmp_allocator_, true, need_record_, add_ns_); + is_open_ = true; + } + } + } + return ret; +} + +int ObPathExprIter::get_first_node(ObPathNode*& loc) +{ + INIT_SUCC(ret); + if (OB_ISNULL(path_node_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("param value is NULL", K(ret)); + } else if (path_node_->get_node_type().is_root()) { + if (path_node_->size() > 0) { + loc = static_cast(path_node_->member(0)); + } + } else if (path_node_->get_node_type().is_location_filter()) { + ObPathFilterOpNode* op_node = 
static_cast(path_node_); + ObPathNode* left = op_node->left_; + if (left->get_node_type().is_root()) { + if (path_node_->size() > 0) { + loc = static_cast(path_node_->member(0)); + } + } + } + return ret; +} + +int ObPathExprIter::get_first_axis(ObPathNodeAxis& first_axis) +{ + INIT_SUCC(ret); + ObPathNode* first = nullptr; + if (OB_FAIL(get_first_node(first))) { + LOG_WARN("fail to get first node", K(ret)); + } else if (OB_ISNULL(first)) { + first_axis = ObPathNodeAxis::ERROR_AXIS; + } else if (first->get_node_type().is_location() + && first->get_node_type().get_location_type() == PN_ELLIPSIS) { + first_axis = ObPathNodeAxis::DESCENDANT_OR_SELF; + } else if (first->get_node_type().is_location()) { + ObPathLocationNode* loc = static_cast(first); + first_axis = loc->get_axis(); + } else { + first_axis = ObPathNodeAxis::ERROR_AXIS; + } + return ret; +} +int ObPathExprIter::get_first_seektype(ObSeekType& first_seektype) +{ + INIT_SUCC(ret); + ObPathNode* first = nullptr; + if (OB_FAIL(get_first_node(first))) { + LOG_WARN("fail to get first node", K(ret)); + } else if (OB_ISNULL(first)) { + first_seektype = ObSeekType::ERROR_SEEK; + } else if (first->get_node_type().is_location() + && first->get_node_type().get_location_type() == PN_ELLIPSIS) { + first_seektype = ObSeekType::NODES; + } else if (first->get_node_type().is_location()) { + ObPathLocationNode* loc = static_cast(first); + first_seektype = loc->get_seek_type(); + } else { + first_seektype = ObSeekType::ERROR_SEEK; + } + return ret; +} + +ObIMulModeBase* ObPathExprIter::get_cur_res_parent() +{ + return path_ctx_.ancestor_record_.size() > 0 ? 
path_ctx_.ancestor_record_.top() : nullptr; +} + +int ObPathExprIter::get_next_node(ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + if (!is_inited_ || OB_ISNULL(path_node_)) { + ret = OB_INIT_FAIL; + LOG_WARN("should be inited", K(ret)); + } else { + ObSeekResult path_res; + bool end_seek = false; + while (OB_SUCC(ret) && !end_seek) { + if (OB_FAIL(path_node_->eval_node(path_ctx_, path_res))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to seek", K(ret)); + } + } else if (path_res.is_scalar_) { + if (OB_FAIL(ObPathUtil::trans_scalar_to_base(tmp_allocator_, path_res.result_.scalar_, res))) { + LOG_WARN("fail to trans", K(ret)); + } + end_seek = true; + } else if (OB_ISNULL(path_res.result_.base_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("res is NULL", K(ret)); + } else if (OB_FAIL(ObPathUtil::add_dup_if_missing(tmp_allocator_, path_res.result_.base_, dup_, end_seek))) { + LOG_WARN("fail to trans", K(ret)); + } else if (add_ns_ && ((path_res.result_.base_->type() == M_ELEMENT && path_ctx_.defined_ns_ > 0) + || ((path_res.result_.base_->type() == M_ELEMENT || path_res.result_.base_->type() == M_DOCUMENT + || path_res.result_.base_->type() == M_CONTENT) && OB_NOT_NULL(path_ctx_.extend_))) + && OB_FAIL(ObPathUtil::add_ns_if_need(path_ctx_, path_res.result_.base_))) { + LOG_WARN("fail to add_ns", K(ret)); + } else if (end_seek) { + res = path_res.result_.base_; + } + } // end while + } + return ret; +} + +int ObPathExprIter::close() +{ + INIT_SUCC(ret); + if (is_inited_) { + is_inited_ = false; + dup_.reset(); + path_ctx_.reset(); + } + return ret; +} + +int ObPathExprIter::reset() +{ + INIT_SUCC(ret); + if (is_inited_) { + dup_.reset(); + is_inited_ = false; + } + return ret; +} + +int ObPathExprIter::reset(ObIMulModeBase* doc, ObIAllocator *tmp_allocator) +{ + INIT_SUCC(ret); + if (OB_ISNULL(doc) || OB_ISNULL(tmp_allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null.", K(ret)); + } else { + doc_ = doc; + tmp_allocator_ = tmp_allocator; + dup_.reset(); 
+ ret = path_ctx_.reinit(doc, tmp_allocator); + } + return ret; +} + +void ObPathExprIter::set_add_ns(bool add_ns) { + add_ns_ = add_ns; + if (path_ctx_.is_inited()) { + path_ctx_.add_ns_ = add_ns; + } +} + +int ObPathExprIter::set_tmp_alloc(ObIAllocator *tmp_allocator) +{ + INIT_SUCC(ret); + if (OB_ISNULL(tmp_allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null.", K(ret)); + } else { + tmp_allocator_ = tmp_allocator; + } + return ret; +} + +int ObXmlPathFilter::operator()(ObIMulModeBase* doc, bool& filtered) +{ + INIT_SUCC(ret); + filtered = false; + if (OB_ISNULL(doc) || OB_ISNULL(path_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("param value is NULL", K(ret)); + } else if (doc->data_type() == ObNodeDataType::OB_XML_TYPE) { + ObMulModeNodeType xtype = doc->type(); + ObSeekType seek_info = path_->get_seek_type(); + ObString ns_value = path_->get_ns_name(); + ObString key_value = path_->get_key_name(); + if (path_->get_prefix_ns_info() && path_->get_default_prefix_ns_info() && path_->get_axis() != NAMESPACE) { + filtered = false; + } else if (path_->get_axis() == ATTRIBUTE || path_->get_axis() == NAMESPACE) { + filtered = (path_->get_axis() == ATTRIBUTE) ? (xtype == ObMulModeNodeType::M_ATTRIBUTE) : (xtype == ObMulModeNodeType::M_NAMESPACE); + if (!filtered) { + } else if (path_->get_wildcard_info()) { + if (xtype == ObMulModeNodeType::M_ATTRIBUTE) { + filtered = true; + } else if (seek_info >= ObSeekType::TEXT) { + filtered = false; + } else { + filtered = true; + } + } else if (key_value.length() > 0 && OB_NOT_NULL(key_value.ptr())) { + int res = 0; + doc->compare(key_value, res); + filtered = (res == 0)? 
true : false; + if (filtered && xtype == ObMulModeNodeType::M_NAMESPACE + && !path_->get_prefix_ns_info() + && !path_->get_default_prefix_ns_info() + && key_value.compare("xmlns") != 0) { + filtered = false; + } + } + } else if (path_->get_wildcard_info()) { + switch (seek_info) { + case ObSeekType::NODES: { + filtered = true; + break; + } + case ObSeekType::TEXT: { + if (xtype == ObMulModeNodeType::M_TEXT || xtype == ObMulModeNodeType::M_CDATA) { + filtered = true; + } + break; + } + case ObSeekType::ELEMENT: { + filtered = (xtype == ObMulModeNodeType::M_ELEMENT); + break; + } + case ObSeekType::COMMENT: { + filtered = (xtype == ObMulModeNodeType::M_COMMENT); + break; + } + case ObSeekType::PROCESSING_INSTRUCTION: { + filtered = (xtype == ObMulModeNodeType::M_INSTRUCT); + break; + } + default: { + filtered = false; + break; + } + } + } else if (key_value.length() > 0 && OB_NOT_NULL(key_value.ptr())) { + int res = 0; + doc->compare(key_value, res); + filtered = (res == 0)? true : false; + if (filtered && seek_info == ObSeekType::PROCESSING_INSTRUCTION) { + filtered = (xtype == ObMulModeNodeType::M_INSTRUCT); + } + } + + if (OB_SUCC(ret) && filtered + && (seek_info == ObSeekType::ELEMENT || path_->get_axis() == ATTRIBUTE)// if element, check ns + && !(path_->get_wildcard_info() && !path_->get_prefix_ns_info())) { // if * without prefix_ns do not check + ObString node_ns_value; + if (OB_FAIL(doc->get_ns_value(path_ctx_->ancestor_record_, node_ns_value, path_ctx_->extend_))) { + LOG_WARN("failed to get ns from element node", K(ret), K(doc->is_tree())); + } else if (ns_value.length() == 0 || OB_ISNULL(ns_value.ptr())) { + filtered = (node_ns_value.length() == 0 || OB_ISNULL(node_ns_value.ptr())) ? true : false; + } else { + filtered = (ns_value.compare(node_ns_value) == 0)? 
true : false; + } + } + } // may be json type + return ret; +} + +int ObPathUtil::logic_compare_rule(ObPathCtx &ctx, ObPathNode *path_node, bool &ret_bool) +{ + INIT_SUCC(ret); + ObArgType node_type; + ObPathArgNode *arg_node; + if (OB_ISNULL(ctx.alloc_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_ISNULL(path_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("get path node null", K(ret)); + } else if (OB_FAIL(ObPathUtil::get_arg_type(node_type, path_node))) { + LOG_WARN("fail to get node type", K(ret)); + } else if (OB_ISNULL(arg_node = static_cast(path_node))) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("get arg node null", K(ret)); + } else if (ObArgType::PN_SUBPATH == node_type) { + ObSeekVector seek_vector; + if (OB_FAIL(get_seek_vec(ctx, path_node, seek_vector))) { + LOG_WARN("left get_seek_vec failed", K(ret), K(path_node)); + } else if (seek_vector.size() > 0) { + ret_bool = true; + } else { + ret_bool = false; + } + if (OB_SUCC(ret) && OB_FAIL(release_seek_vector(ctx,seek_vector))) { + LOG_WARN("release_seek_vec failed", K(ret), K(path_node)); + } + } else { + ObNodeTypeAndContent *content = nullptr; + if (OB_FAIL(ObPathUtil::alloc_node_content_info(ctx.alloc_, &arg_node->arg_, node_type, content))) { + LOG_WARN("alloc node content info failed", K(ret), K(node_type)); + } else if (OB_FAIL(ObXmlUtil::check_bool_rule(content, ret_bool))) { + LOG_WARN("check bool rule failed", K(ret), K(content)); + } + } + return ret; +} + +int ObPathUtil::alloc_node_set_vector(ObPathCtx &ctx, ObPathNode *path_node, ObArgType& node_type, ObNodeSetVector &node_vec) +{ + INIT_SUCC(ret); + ObNodeTypeAndContent *content = nullptr; + if (OB_ISNULL(ctx.alloc_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_ISNULL(path_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("get path node null", K(ret)); + } else if (ObArgType::PN_SUBPATH == node_type) { + ObSeekVector seek_vector; + if (OB_FAIL(get_seek_vec(ctx, 
path_node, seek_vector))) { + LOG_WARN("left get_seek_vec failed", K(ret), K(path_node)); + } else if (seek_vector.size() == 0) { + if (path_node->node_type_.is_root()) { + ObPathNode *member_path = nullptr; + ObPathLocationNode *location_node = nullptr; + if (path_node->size() > 0 + && OB_NOT_NULL(path_node->member((path_node->size()) - 1)) + && OB_NOT_NULL(member_path = static_cast(path_node->member((path_node->size()) - 1)))) { + if (member_path->node_type_.is_location() && OB_NOT_NULL(location_node = static_cast(member_path))) { + if (location_node->get_axis() == ObPathNodeAxis::ATTRIBUTE) { + ObString input_str(""); + if (OB_FAIL(ObPathUtil::alloc_node_content_info(ctx.alloc_, &input_str, content))) { + LOG_WARN("alloc node content info failed", K(ret)); + } else if (OB_FAIL(node_vec.push_back(content))){ + LOG_WARN("failed to push back content.", K(ret)); + } + } + } + } + } + } else if (seek_vector.size() > 0 && seek_vector[0].is_scalar_) { + ObPathArgNode* arg_node; + if (OB_ISNULL(arg_node = seek_vector[0].result_.scalar_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("scalar get null", K(ret)); + } else if (OB_FAIL(ObPathUtil::alloc_node_content_info(ctx.alloc_, &(arg_node->arg_), + arg_node->node_type_.get_arg_type(), + content))) { + LOG_WARN("alloc node content info failed", K(ret)); + } else if (OB_FAIL(node_vec.push_back(content))) { + LOG_WARN("failed to push back content.", K(ret)); + } + } else { + for (int i = 0; OB_SUCC(ret) && i < seek_vector.size(); i++) { + ObSeekResult* tmp_result = &seek_vector[i]; + ObArray node_array; + ObIMulModeBase *base = tmp_result->result_.base_; + ObString text_str; + if (OB_ISNULL(tmp_result)) { + LOG_WARN("seek result is null", K(ret), K(i)); + } else if (tmp_result->is_scalar_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("compare get scalar unexpected", K(ret)); + } else if (base->size() == 0) { // leaf node + if ((base->type() == ObMulModeNodeType::M_TEXT || + base->type() == ObMulModeNodeType::M_ATTRIBUTE || + 
base->type() == ObMulModeNodeType::M_NAMESPACE) + && OB_FAIL(base->get_value(text_str))) { + LOG_WARN("get value failed", K(ret)); + } else if (OB_FAIL(ObPathUtil::alloc_node_content_info(ctx.alloc_, &text_str, content))) { + LOG_WARN("alloc node content info failed", K(ret), K(text_str)); + } else if (OB_FAIL(node_vec.push_back(content))) { + LOG_WARN("failed to push back content.", K(ret)); + } + } else if (OB_FAIL(ObXmlUtil::get_array_from_mode_base(base, node_array))) { // get children + LOG_WARN("get child array failed", K(ret)); + } else if (node_array.size() == 0) { + ObString text(""); + if (OB_FAIL(ObPathUtil::alloc_node_content_info(ctx.alloc_, &text, content))) { + LOG_WARN("alloc node content info", K(ret)); + } else if (OB_FAIL(node_vec.push_back(content))) { + LOG_WARN("failed to push back content.", K(ret)); + } + } else { + ObString text; + bool is_scalar = (base->type() == ObMulModeNodeType::M_TEXT || + base->type() == ObMulModeNodeType::M_ATTRIBUTE || + base->type() == ObMulModeNodeType::M_NAMESPACE); + if (is_scalar && OB_FAIL(base->get_value(text))) { + LOG_WARN("get value failed", K(ret)); + } else if (!is_scalar && OB_FAIL(ObXmlUtil::dfs_xml_text_node(ctx.ctx_, base, text))) { + LOG_WARN("dfs get text failed", K(ret)); + } else if (OB_FAIL(ObPathUtil::alloc_node_content_info(ctx.alloc_, &text, content))) { + LOG_WARN("alloc node content info failed", K(ret), K(text)); + } else if (OB_FAIL(node_vec.push_back(content))) { + LOG_WARN("failed to push back content.", K(ret)); + } + } + } + } + if (OB_SUCC(ret) && OB_FAIL(release_seek_vector(ctx,seek_vector))) { + LOG_WARN("release_seek_vec failed", K(ret), K(path_node)); + } + } else { + ObPathArgNode* arg_node = static_cast(path_node); + if (OB_ISNULL(arg_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("scalar get null", K(ret)); + } else if (OB_FAIL(ObPathUtil::alloc_node_content_info(ctx.alloc_, &arg_node->arg_, node_type, content))) { + LOG_WARN("alloc node content info failed", K(ret), 
K(node_type)); + } else if (OB_FAIL(node_vec.push_back(content))) { + LOG_WARN("failed to push back content.", K(ret)); + } + } + return ret; +} + +int ObPathUtil::filter_compare(ObPathCtx &ctx, + ObNodeSetVector &left, ObArgType left_type, + ObNodeSetVector &right, ObArgType right_type, + ObFilterType op, ObSeekResult& res) +{ + INIT_SUCC(ret); + bool ret_bool = false; + ObXpathCompareType compare_type = ObXmlUtil::filter_type_correspondence(op); + ObXpathArgType left_arg_type = ObXmlUtil::arg_type_correspondence(left_type); + ObXpathArgType right_arg_type = ObXmlUtil::arg_type_correspondence(right_type); + ObXpathArgType target_type = ObXmlUtil::compare_cast[left_arg_type][right_arg_type][compare_type]; + + if (OB_FAIL(ret)) { + } else if ((ObFilterType::PN_CMP_UNEQUAL == op || ObFilterType::PN_CMP_EQUAL == op) + &&(left_arg_type == ObXpathArgType::XC_TYPE_BOOLEAN && + right_arg_type == ObXpathArgType::XC_TYPE_STRING)) { + // special for left=bool, right=string, use to_boolean + bool str_bool = false; + if (right.size() != 1 || left.size() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("right size unexpect", K(ret)); + } else if (OB_FAIL(ObXmlUtil::to_boolean(&right[0]->content_->str_, str_bool))) { + LOG_WARN("right to boolean failed", K(ret), K(right[0])); + } else if (OB_FAIL(ObXmlUtil::compare(left[0]->content_->boolean_, str_bool, op, ret_bool))) { + LOG_WARN("compare failed", K(ret), K(left[0]), K(str_bool)); + } + } else if ((ObFilterType::PN_CMP_UNEQUAL == op || ObFilterType::PN_CMP_EQUAL == op) && + ((left_type == ObArgType::PN_SUBPATH && left.size() == 0 && right_type == ObArgType::PN_BOOLEAN) || + (left_type == ObArgType::PN_BOOLEAN && right_type == ObArgType::PN_SUBPATH && right.size() == 0))) { + if (left.size() == 0 && OB_FAIL(ObXmlUtil::compare(false, right[0]->content_->boolean_, op, ret_bool))) { + LOG_WARN("left size = 0 and compare failed", K(ret)); + } else if (right.size() == 0 && OB_FAIL(ObXmlUtil::compare(left[0]->content_->boolean_, false, 
op, ret_bool))) { + LOG_WARN("right size = 0 and compare failed", K(ret)); + } + } else { + for (int32_t i = 0; OB_SUCC(ret) && !ret_bool && i < left.size(); i++) { + for (int32_t j = 0; OB_SUCC(ret) && !ret_bool && j < right.size(); j++) { + if (ObXpathArgType::XC_TYPE_BOOLEAN == target_type) { + bool left_bool = false; + bool right_bool = false; + if (OB_FAIL(ObXmlUtil::check_bool_rule(left[i], left_bool))) { + LOG_WARN("check left bool rule failed", K(ret), K(left[i])); + } else if (OB_FAIL(ObXmlUtil::check_bool_rule(right[j], right_bool))) { + LOG_WARN("check right bool rule failed", K(ret), K(right[j])); + } else if (OB_FAIL(ObXmlUtil::compare(left_bool, right_bool, op, ret_bool))) { + LOG_WARN("compare failed", K(ret), K(left_bool), K(right_bool)); + } + } else if (ObXpathArgType::XC_TYPE_NUMBER == target_type) { + double left_double = 0.0; + double right_double = 0.0; + if (OB_FAIL(ObXmlUtil::to_number(left[i], left_double)) && ret != OB_INVALID_DATA) { + LOG_WARN("check left bool rule failed", K(ret), K(left[i])); + } else if (ret == OB_INVALID_DATA) { + ret = OB_SUCCESS; + ret_bool = op == ObFilterType::PN_CMP_UNEQUAL ? true : false; + } else if (OB_FAIL(ObXmlUtil::to_number(right[j], right_double)) && ret != OB_INVALID_DATA) { + LOG_WARN("check right bool rule failed", K(ret), K(right[j])); + } else if (ret == OB_INVALID_DATA) { + ret = OB_SUCCESS; + ret_bool = op == ObFilterType::PN_CMP_UNEQUAL ? 
true : false; + } else if (OB_FAIL(ObXmlUtil::compare(left_double, right_double, op, ret_bool))) { + LOG_WARN("compare failed", K(ret), K(left_double), K(right_double)); + } + } else if (ObXpathArgType::XC_TYPE_STRING == target_type) { + char *str_left = NULL; + char *str_right = NULL; + double left_double = 0.0; + double right_double = 0.0; + // bugfix 49485495: if left and right are both subpath then try to cover to number + if (left_type == ObArgType::PN_SUBPATH && right_type == ObArgType::PN_SUBPATH + && OB_SUCC(ObXmlUtil::to_number(left[i], left_double)) + && OB_SUCC(ObXmlUtil::to_number(right[j], right_double))) { + if (OB_FAIL(ObXmlUtil::compare(left_double, right_double, op, ret_bool))) { + LOG_WARN("compare failed", K(ret), K(left_double), K(right_double)); + } + } else if (OB_FAIL(ObXmlUtil::to_string(*ctx.alloc_, left[i], str_left))) { + LOG_WARN("check left bool rule failed", K(ret), K(left[i])); + } else if (OB_FAIL(ObXmlUtil::to_string(*ctx.alloc_, right[j], str_right))) { + LOG_WARN("check right bool rule failed", K(ret), K(right[j])); + } else if ((op == ObFilterType::PN_CMP_EQUAL || op == ObFilterType::PN_CMP_UNEQUAL) + && OB_ISNULL(str_left) && OB_ISNULL(str_right)) { + ret_bool = op == ObFilterType::PN_CMP_EQUAL ? 
true : false; + } else if (OB_ISNULL(str_left) || OB_ISNULL(str_right)) { + ret_bool = false; + } else if (OB_FAIL(ObXmlUtil::compare(ObString(strlen(str_left), str_left), ObString(strlen(str_right), str_right), op, ret_bool))) { + LOG_WARN("compare failed", K(ret), K(str_left), K(str_right)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid target type", K(ret), K(target_type), K(left_type), K(right_type)); + } + } + } + } + if (OB_SUCC(ret)) { + ObPathArgNode* ans = nullptr; + if (OB_FAIL(ObXmlUtil::alloc_arg_node(ctx.alloc_, ans))) { + LOG_WARN("fail to alloc arg node", K(ret)); + } else if (OB_FALSE_IT(ans = new (ans) ObPathArgNode(ctx.ctx_, ObParserType::PARSER_XML_PATH))) { + } else if (OB_FAIL(ans->init(ret_bool, false))) { + LOG_WARN("fail to init arg node", K(ret)); + } else { + res.is_scalar_ = true; + res.result_.scalar_ = ans; + } + } + return ret; +} + +int ObPathUtil::filter_calculate(ObPathCtx &ctx, + ObNodeSetVector &left, ObArgType left_type, + ObNodeSetVector &right, ObArgType right_type, + ObFilterType op, ObSeekVector &res) +{ + INIT_SUCC(ret); + bool ret_bool = true; + double ret_double = 0.0; + double left_double = 0.0; + double right_double = 0.0; + if (left.size() == 0 || right.size() == 0) { + ret_bool = false; + } else if (OB_FAIL(ObXmlUtil::to_number(left[0], left_double)) && OB_OP_NOT_ALLOW != ret) { + LOG_WARN("to_number failed", K(ret), K(op)); + } else if (OB_OP_NOT_ALLOW == ret) { + ret = OB_SUCCESS; + ret_bool = false; + } else if (OB_FAIL(ObXmlUtil::to_number(right[0], right_double)) && OB_OP_NOT_ALLOW != ret) { + LOG_WARN("to_number failed", K(ret), K(op)); + } else if (OB_OP_NOT_ALLOW == ret) { + ret = OB_SUCCESS; + ret_bool = false; + } else if (OB_FAIL(ObXmlUtil::calculate(left_double, right_double, op, ret_double))) { + LOG_WARN("calculate failed", K(ret), K(left_double), K(right_double)); + } + if (OB_SUCC(ret)) { + ObPathArgNode* ans = nullptr; + if (OB_FAIL(ObXmlUtil::alloc_arg_node(ctx.alloc_, ans))) { + 
LOG_WARN("fail to alloc arg node", K(ret)); + } else if (OB_FALSE_IT(ans = new (ans) ObPathArgNode(ctx.ctx_, ObParserType::PARSER_XML_PATH))) { + } else if (ret_bool && OB_FAIL(ans->init(ret_double, false))) { + LOG_WARN("fail to init arg node", K(ret)); + } else if (!ret_bool && OB_FAIL(ans->init(ret_bool, false))) { + LOG_WARN("fail to init arg node", K(ret)); + } else if (OB_FAIL(ObPathUtil::add_scalar(ctx.alloc_, ans, res))) { + LOG_WARN("add scalar failed"); + } + } + return ret; +} + +int ObPathUtil::filter_logic_compare(ObPathCtx &ctx, ObPathNode* left_node, ObPathNode* right_node, ObFilterType op, ObSeekResult &res) +{ + INIT_SUCC(ret); + bool ret_bool = false; + bool left_bool = false; + bool right_bool = false; + if (OB_FAIL(ObPathUtil::logic_compare_rule(ctx, left_node, left_bool))) { + LOG_WARN("left logic compare rule failed", K(ret), K(left_node)); + } else if (op == ObFilterType::PN_AND_COND && !left_bool) { + ret_bool = false; + } else if (op == ObFilterType::PN_OR_COND && left_bool) { + ret_bool = true; + } else if (OB_FAIL(ObPathUtil::logic_compare_rule(ctx, right_node, right_bool))) { + LOG_WARN("right logic compare rule failed", K(ret), K(right_node)); + } else if (OB_FAIL(ObXmlUtil::logic_compare(left_bool, right_bool, op, ret_bool))) { + LOG_WARN("logic compare failed", K(ret), K(left_bool), K(right_bool)); + } + if (OB_SUCC(ret)) { + ObPathArgNode* ans = nullptr; + if (OB_FAIL(ObXmlUtil::alloc_arg_node(ctx.alloc_, ans))) { + LOG_WARN("fail to alloc arg node", K(ret)); + } else if (OB_FALSE_IT(ans = new (ans) ObPathArgNode(ctx.ctx_, ObParserType::PARSER_XML_PATH))) { + } else if (OB_FAIL(ans->init(ret_bool, false))) { + LOG_WARN("fail to init arg node", K(ret)); + } else { + res.is_scalar_ = true; + res.result_.scalar_ = ans; + } + } + return ret; +} + +int ObPathUtil::filter_union(ObPathCtx &ctx, ObPathNode* left_node, ObPathNode* right_node, ObFilterType op, ObSeekVector &res) +{ + INIT_SUCC(ret); + bool ret_bool = false; + ObArgType 
left_type = left_node->node_type_.get_arg_type(); + ObArgType right_type = right_node->node_type_.get_arg_type(); + ObPathArgNode *left_arg_node = static_cast(left_node); + ObPathArgNode *right_arg_node = static_cast(right_node); + bool left_union = true; + if (ObArgType::PN_SUBPATH == left_type) { + ObSeekVector left_vec; + if (OB_FAIL(get_seek_vec(ctx, left_node, left_vec))) { + LOG_WARN("right get_seek_vec failed", K(ret)); + } else if (left_vec.size() <= 0) { + left_union = false; + } + } + if (OB_FAIL(ret)) { + LOG_WARN("ret failed", K(ret)); + } else if (ObArgType::PN_SUBPATH == right_type) { + ObNodeTypeAndContent *left_info = nullptr; + if (ObArgType::PN_SUBPATH == left_type && left_union == true) { + ret_bool = true; + } else if (OB_FAIL(ObArgType::PN_SUBPATH != left_type && ObPathUtil::alloc_node_content_info(ctx.alloc_, &left_arg_node->arg_, left_type, left_info))) { + LOG_WARN("alloc left content info failed", K(ret)); + } else if (OB_FAIL(ObArgType::PN_SUBPATH != left_type && ObXmlUtil::check_bool_rule(left_info, left_union))) { + LOG_WARN("check bool_rule failed", K(ret)); + } else if (left_union) { + ret_bool = true; + } else { + ObSeekVector right_vec; + if (OB_FAIL(get_seek_vec(ctx, right_node, right_vec))) { + LOG_WARN("right get_seek_vec failed", K(ret)); + } else { + ret_bool = right_vec.size() > 0 ? 
true : false; + } + } + } else { + if (ObArgType::PN_SUBPATH == left_type) { + ret_bool = left_union; + } else { + ObNodeTypeAndContent *left_info = nullptr; + ObNodeTypeAndContent *right_info = nullptr; + if (OB_FAIL(ObPathUtil::alloc_node_content_info(ctx.alloc_, &left_arg_node->arg_, left_type, left_info))) { + LOG_WARN("alloc left content info failed", K(ret)); + } else if (OB_FAIL(ObPathUtil::alloc_node_content_info(ctx.alloc_, &right_arg_node->arg_, right_type, right_info))) { + LOG_WARN("alloc left content info failed", K(ret)); + } else if (OB_FAIL(ObXmlUtil::inner_union(left_info, right_info, ret_bool))) { + LOG_WARN("inner union failed", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + ObPathArgNode* ans = nullptr; + if (OB_FAIL(ObXmlUtil::alloc_arg_node(ctx.alloc_, ans))) { + LOG_WARN("fail to alloc arg node", K(ret)); + } else if (OB_FALSE_IT(ans = new (ans) ObPathArgNode(ctx.ctx_, ObParserType::PARSER_XML_PATH))) { + } else if (OB_FAIL(ans->init(ret_bool, false))) { + LOG_WARN("fail to init arg node", K(ret)); + } else if (OB_FAIL(ObPathUtil::add_scalar(ctx.alloc_, ans, res))) { + LOG_WARN("add scalar failed"); + } + } + return ret; +} + +int ObPathUtil::filter_single_node(ObPathCtx &ctx, ObPathNode* filter_node, ObSeekVector &res) +{ + INIT_SUCC(ret); + if (OB_ISNULL(filter_node)) { + ret= OB_BAD_NULL_ERROR; + LOG_WARN("get filter node null", K(ret)); + } else if (filter_node->node_type_.is_arg() + && (PN_DOUBLE != filter_node->node_type_.get_arg_type() + || PN_STRING != filter_node->node_type_.get_arg_type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get filter node type unexpect", K(ret), K(filter_node->node_type_.get_arg_type())); + } else { + bool ret_bool = false; + ObPathArgNode* ans = nullptr; + ObPathArgNode *filter_arg_node; + ObNodeTypeAndContent *filter_content = nullptr; + if (OB_FAIL(ObXmlUtil::alloc_arg_node(ctx.alloc_, ans))) { + LOG_WARN("fail to alloc arg node", K(ret)); + } else if (OB_FALSE_IT(ans = new (ans) ObPathArgNode(ctx.ctx_, 
ObParserType::PARSER_XML_PATH))) { + } else if (OB_ISNULL(filter_arg_node = static_cast(filter_node))) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("filter arg node null", K(ret)); + } else if (OB_FAIL(ObPathUtil::alloc_node_content_info(ctx.alloc_, &filter_arg_node->arg_, + filter_node->node_type_.get_arg_type(), + filter_content))) { + LOG_WARN("alloc filter content info failed", K(ret)); + } else if (OB_FAIL(ObXmlUtil::check_bool_rule(filter_content, ret_bool))) { + LOG_WARN("check bool rule failed", K(ret), K(filter_content)); + } else if (OB_FAIL(ans->init(ret_bool, false))) { + LOG_WARN("fail to init arg node", K(ret)); + } else if (OB_FAIL(ObPathUtil::add_scalar(ctx.alloc_, ans, res))) { + LOG_WARN("add scalar failed"); + } + } + return ret; +} + +int ObPathFilterOpNode::get_filter_ans(ObFilterOpAns& ans, ObPathCtx& filter_ctx) +{ + INIT_SUCC(ret); + bool end_loop = false; + ObSeekResult filter; + + for (int i = 0; i < size() && OB_SUCC(ret) && !end_loop; ++i) { + bool filter_ans = false; + ObPathNode* filter_node = static_cast(member(i)); + if (OB_ISNULL(filter_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (filter_node->node_type_.is_arg()) { + ret = OB_NOT_IMPLEMENT; + LOG_WARN("single arg not support"); + } else if (OB_FALSE_IT(filter_node->is_seeked_ = false)) { + } else if (OB_FAIL(filter_node->eval_node(filter_ctx, filter))) { + } else if (OB_FAIL(ObPathUtil::seek_res_to_boolean(filter, filter_ans))) { + LOG_WARN("seek res to boolean failed", K(ret)); + } else if (!filter_ans) { + end_loop = true; + ans = FILTERED_FALSE; + } else if (i == size() - 1) { + ans= FILTERED_TRUE; + } + } + return ret; +} + +int ObPathFilterOpNode::get_valid_res(ObPathCtx &ctx, ObSeekResult& res, bool is_left) +{ + INIT_SUCC(ret); + ObPathNode* eval_path = is_left ? 
left_ : right_; + bool end_seek = false; + while (OB_SUCC(ret) && !end_seek) { + if (OB_FAIL(eval_path->eval_node(ctx, res))) { + LOG_WARN("fail to seek", K(ret)); + } else if (res.is_scalar_) { + end_seek = true; + } else if (OB_NOT_NULL(res.result_.base_)) { + end_seek = true; + } + } // end while + + return ret; +} + +int ObPathFilterOpNode::init_right_without_filter(ObPathCtx &ctx, ObSeekResult& res) +{ + INIT_SUCC(ret); + bool get_valid_right = false; + while (OB_SUCC(ret) && !get_valid_right) { + ObSeekResult left_res; + if (OB_FAIL(get_valid_res(ctx, left_res, true))) { + LOG_WARN("fail to eval left"); + } else if (left_res.is_scalar_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("shoudl not be scalar"); + } else if (OB_ISNULL(right_)) { + res = left_res; + get_valid_right = true; + } else { + ctx.cur_doc_ = left_res.result_.base_; + ObSeekResult right_res; + if (OB_FAIL(get_valid_res(ctx, right_res, false))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to eval right"); + } else { + ret = OB_SUCCESS; + get_valid_right = false; + } + } else { + res = right_res; + get_valid_right = true; + } + } // eval left success + } + return ret; +} + +int ObPathFilterOpNode::init_right_with_filter(ObPathCtx &ctx, ObSeekResult& res) +{ + INIT_SUCC(ret); + bool get_valid_right = false; + while (OB_SUCC(ret) && !get_valid_right) { + ObSeekResult left_res; + if (OB_FAIL(get_valid_res(ctx, left_res, true))) { + LOG_WARN("fail to eval left"); + } else if (left_res.is_scalar_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("shoudl not be scalar"); + } else { + ctx.cur_doc_ = left_res.result_.base_; + ObFilterOpAns ans; + if (OB_FAIL(get_filter_ans(ans, ctx))) { + LOG_WARN("fail to get filter ans"); + } else if (ans == FILTERED_TRUE) { + if (OB_NOT_NULL(right_)) { + ObSeekResult right_res; + if (OB_FAIL(get_valid_res(ctx, right_res, false))) { + if (ret == OB_ITER_END) { + // didn't get valid ans, get next left and continue + ret = OB_SUCCESS; + get_valid_right = false; + } else { + 
LOG_WARN("fail to get right"); + } + } else { + res = right_res; + get_valid_right = true; + } + } else { + res = left_res; + get_valid_right = true; + } + } else if (ans == FILTERED_FALSE) { + // this filter ans if false, get another left, and try again + get_valid_right = false; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("there should have ans"); + } + } // eval left success + } + return ret; +} + +int ObPathFilterOpNode::eval_node(ObPathCtx &ctx, ObSeekResult& res) +{ + INIT_SUCC(ret); + ObSeekVector res_filtered; + if (OB_ISNULL(left_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("left can't be null", K(ret)); + } else if (!contain_relative_path_ && !need_cache_) { // could get filter ans directly + if (ans_ == ObFilterOpAns::NOT_FILTERED) { + ObFilterOpAns ans; + if (OB_FAIL(get_filter_ans(ans, ctx))) { + LOG_WARN("fail to get filter ans"); + } + ans_ = ans; + } + if (OB_FAIL(ret)) { + } else if (ans_ == FILTERED_TRUE) { + if (!is_seeked_) { // seek left and then right + ret = init_right_without_filter(ctx, res); + is_seeked_ = true; + } else if (OB_NOT_NULL(right_)) { + if (OB_FAIL(get_valid_res(ctx, res, false))) { // get_right_next + if (ret != OB_ITER_END) { + LOG_WARN("fail to eval right"); + } else { + ret = init_right_without_filter(ctx, res); + } + } + } else { // right is null, just eval left + ret = get_valid_res(ctx, res, true); + } + } else if (ans_ == ObFilterOpAns::FILTERED_FALSE) { + // filter result is false + ret = OB_ITER_END; + } else if (ans_ == ObFilterOpAns::NOT_FILTERED) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("there should have ans"); + } + } else if (!is_seeked_ || OB_ISNULL(right_)) { + ret = init_right_with_filter(ctx, res); + } else if (OB_FAIL(get_valid_res(ctx, res, false))) { // get_right_next + if (ret != OB_ITER_END) { + LOG_WARN("fail to eval right"); + } else { + ret = init_right_without_filter(ctx, res); + } + } + + if (OB_SUCC(ret)) { // do nothing + } else if (ret == OB_ITER_END) { + is_seeked_ = false; // reset + 
left_->is_seeked_ = false; + if (OB_NOT_NULL(right_)) { + right_->is_seeked_ = false; + } + if (contain_relative_path_ || need_cache_) { + ans_ = NOT_FILTERED; // reset ans + } + } + return ret; +} + +int ObPathFilterNode::eval_node(ObPathCtx &ctx, ObSeekResult& res) +{ + INIT_SUCC(ret); + if (is_seeked_) { + ret = OB_ITER_END; + is_seeked_ = false; + } else if (!need_cache_ && !contain_relative_path_ && filtered_) { + res = ans_; + is_seeked_ = true; + } else if (!ctx.is_inited()) { + ret = OB_INIT_FAIL; + LOG_WARN("should be inited", K(ret)); + } else if (1 == count()) { + // only one param, check bool rule for result + ret = OB_NOT_SUPPORTED; + } else if (count() != 2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("filter node is not filter type", K(ret)); + } else { + // two params, filter for op + ObPathNode* left_node; + ObPathNode* right_node; + ObArgType left_type; + ObArgType right_type; + ObFilterType op = node_type_.get_filter_type(); + ObNodeSetVector left_node_vec; + ObNodeSetVector rigth_node_vec; + if (OB_FAIL(ObPathUtil::get_filter_node_result(ctx, member(0), left_node))) { + LOG_WARN("get left filter result failed", K(ret)); + } else if (OB_FAIL(ObPathUtil::get_filter_node_result(ctx, member(1), right_node))) { + LOG_WARN("get right filter result failed", K(ret)); + } else if (OB_ISNULL(left_node) || OB_ISNULL(right_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("node is null", K(ret), K(left_node), K(right_node)); + } else if (op == ObFilterType::PN_AND_COND || op == ObFilterType::PN_OR_COND) { + if (OB_FAIL(ObPathUtil::filter_logic_compare(ctx, left_node, right_node, op, res))) { + LOG_WARN("filter_logic_compare failed", K(ret)); + } + } else if (op == ObFilterType::PN_CMP_UNION) { + if (in_predication_) { + ret = OB_NOT_IMPLEMENT; + LOG_WARN("union out_predication_ not implement", K(ret)); + // if (OB_FAIL(ObPathUtil::filter_union(ctx, left_node, right_node, op, res))) { + // LOG_WARN("filter_union failed", K(ret)); + // } + } else { + // TODO 
outter_union + ret = OB_NOT_IMPLEMENT; + LOG_WARN("union out_predication_ not implement", K(ret)); + } + } else if (OB_FAIL(ObPathUtil::get_arg_type(left_type, left_node))) { + LOG_WARN("fail to get left type", K(ret)); + } else if (OB_FAIL(ObPathUtil::get_arg_type(right_type, right_node))) { + LOG_WARN("fail to get right type", K(ret)); + } else if (OB_FAIL(ObPathUtil::alloc_node_set_vector(ctx, left_node, left_type, left_node_vec))) { + LOG_WARN("alloc left node set vector failed", K(ret)); + } else if (OB_FAIL(ObPathUtil::alloc_node_set_vector(ctx, right_node, right_type, rigth_node_vec))) { + LOG_WARN("alloc right node set vector failed", K(ret)); + } else { + switch (op) { + case ObFilterType::PN_CMP_UNEQUAL: + case ObFilterType::PN_CMP_EQUAL: + case ObFilterType::PN_CMP_GT: + case ObFilterType::PN_CMP_GE: + case ObFilterType::PN_CMP_LT: + case ObFilterType::PN_CMP_LE: + if (OB_FAIL(ObPathUtil::filter_compare(ctx, + left_node_vec, left_type, + rigth_node_vec, right_type, + op, res))) { + LOG_WARN("filter_compare failed", K(ret)); + } + break; + + case ObFilterType::PN_CMP_ADD: + case ObFilterType::PN_CMP_SUB: + case ObFilterType::PN_CMP_MUL: + case ObFilterType::PN_CMP_DIV: + case ObFilterType::PN_CMP_MOD: + ret = OB_NOT_IMPLEMENT; + LOG_WARN("oparete implement", K(ret)); + // calculate TODO + // if (OB_FAIL(ObPathUtil::filter_calculate(ctx, + // left_node_vec, left_type, + // rigth_node_vec, right_type, + // op, res))) { + // LOG_WARN("filter calculate failed", K(ret)); + // } + break; + + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("filter node eval_node unexpected err", K(ret), K(op)); + } + } + is_seeked_ = true; + if (OB_SUCC(ret) && !need_cache_ && !contain_relative_path_) { + ans_ = res; + filtered_ = true; + } + } + return ret; +} + +int ObPathUtil::get_filter_node_result(ObPathCtx &ctx, ObLibTreeNodeBase* filter_node_base, ObPathNode* &res) +{ + INIT_SUCC(ret); + ObPathNode* filter_node; + if (OB_ISNULL(filter_node = static_cast(filter_node_base))) 
{ + ret = OB_BAD_NULL_ERROR; + LOG_WARN("filter node get null", K(ret)); + } else if (!filter_node->node_type_.is_filter()) { + res = filter_node; + } else { + ObSeekResult seek_result; + filter_node->is_seeked_ = false; + if (OB_FAIL(filter_node->eval_node(ctx, seek_result))) { + LOG_WARN("filter node eval node fail", K(ret)); + } else if (!seek_result.is_scalar_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("eval result size 0", K(ret)); + } else { + res = seek_result.result_.scalar_; + } + } + return ret; +} + +int ObPathArgNode::eval_node(ObPathCtx &ctx, ObSeekResult& res) +{ + INIT_SUCC(ret); + if (!ctx.is_inited()) { + ret = OB_INIT_FAIL; + LOG_WARN("should be inited", K(ret)); + } else if (!node_type_.is_xml_path()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect node_type", K(ret)); + } else { + // TODO waiting + ret = OB_NOT_IMPLEMENT; + LOG_WARN("others type not implement", K(ret)); + } + return ret; +} + +int ObPathUtil::get_seek_vec(ObPathCtx &ctx, ObPathNode *from_node, ObSeekVector &res) +{ + INIT_SUCC(ret); + ObArgType node_type = ObArgType::PN_ARG_ERROR; + if (OB_FAIL(ObPathUtil::get_arg_type(node_type, from_node))) { + LOG_WARN("fail to get node type", K(ret)); + } else if (node_type == ObArgType::PN_SUBPATH) { + ObSeekResult path_res; + bool end_seek = false; + while (OB_SUCC(ret)) { + if (OB_FAIL(from_node->eval_node(ctx, path_res))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to seek", K(ret)); + } + } else if (path_res.is_scalar_) { + if (OB_FAIL(res.push_back(path_res))) { + LOG_WARN("fail to push", K(ret)); + } + } else if (path_res.result_.base_->is_tree()) { + if (OB_FAIL(res.push_back(path_res))) { + LOG_WARN("fail to push_back value into result", K(ret), K(res.size())); + } + } else if (OB_FAIL(ctx.alloc_new_bin(path_res.result_.base_))) { + LOG_WARN("fail to alloc", K(ret)); + } else if (OB_FAIL(res.push_back(path_res))) { + LOG_WARN("fail to push_back value into result", K(ret), K(res.size())); + } + } // end while + if (ret == 
OB_ITER_END) { + ret = OB_SUCCESS; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should be subpath", K(ret)); + } + return ret; +} + +int ObPathUtil::seek_res_to_boolean(ObSeekResult& filter, bool &res) +{ + INIT_SUCC(ret); + if (!filter.is_scalar_ ) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("now, filter ans must be scalar(boolean)", K(ret)); + } else if(OB_ISNULL(filter.result_.scalar_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("arg is null", K(ret)); + } else { + res = filter.result_.scalar_->arg_.boolean_; + } + return ret; +} + +int ObPathUtil::get_arg_type(ObArgType& arg_type, ObPathNode *path_node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(path_node)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (path_node->get_node_type().is_arg()) { + arg_type = path_node->get_node_type().get_arg_type(); + } else { + arg_type = ObArgType::PN_SUBPATH; + } + return ret; +} + +int ObPathUtil::release_seek_vector(ObPathCtx &ctx, ObSeekVector& seek_vector) +{ + INIT_SUCC(ret); + bool need_release = true; + for (int i = 0; OB_SUCC(ret) && need_release && i < seek_vector.size(); ++i) { + ObSeekResult result = seek_vector[i]; + if (!result.is_scalar_ && OB_NOT_NULL(result.result_.base_) && result.result_.base_->is_binary()) { + ObXmlBin* bin = static_cast (result.result_.base_); + ctx.bin_pool_.free(bin); + } else { + need_release = false; + } + } + return ret; +} + +int ObPathUtil::collect_ancestor_ns(ObIMulModeBase* extend, + ObStack &ancestor_record, + ObXmlElement::NsMap &ns_map, + ObArray &ns_vec, + common::ObIAllocator* tmp_alloc) +{ + INIT_SUCC(ret); + if (OB_ISNULL(tmp_alloc)) { + } else { + int size = ancestor_record.size(); + for (int64_t pos = -1; OB_SUCC(ret) && pos < size; ++pos) { + // get parent node + ObXmlBin* current = nullptr; + int64_t attribute_num = 0; + if (pos == -1) { + if (OB_ISNULL(extend)) { // normal, means without extend + } else { + current = static_cast(extend); + attribute_num = current->attribute_size(); + } + } 
else { + current = static_cast(ancestor_record.at(pos)); + attribute_num = current->attribute_size(); + } + bool not_att_or_ns = false; + for (int pos = 0; OB_SUCC(ret) && pos < attribute_num && !not_att_or_ns; ++pos) { + ObXmlBin buff(*current); + ObXmlBin* tmp = &buff; + if (OB_FAIL(current->construct(tmp, nullptr))) { + LOG_WARN("failed to dup bin.", K(ret)); + } else if (OB_FAIL(tmp->set_at(pos))) { + LOG_WARN("failed to set at child.", K(ret)); + } else if (tmp->type() == M_NAMESPACE) { + // get ns info + ObXmlAttribute* ns_node = nullptr; + ObString key; + ObString value; + // init ns node + if (OB_FAIL(tmp->get_key(key))) { + LOG_WARN("failed to eval key.", K(ret)); + } else if (OB_FAIL(tmp->get_value(value))) { + LOG_WARN("failed to eval value.", K(ret)); + } else if (OB_ISNULL(ns_node = static_cast(tmp_alloc->alloc(sizeof(ObXmlAttribute))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate attribute node.", K(ret)); + } else { + ns_node = new(ns_node) ObXmlAttribute(ObMulModeNodeType::M_NAMESPACE, current->ctx_); + ns_node->set_xml_key(key); + ns_node->set_value(value); + ret = ns_vec.push_back(ns_node); + } + + // if found duplicate key, overwrite + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(ns_map.get(key)) && OB_FAIL(ns_map.erase_refactored(key))) { + LOG_WARN("fail to delete ns from map", K(ret), K(key)); + } else if (OB_FAIL(ns_map.set_refactored(key, ns_vec[ns_vec.size() - 1]))) { + LOG_WARN("fail to add ns from map", K(ret), K(key)); + } + } else if (tmp->type() == M_ATTRIBUTE) { + } else { + not_att_or_ns = true; // neither ns nor attribute, stop searching + } + } + } + } + return ret; +} + +int ObPathUtil::add_ns_if_need(ObPathCtx &ctx, ObIMulModeBase*& res) +{ + INIT_SUCC(ret); + if (!ctx.is_inited()) { + ret = OB_INIT_FAIL; + LOG_WARN("should be inited", K(ret)); + } else if (ctx.doc_root_->is_tree()) { // do not need add ns + } else if (ctx.defined_ns_ > 0 || (OB_NOT_NULL(ctx.extend_) && ctx.extend_->attribute_size() > 0)) 
{ + ObXmlBin* bin = static_cast(res); + ObXmlElement::NsMap ns_map; + ObXmlElement::NsMap::iterator ns_map_iter; + // be used as ns node buffer pool + ObArray ns_vec; + ns_vec.set_block_size(PATH_DEFAULT_PAGE_SIZE); + int map_size = (ctx.ancestor_record_.size() > 0 ) ? 4 * ctx.ancestor_record_.size() : ctx.extend_->attribute_size(); + if (OB_FAIL(ns_map.create(map_size, "PATH_PARENT_NS"))) { + LOG_WARN("ns map create failed", K(ret)); + } else if (OB_FAIL(collect_ancestor_ns(ctx.extend_, ctx.ancestor_record_, ns_map, ns_vec, ctx.get_tmp_alloc()))) { + LOG_WARN("ns map init failed", K(ret)); + } else { + ObXmlElement element_ns(ObMulModeNodeType::M_ELEMENT, ctx.doc_root_->get_mem_ctx()); + ret = element_ns.init(); + for (ns_map_iter = ns_map.begin(); OB_SUCC(ret) && ns_map_iter != ns_map.end(); ns_map_iter++) { + if (OB_FAIL(element_ns.add_attribute(ns_map_iter->second))) { + LOG_WARN("fail to add ns", K(ret)); + } + } + // serialize element node as extend area + if (OB_SUCC(ret) && (OB_FAIL(bin->append_extend(&element_ns)))) { + LOG_WARN("fail to append extend", K(ret)); + } + } + ns_map.clear(); + ns_vec.destroy(); + } // need to add ns + return ret; +} +int ObPathUtil::alloc_path_node(ObIAllocator *allocator, ObPathNode*& node) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObPathNode* path_node = + static_cast (allocator->alloc(sizeof(ObPathNode))); + if (OB_ISNULL(path_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at path_node", K(ret)); + } else { + node = path_node; + } + } + return ret; +} + +int ObPathUtil::alloc_binary(common::ObIAllocator *allocator, ObXmlBin*& bin) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObXmlBin* bin_node = static_cast (allocator->alloc(sizeof(ObXmlBin))); + if (OB_ISNULL(bin_node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + 
LOG_WARN("allocate row buffer failed at path_node", K(ret)); + } else { + bin = bin_node; + } + } + return ret; +} + +int ObPathUtil::alloc_iterator(common::ObIAllocator *allocator, ObSeekIterator*& ada) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObSeekIterator* node = + static_cast (allocator->alloc(sizeof(ObSeekIterator))); + if (OB_ISNULL(node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at path_node", K(ret)); + } else { + ada = node; + } + } + return ret; +} + +int ObPathUtil::get_seek_iterator(common::ObIAllocator *allocator, ObPathLocationNode* loc, ObSeekIterator*& ada) +{ + INIT_SUCC(ret); + if (loc->is_complex_seektype()) { + ObSeekComplexIterator* tmp_ada = nullptr; + if (OB_FAIL(ObPathUtil::alloc_complex_iterator(allocator, tmp_ada))) { + LOG_WARN("fail to alloc location node", K(ret)); + } else { + tmp_ada = new (tmp_ada) ObSeekComplexIterator(allocator); + ada = tmp_ada; + } + } else if (loc->is_ancestor_axis()) { + ObSeekAncestorIterator* tmp_ada = nullptr; + if (OB_FAIL(ObPathUtil::alloc_ancestor_iterator(allocator, tmp_ada))) { + LOG_WARN("fail to alloc location node", K(ret)); + } else { + tmp_ada = new (tmp_ada) ObSeekAncestorIterator(allocator); + ada = tmp_ada; + } + } else { + ObSeekIterator* tmp_ada = nullptr; + if (OB_FAIL(ObPathUtil::alloc_iterator(allocator, tmp_ada))) { + LOG_WARN("fail to alloc location node", K(ret)); + } else { + tmp_ada = new (tmp_ada) ObSeekIterator(); + ada = tmp_ada; + } + } + return ret; +} + +int ObPathUtil::alloc_complex_iterator(common::ObIAllocator *allocator, ObSeekComplexIterator*& ada) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObSeekComplexIterator* node = + static_cast (allocator->alloc(sizeof(ObSeekComplexIterator))); + if (OB_ISNULL(node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + 
LOG_WARN("allocate row buffer failed at path_node", K(ret)); + } else { + ada = node; + } + } + return ret; +} + +int ObPathUtil::alloc_ancestor_iterator(common::ObIAllocator *allocator, ObSeekAncestorIterator*& ada) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + ObSeekAncestorIterator* node = + static_cast (allocator->alloc(sizeof(ObSeekAncestorIterator))); + if (OB_ISNULL(node)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at path_node", K(ret)); + } else { + ada = node; + } + } + return ret; +} + +int ObPathUtil::alloc_node_content_info(ObIAllocator *allocator, ObString *str, ObNodeTypeAndContent *&res) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + res = static_cast (allocator->alloc(sizeof(ObNodeTypeAndContent))); + if (OB_ISNULL(res)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at seek result", K(ret)); + } else { + ObPathStr path_str; + path_str.len_ = str->length(); + path_str.name_ = str->ptr(); + ObArgNodeContent *content; + if (OB_ISNULL(content = static_cast (allocator->alloc(sizeof(ObArgNodeContent))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at seek result", K(ret)); + } else if (OB_FALSE_IT(content->str_ = path_str)) { + } else { + res->type_ = ObArgType::PN_STRING; + res->content_ = content; + } + } + } + return ret; +} + +int ObPathUtil::alloc_node_content_info(ObIAllocator *allocator, ObArgNodeContent *content, ObArgType type, ObNodeTypeAndContent *&res) +{ + INIT_SUCC(ret); + if (OB_ISNULL(allocator)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else { + res = static_cast (allocator->alloc(sizeof(ObNodeTypeAndContent))); + if (OB_ISNULL(res)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at seek result", K(ret)); + } else 
{ + res->type_ = type; + res->content_ = content; + } + } + return ret; +} + +} // namespace common +} // namespace oceanbase \ No newline at end of file diff --git a/deps/oblib/src/lib/xml/ob_xpath.h b/deps/oblib/src/lib/xml/ob_xpath.h new file mode 100644 index 0000000000..5fb93b44b4 --- /dev/null +++ b/deps/oblib/src/lib/xml/ob_xpath.h @@ -0,0 +1,972 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface support for the XML Path abstraction. + */ + +#ifndef OCEANBASE_SQL_OB_XPATH +#define OCEANBASE_SQL_OB_XPATH + +#include "ob_tree_base.h" +#include "lib/string/ob_string.h" +#include "lib/container/ob_array.h" +#include "lib/container/ob_vector.h" +#include "src/share/datum/ob_datum.h" +#include "lib/json_type/ob_json_path.h" +#include "lib/xml/ob_mul_mode_reader.h" +#include "lib/xml/ob_multi_mode_interface.h" +#include "lib/allocator/ob_pooled_allocator.h" + +namespace oceanbase { +namespace common { + +enum ObParserType { + PARSER_ERROR = 0, + PARSER_JSON_PATH_STRICT = 1, + PARSER_JSON_PATH_LAX, + PARSER_XML_PATH, + PARSER_MAX +}; +enum ObPathNodeClass { + PN_ERROR = 0, + PN_ROOT = 1, + PN_LOCATION, // location node + PN_FILTER, // filter node + PN_FUNCTION, // function node + PN_ARG, // argument node + PN_LOCATION_FILTER, + PN_MAX +}; + +enum ObLocationType { + PN_LOCATION_ERROR = 0, + PN_KEY, // seek by keyname + PN_ARRAY, // seek by keyname + PN_ELLIPSIS, + PN_LOCATION_MAX +}; + +enum ObFilterType { + PN_FILTER_ERROR = 0, + PN_NOT_COND, // !(cond) + 
PN_CMP_UNION = 2, // union |, same with ObXpathFilterChar::UNION, don't change the position from union to mod + PN_OR_COND, // cond1 || cond2 + PN_AND_COND, // cond1 && cond2 + PN_CMP_EQUAL, // predicate == + PN_CMP_UNEQUAL, // != + PN_CMP_LE, // <= + PN_CMP_LT, // < + PN_CMP_GE, // >= + PN_CMP_GT, // > + PN_CMP_ADD, // + + PN_CMP_SUB, // - + PN_CMP_MUL, // * + PN_CMP_DIV, // div + PN_CMP_MOD, // mod + PN_SUBSTRING, // has substring + PN_STARTS_WITH, // starts with + PN_LIKE, // like + PN_LIKE_REGEX, // like_regex + PN_EQ_REGEX, // eq_regex + PN_NOT_EXISTS, + PN_EXISTS, // exist + PN_PURE_ARG, + PN_FILTER_MAX +}; + +static constexpr char* filter_type_str_map[ObFilterType::PN_FILTER_MAX] = { + const_cast("!("), + const_cast(" | "), + const_cast(" or "), // json is || + const_cast(" and "), // json is && + const_cast(" = "), // json is == + const_cast(" != "), + const_cast(" <= "), + const_cast(" < "), + const_cast(" >= "), + const_cast(" > "), + const_cast(" + "), + const_cast(" - "), + const_cast(" * "), + const_cast(" div "), + const_cast(" mod "), +}; + +enum ObFuncType { + PN_FUNC_ERROR = 0, + PN_ABS, // abs() + PN_BOOLEAN_FUNC, // boolean() + PN_BOOL_ONLY, // booleanOnly() + PN_CEILING, + PN_DATE_FUNC, + PN_DOUBLE_FUNC, + PN_FLOOR, + PN_LENGTH, + PN_LOWER, + PN_NUMBER_FUNC, + PN_NUM_ONLY, + PN_SIZE, + PN_STRING_FUNC, + PN_STR_ONLY, + PN_TIMESTAMP, + PN_TYPE, + PN_UPPER, + PN_POSITION, + PN_LAST, + PN_COUNT, + PN_CONCAT, + PN_CONTAINS, + PN_FALSE, + PN_TRUE, + PN_LOCAL_NAME, + PN_LANG, + PN_NOT_FUNC, + PN_SUM, + PN_NAME, + PN_NS_URI, + PN_NORMALIZE_SPACE, + PN_SUBSTRING_FUNC, + PN_ROUND, + PN_FUNC_MAX +}; + +static constexpr char* func_str_map[PN_FUNC_MAX - PN_ABS] = { + const_cast("abs("), + const_cast("boolean("), + const_cast("booleanOnly("), + const_cast("ceiling("), + const_cast("date("), + const_cast("double("), + const_cast("floor("), + const_cast("length("), + const_cast("lower("), + const_cast("number("), + const_cast("numberOnly("), + 
const_cast("size("), + const_cast("string("), + const_cast("stringOnly("), + const_cast("timestamp("), + const_cast("type("), + const_cast("upper("), + const_cast("position("), + const_cast("last("), + const_cast("count("), + const_cast("concat("), + const_cast("contains("), + const_cast("false("), + const_cast("true("), + const_cast("local-name("), + const_cast("lang("), + const_cast("not("), + const_cast("sum("), + const_cast("name("), + const_cast("namespace-uri("), + const_cast("normalize-space("), + const_cast("substring("), + const_cast("round("), +}; + +static constexpr int func_arg_num[PN_FUNC_MAX][2] = { + /* PN_ABS */ {0, 0}, + /* PN_BOOLEAN_FUNC */ {1, 1}, + /* PN_BOOL_ONLY */ {0, 0}, + /* PN_CEILING */ {0, 0}, + /* PN_DATE_FUNC */ {0, 0}, + /* PN_DOUBLE_FUNC */ {0, 0}, + /* PN_FLOOR */ {0, 0}, + /* PN_LENGTH */ {0, 0}, + /* PN_LOWER */ {0, 0}, + /* PN_NUMBER_FUNC */ {0, 0}, + /* PN_NUM_ONLY */ {0, 0}, + /* PN_SIZE */ {0, 0}, + /* PN_STRING_FUNC */ {0, 0}, + /* PN_STR_ONLY */ {0, 0}, + /* PN_TIMESTAMP */ {0, 0}, + /* PN_TYPE */ {0, 0}, + /* PN_UPPER */ {0, 0}, + /* PN_POSITION */ {0, 0}, + /* PN_LAST */ {0, 0}, // following func, todo + /* PN_COUNT */ {1, 1}, + /* PN_CONCAT */ {0, 0}, + /* PN_CONTAINS */ {0, 0}, + /* PN_FALSE */ {0, 0}, + /* PN_TRUE */ {0, 0}, + /* PN_LOCAL_NAME */ {0, 0}, + /* PN_LANG */ {0, 0}, + /* PN_NOT_FUN */ {1, 1}, + /* PN_SUM */ {0, 0}, + /* PN_NAME */ {0, 0}, + /* PN_NS_URI */ {0, 0}, + /* PN_NORMALIZE_SPACE*/ {0, 0}, + /* PN_SUBSTRING_FUNC */ {0, 0}, + /* PN_ROUND */ {0, 0}, + /* PN_FUNC_MAX */ {0, 0}, +}; + +static constexpr int func_name_len[PN_FUNC_MAX] = { + /* PN_ABS */ 3, + /* PN_BOOLEAN_FUNC */ 7, + /* PN_BOOL_ONLY */ 11, + /* PN_CEILING */ 7, + /* PN_DATE_FUNC */ 4, + /* PN_DOUBLE_FUNC */ 6, + /* PN_FLOOR */ 5, + /* PN_LENGTH */ 6, + /* PN_LOWER */ 5, + /* PN_NUMBER_FUNC */ 6, + /* PN_NUM_ONLY */ 10, + /* PN_SIZE */ 4, + /* PN_STRING_FUNC */ 6, + /* PN_STR_ONLY */ 10, + /* PN_TIMESTAMP */ 5, + /* PN_TYPE */ 4, + /* 
PN_UPPER */ 5, + /* PN_POSITION */ 8, + /* PN_LAST */ 4, // following func, todo + /* PN_COUNT */ 5, + /* PN_CONCAT */ 6, + /* PN_CONTAINS */ 8, + /* PN_FALSE */ 5, + /* PN_TRUE */ 4, + /* PN_LOCAL_NAME */ 10, + /* PN_LANG */ 4, + /* PN_NOT_FUN */ 3, + /* PN_SUM */ 3, + /* PN_NAME */ 4, + /* PN_NS_URI */ 13, + /* PN_NORMALIZE_SPACE*/ 15, + /* PN_SUBSTRING_FUNC */ 9, + /* PN_ROUND */ 5, + /* PN_FUNC_MAX */ 0, +}; + +enum ObArgType { + PN_ARG_ERROR = 0, + PN_BOOLEAN, + PN_STRING, + PN_INT, + PN_DOUBLE, + PN_SQL_VAR, + PN_SUBPATH, + PN_ARG_MAX +}; + +enum ObSeekType { + ERROR_SEEK = 0, + OBJECT = 1, // JSON OBJECT + ARRAY, + NODES, // 3 + // ObSeekType (from element to pi) is corresponding to ObMulModeNodeType, don't change the position + ELEMENT, // 4 + TEXT, + COMMENT, + PROCESSING_INSTRUCTION, + MAX_SEEK +}; + + +static constexpr char* nodetest_str_map[ObSeekType::MAX_SEEK - ObSeekType::NODES] = { + const_cast("node()"), + const_cast(""), // element + const_cast("text()"), + const_cast("comment()"), + const_cast("processing-instruction(") +}; + +enum ObPathNodeAxis { + ERROR_AXIS = 0, // wrong axis, only used in ellipsis node + SELF = 1, + PARENT, + ANCESTOR, + ANCESTOR_OR_SELF, + CHILD, + DESCENDANT, + DESCENDANT_OR_SELF, + FOLLOWING_SIBLING, + FOLLOWING, + PRECEDING_SIBLING, + PRECEDING, + ATTRIBUTE, + NAMESPACE, + MAX_AXIS +}; + +static constexpr char* axis_str_map[ObPathNodeAxis::NAMESPACE] = { + const_cast("self::"), + const_cast("parent::"), + const_cast("ancestor::"), + const_cast("ancestor-or-self::"), + const_cast("child::"), + const_cast("descendant::"), + const_cast("descendant-or-self::"), + const_cast("following-sibling::"), + const_cast("following::"), + const_cast("preceding-sibling::"), + const_cast("preceding::"), + const_cast("@"), + const_cast("namespace::") +}; + +// characters that will be pushed into char_stack when parse filter +// all characters should in priority comparision array +enum ObXpathFilterChar { + CHAR_BEGIN_FILTER = 0, /* 0 [ */ 
+ CHAR_LEFT_BRACE = 1, /* 1 ( */ + CHAR_UNION = 2, /* 2 | */ // equal to ObFilterType::PN_CMP_UNION + CHAR_OR, /* 3 or */ + CHAR_AND, /* 4 and */ + CHAR_EQUAL, /* 5 = */ + CHAR_UNEQUAL, /* 6 != */ + CHAR_LESS_EQUAL, /* 7 <= */ + CHAR_LESS, /* 8 < */ + CHAR_GREAT_EQUAL, /* 9 >= */ + CHAR_GREAT, /* 10 > */ + CHAR_ADD, /* 11 + */ + CHAR_SUB, /* 12 - */ + CHAR_MULTI, /* 13 * */ + CHAR_DIV, /* 14 div */ + CHAR_MOD, /* 15 mod */ + CHAR_RIGHT_BRACE, /* 16 ) */ + CHAR_END_FILTER, /* 17 ] */ + CMP_CHAR_MAX +}; + +enum ObPathArgType { + NOT_SUBPATH = 0, + IN_FILTER = 1, + IN_FUNCTION = 2 +}; + +enum ObFilterOpAns { + NOT_FILTERED = 0, + FILTERED_TRUE = 1, + FILTERED_FALSE = 2 +}; + +class ObPathNode; +class ObPathArgNode; +class ObSeekResult; +class ObNodeTypeAndContent; +class ObSeekIterator; +class ObSeekComplexIterator; +class ObSeekAncestorIterator; +class ObXmlPathFilter; + +typedef struct ObPathStr +{ + const char* name_; + uint64_t len_; +} ObPathStr; + +typedef union ObArgNodeContent +{ + ObPathStr str_; + ObPathNode* subpath_; + double double_; + bool boolean_; +} ObArgNodeContent; + +class ObNodeTypeAndContent +{ +public: + ObNodeTypeAndContent() {} + ObNodeTypeAndContent(ObArgType type) : type_(type) {} + int64_t to_string(char *buf, const int64_t buf_len) const { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "data_type = %d", type_); + return pos; + } + ObArgNodeContent *content_; + ObArgType type_; +}; +typedef common::ObArray ObNodeSetVector; + +typedef union ObPathResult +{ + ObIMulModeBase* base_; + ObPathArgNode* scalar_; +} ObPathResult; + +typedef ObJsonPathUtil ObXPathUtil; + +using ObPathVectorArena = PageArena; +using ObPathVectorPointers = ObVector; +using ObPathVecotorIterator = ObPathVectorPointers::const_iterator; +using ObFilterCharVectorArena = PageArena; +using ObFilterCharPointers = ObVector; + +typedef PageArena SeekVectorModuleArena; +typedef common::ObVector ObSeekVector; +typedef PageArena ModeBaseModuleArena; +typedef 
common::ObSortedVector ObIBaseSortedVector; +typedef PageArena SeekIterModuleArena; +typedef common::ObVector ObSeekIterVector; + +class ObLocationNodeContent +{ +public: + ObPathStr key_; + ObPathStr namespace_; + bool has_prefix_ns_ = false; + bool is_default_prefix_ns_ = false; + bool has_wildcard_ = false; +}; + +class ObSeekResult +{ +public: + ObSeekResult() : is_scalar_(false) {} + ObSeekResult(bool is_scalar) : is_scalar_(is_scalar) {} + ObSeekResult(const ObSeekResult& from) : result_(from.result_), is_scalar_(from.is_scalar_) {} + int64_t to_string(char *buf, const int64_t buf_len) const { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "data_type = %d", is_scalar_); + return pos; + } + ObPathResult result_; + bool is_scalar_; +}; + +class ObPathNodeType { +public: + ObPathNodeType () : + path_type_(PARSER_ERROR), node_class_(PN_ERROR), reserved_(0) {} + ObPathNodeType (const ObParserType& path_type) : + path_type_(path_type), node_class_(PN_ERROR), reserved_(0) {} + ObPathNodeType (const ObParserType& path_type, const ObPathNodeClass& node_class) : + path_type_(path_type), node_class_(node_class), reserved_(0) {} + ObPathNodeType (const ObParserType& path_type, const ObPathNodeClass& node_class, const ObLocationType& name) : + path_type_(path_type), node_class_(node_class), node_subtype_(name), reserved_(0) {} + ObPathNodeType (const ObParserType& path_type, const ObPathNodeClass& node_class, const ObFilterType& name) : + path_type_(path_type), node_class_(node_class), node_subtype_(name), reserved_(0) {} + ObPathNodeType (const ObParserType& path_type, const ObPathNodeClass& node_class, const ObFuncType& name) : + path_type_(path_type), node_class_(node_class), node_subtype_(name), reserved_(0) {} + OB_INLINE void set_location_type (const ObLocationType& name) {node_subtype_ = name;} + OB_INLINE void set_filter_type (const ObFilterType& name) {node_subtype_ = name;} + OB_INLINE void set_func_type (const ObFuncType& name) {node_subtype_ = name;} 
+ OB_INLINE void set_arg_type (const ObArgType& name) {node_subtype_ = name;} + + OB_INLINE bool is_strict_json_path () {return path_type_ == ObParserType::PARSER_JSON_PATH_STRICT;} + OB_INLINE bool is_lax_json_path () {return path_type_ == ObParserType::PARSER_JSON_PATH_LAX;} + OB_INLINE bool is_xml_path () {return path_type_ == ObParserType::PARSER_XML_PATH;} + OB_INLINE bool is_root () {return node_class_ == PN_ROOT;} + OB_INLINE bool is_location () {return node_class_ == PN_LOCATION;} + OB_INLINE bool is_filter () {return node_class_ == PN_FILTER;} + OB_INLINE bool is_location_filter () {return node_class_ == PN_LOCATION_FILTER;} + OB_INLINE bool is_func () {return node_class_ == PN_FUNCTION;} + OB_INLINE bool is_arg () {return node_class_ == PN_ARG;} + + OB_INLINE ObParserType get_path_type () {return (ObParserType)path_type_;} + OB_INLINE ObLocationType get_location_type () {return (ObLocationType)node_subtype_;} + OB_INLINE ObFilterType get_filter_type () {return (ObFilterType)node_subtype_;} + OB_INLINE ObFuncType get_func_type () {return (ObFuncType)node_subtype_;} + OB_INLINE ObArgType get_arg_type () {return (ObArgType)node_subtype_;} + + uint32_t path_type_ : 3; + uint32_t node_class_ : 5; + uint32_t node_subtype_ : 12; + uint32_t reserved_ : 12; +}; + +class ObPathVarPair final +{ +public: + ObPathVarPair() : key_(nullptr), value_(nullptr) {} + explicit ObPathVarPair(const ObString &key, ObDatum *value) + : key_(key), + value_(value) + { + } + ~ObPathVarPair() {} + OB_INLINE common::ObString get_key() const { return key_; } + OB_INLINE void set_xml_key(const common::ObString &new_key) + { + key_.assign_ptr(new_key.ptr(), new_key.length()); + } + OB_INLINE void set_value(ObDatum *value) { value_ = value; } + OB_INLINE ObDatum *get_value() const { return value_; } + int64_t to_string(char *buf, const int64_t buf_len) const + { + int64_t pos = 0; + databuff_printf(buf, buf_len, pos, "key = %s", key_.ptr()); + return pos; + } +private: + common::ObString 
key_; + ObDatum *value_; +}; + +using ObPathPairArena = PageArena; +typedef common::ObVector ObPathVarArray; +static const int64_t PATH_DEFAULT_PAGE_SIZE = (1LL << 10); // 1k + +struct ObPathKeyCompare { + int operator()(const ObPathVarPair &left, const ObPathVarPair &right) + { + INIT_SUCC(ret); + common::ObString left_key = left.get_key(); + common::ObString right_key = right.get_key(); + // first compare length + if (left_key.length() != right_key.length()) { + ret = (left_key.length() < right_key.length()); + } else { // do Lexicographic order when length equals + ret = (left_key.compare(right_key) < 0); + } + return ret; + } + + int compare(const ObString &left, const ObString &right) + { + int result = 0; + // first compare length + if (left.length() != right.length()) { + result = left.length() - right.length(); + } else { // do Lexicographic order when length equals + result = left.compare(right); + } + return result; + } +}; + +class ObPathVarObject +{ + public: + ObPathVarObject(ObIAllocator &allocator) + : page_allocator_(allocator, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR), + pair_arena_(PATH_DEFAULT_PAGE_SIZE, page_allocator_), + object_array_(&pair_arena_, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR) + {} + ~ObPathVarObject() {} + OB_INLINE void reset() { object_array_.clear(); } + OB_INLINE uint64_t element_count() const { return object_array_.size(); } + int add(const common::ObString &key, ObDatum *value, bool with_unique_key = false); + ObDatum *get_value(const common::ObString &key) const; + private: + ModulePageAllocator page_allocator_; + ObPathPairArena pair_arena_; + ObPathVarArray object_array_; +}; + +class ObPathCtx +{ +public: + friend class ObPathNode; + ObPathCtx (common::ObIAllocator *alloc) : alloc_(alloc), ancestor_record_(alloc), is_inited_(0) {} + ObPathCtx(ObMulModeMemCtx* ctx) : + ctx_(ctx), alloc_(ctx->allocator_), doc_root_(nullptr), cur_doc_(nullptr), cur_doc_position_(-1), + ancestor_record_(ctx->allocator_), is_inited_(0), 
is_auto_wrap_(0), + check_duplicate_(1), need_boolean_(0), need_record_(0), reserved_(0) {} + ~ObPathCtx() {bin_pool_.reset();} + int init(ObMulModeMemCtx* ctx, ObIMulModeBase *doc_root, ObIMulModeBase *cur_doc, + ObIAllocator *tmp_alloc, bool is_auto_wrap, bool need_record, bool add_ns); + int init_extend(); + int push_ancestor(ObIMulModeBase*& base_node); + int alloc_new_bin(ObIMulModeBase*& base_node); + int alloc_new_bin(ObXmlBin*& base_node, ObMulModeMemCtx* ctx); + ObIMulModeBase* top_ancestor(); + int pop_ancestor(); + OB_INLINE void set_cur_doc_to_root() {cur_doc_ = doc_root_;} + bool if_need_record() const; + bool is_inited() const; + int record_if_need(); + void reset() {ancestor_record_.reset(); bin_pool_.reset();} + int reinit(ObIMulModeBase* doc, ObIAllocator *tmp_alloc); + common::ObIAllocator* get_tmp_alloc() { return tmp_alloc_; } + ObMulModeMemCtx* ctx_; + common::ObIAllocator *alloc_; + common::ObIAllocator *tmp_alloc_; + ObIMulModeBase *doc_root_; + ObIMulModeBase *cur_doc_; + ObIMulModeBase* extend_; + ObPathPool bin_pool_; + int cur_doc_position_; + ObStack ancestor_record_; + int defined_ns_; + union { + struct { + uint16_t is_inited_ : 1; + uint16_t is_auto_wrap_ : 1; + uint16_t check_duplicate_ : 1; + uint16_t need_boolean_ : 1; + uint16_t need_record_ : 1; + uint16_t add_ns_ : 1; + uint16_t reserved_ : 10; + }; + + uint16_t flags_; + }; +}; + +class ObPathNode : public ObLibContainerNode +{ +public: + ObPathNode(ObMulModeMemCtx* ctx, const ObParserType& parser_type) : + ObLibContainerNode(OB_PATH_TYPE, MEMBER_SEQUENT_FLAG, ctx), + need_cache_(false), contain_relative_path_(false) { node_type_.path_type_ = parser_type; node_type_.node_class_ = ObPathNodeClass::PN_ERROR;} + ObPathNode(ObMulModeMemCtx* ctx, const ObParserType& parser_type, const ObPathNodeClass& node_class) : + ObLibContainerNode(OB_PATH_TYPE, MEMBER_SEQUENT_FLAG, ctx), + need_cache_(false), contain_relative_path_(false) { node_type_.path_type_ = parser_type; 
node_type_.node_class_ = node_class;} // root + virtual ~ObPathNode() {} + virtual ObPathNodeType get_node_type() {return node_type_;} + virtual int eval_node(ObPathCtx &ctx, ObSeekResult& res); + virtual int node_to_string(ObStringBuffer& str); + ObPathNodeType node_type_; + bool need_cache_; + bool contain_relative_path_; + bool is_seeked_ = false; +}; + +class ObPathRootNode : public ObPathNode +{ +public: + ObPathRootNode(ObMulModeMemCtx* ctx, const ObParserType& parser_type) : + ObPathNode(ctx, parser_type, ObPathNodeClass::PN_ROOT), + arena_(PATH_DEFAULT_PAGE_SIZE, ctx->page_allocator_), + adapt_(&arena_, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR), + need_trans_(0), iter_pos_(-1), is_abs_path_(true) {} + virtual ~ObPathRootNode() {} + int reuse(ObPathCtx &ctx, ObIMulModeBase*& ans); + int next_adapt(ObPathCtx &ctx, ObIMulModeBase*& ans); + int init_adapt(ObPathCtx &ctx, ObIMulModeBase*& ans); + virtual int node_to_string(ObStringBuffer& str); + virtual int eval_node(ObPathCtx &ctx, ObSeekResult& res); + bool is_abs_subpath(); + SeekIterModuleArena arena_; + ObSeekIterVector adapt_; + int need_trans_; + int iter_pos_; + bool is_abs_path_; +}; + +class ObPathLocationNode : public ObPathNode +{ +public: + ObPathLocationNode(ObMulModeMemCtx* ctx, const ObParserType& parser_type) : + ObPathNode(ctx, parser_type, ObPathNodeClass::PN_LOCATION), is_absolute_(false), + has_filter_(false), check_namespace_(false), + seek_type_(ObSeekType::ERROR_SEEK), node_axis_(ObPathNodeAxis::ERROR_AXIS) {} + int init(const ObLocationType& location_type); + int init(const ObLocationType& location_type, const ObSeekType& seek_type); + int init(const ObLocationType& location_type, const ObPathNodeAxis& axis_type); + int init(const ObLocationType& location_type, const ObSeekType& seek_type, const ObPathNodeAxis& axis_type); + virtual ~ObPathLocationNode() {} + OB_INLINE void set_axis(ObPathNodeAxis axis) {node_axis_ = axis;} + OB_INLINE void set_nodetest(ObSeekType seek_type) 
{seek_type_ = seek_type;} + OB_INLINE void set_wildcard_info(bool wildcard) {node_content_.has_wildcard_ = wildcard;} + OB_INLINE void set_prefix_ns_info(bool has_prefix_ns) {node_content_.has_prefix_ns_ = has_prefix_ns;} + OB_INLINE void set_default_prefix_ns(bool default_prefix_ns) {node_content_.is_default_prefix_ns_ = default_prefix_ns;} + OB_INLINE void set_check_ns_info(bool check_ns) {check_namespace_ = check_ns;} + OB_INLINE ObSeekType get_seek_type() {return seek_type_;} + OB_INLINE ObPathNodeAxis get_axis() {return node_axis_;} + OB_INLINE bool get_check_ns() {return check_namespace_;} + OB_INLINE ObString get_key_name() {return ObString(node_content_.key_.len_, node_content_.key_.name_);} + OB_INLINE ObString get_ns_name() {return ObString(node_content_.namespace_.len_, node_content_.namespace_.name_);} + OB_INLINE bool is_key_null() {return node_content_.key_.name_ == nullptr;} + OB_INLINE bool get_wildcard_info() {return node_content_.has_wildcard_;} + OB_INLINE bool get_prefix_ns_info() {return node_content_.has_prefix_ns_;} + OB_INLINE bool get_default_prefix_ns_info() {return node_content_.is_default_prefix_ns_;} + OB_INLINE bool has_filter() {return has_filter_;} + OB_INLINE bool is_ancestor_axis() {return node_axis_ == ANCESTOR || node_axis_ == ANCESTOR_OR_SELF;} + OB_INLINE bool is_complex_seektype() {return node_type_.get_location_type() == PN_ELLIPSIS || node_axis_ == DESCENDANT || node_axis_ == DESCENDANT_OR_SELF;} + OB_INLINE void set_ns_info(const char* name, uint64_t len) {node_content_.namespace_.name_ = name; node_content_.namespace_.len_ = len;} + OB_INLINE void set_key_info(const char* name, uint64_t len) {node_content_.key_.name_ = name;node_content_.key_.len_ = len;} + void set_nodetest_by_name(ObSeekType seek_type, const char* name, uint64_t len); + void set_nodetest_by_axis(); + int set_seek_info(ObPathSeekInfo& seek_info); + int set_check_ns_by_nodetest(ObIAllocator *allocator, ObString& default_ns); + // Get first_index according 
to from_end + virtual int node_to_string(ObStringBuffer& str); + virtual int eval_node(ObPathCtx &ctx, ObSeekResult& res); + int filter_location_res(ObPathCtx &ctx, ObIBaseSortedVector &dup, ObSeekVector &res); + int axis_to_string(ObStringBuffer& str); + int nodetest_to_string(ObStringBuffer& str); + int get_valid_attribute(ObIMulModeBase *doc, ObArray &hit); + int get_valid_namespace(ObIMulModeBase *doc, ObArray &hit); + bool is_filter_nodetest(); + bool is_absolute_; + bool has_filter_; + private: + bool check_namespace_; + ObSeekType seek_type_; + ObPathNodeAxis node_axis_; + ObLocationNodeContent node_content_; +}; + +class ObPathFilterNode : public ObPathNode +{ +public: + ObPathFilterNode(ObMulModeMemCtx* ctx, const ObParserType& parser_type) : + ObPathNode(ctx, parser_type, ObPathNodeClass::PN_FILTER), ans_(NOT_FILTERED), + in_predication_(false), is_boolean_(false) {} + ObPathFilterNode(ObMulModeMemCtx* ctx, const ObParserType& parser_type, bool in_predication) : + ObPathNode(ctx, parser_type, ObPathNodeClass::PN_FILTER) ,ans_(NOT_FILTERED), + in_predication_(in_predication), is_boolean_(false) {} + virtual ~ObPathFilterNode() {} + int init(const ObXpathFilterChar& filter_char, ObPathNode* left, ObPathNode* right, bool pred); + int init(ObFilterType type); + virtual int node_to_string(ObStringBuffer& str); + int filter_arg_to_string(ObStringBuffer& str, bool is_left); + int filter_type_to_string(ObStringBuffer& str); + virtual int eval_node(ObPathCtx &ctx, ObSeekResult& res); + ObSeekResult ans_; + bool in_predication_; + bool is_boolean_; + bool filtered_ = false; +}; + +// complex filter, a collection of several filter operators +// left arg is location before filter operators +// right arg is location after filter operators +class ObPathFilterOpNode : public ObPathNode +{ +public: + ObPathFilterOpNode(ObMulModeMemCtx* ctx, const ObParserType& parser_type) : + ObPathNode(ctx, parser_type, ObPathNodeClass::PN_LOCATION_FILTER), + ans_(NOT_FILTERED), 
left_(nullptr), right_(nullptr) {} + virtual ~ObPathFilterOpNode() {} + virtual int node_to_string(ObStringBuffer& str); + virtual int eval_node(ObPathCtx &ctx, ObSeekResult& res); + int append_filter(ObPathNode* filter); + void init_left(ObPathNode* left) {left_ = left;} + void init_right(ObPathNode* right) {right_ = right;} + int filter_op_arg_to_str(bool is_left, ObStringBuffer& str); + int get_filter_ans(ObFilterOpAns& ans, ObPathCtx& filter_ctx); + int get_valid_res(ObPathCtx &ctx, ObSeekResult& res, bool is_left); + int init_right_without_filter(ObPathCtx &ctx, ObSeekResult& res); + int init_right_with_filter(ObPathCtx &ctx, ObSeekResult& res); + ObFilterOpAns ans_; + ObPathNode* left_; + ObPathNode* right_; +}; + +class ObPathFuncNode : public ObPathNode +{ +public: + ObPathFuncNode(ObMulModeMemCtx* ctx, const ObParserType& parser_type) : + ObPathNode(ctx, parser_type, ObPathNodeClass::PN_FUNCTION), + max_arg_num_(-1), min_arg_num_(0), ans_(nullptr) {} + virtual ~ObPathFuncNode() {} + int init(ObFuncType& func_type); + virtual int node_to_string(ObStringBuffer& str); + int func_arg_to_string(ObStringBuffer& str); + virtual int eval_node(ObPathCtx &ctx, ObSeekResult& res); + int check_is_all_location_without_filter(ObPathNode* arg_root); + int check_is_legal_arg(); + int check_is_legal_count_arg(); + int checek_cache_and_abs(); + OB_INLINE int get_max_arg_num() {return max_arg_num_;} + OB_INLINE int get_min_arg_num() {return min_arg_num_;} + // Get first_index according to from_end +private: + int eval_position_or_last(ObPathCtx &ctx, bool is_last, ObSeekResult& res); + int eval_count(ObPathCtx &ctx, ObSeekResult&res); + int eval_true_or_false(ObPathCtx &ctx, bool is_true, ObSeekResult& res); + int max_arg_num_; + int min_arg_num_; + ObPathArgNode* ans_; +}; + +class ObPathArgNode : public ObPathNode +{ +public: + ObPathArgNode(ObMulModeMemCtx* ctx, const ObParserType& parser_type) : + ObPathNode(ctx, parser_type, ObPathNodeClass::PN_ARG) {} + virtual 
~ObPathArgNode() {} + int init(char* str, uint64_t len, bool pred); + int init(double num, bool pred); + int init(bool boolean, bool pred); + int init(ObPathNode* node, bool pred); + virtual int node_to_string(ObStringBuffer& str); + virtual int eval_node(ObPathCtx &ctx, ObSeekResult& res); + ObArgNodeContent arg_; + bool in_predication_; + bool is_seeked_ = false; +}; + +class ObPathUtil +{ +public: + static bool is_filter_nodetest(const ObSeekType& seek_type); + static bool is_upper_axis(const ObPathNodeAxis& axis); + static bool is_down_axis(const ObPathNodeAxis& axis); + static bool include_self_axis(const ObPathNodeAxis& axis); + static bool check_contain_relative_path(ObPathNode* path); + static bool check_need_cache(ObPathNode* path); + static int add_dup_if_missing(ObIAllocator *allocator, ObIMulModeBase*& path_res, ObIBaseSortedVector &dup, bool& end_seek); + static int add_scalar(ObIAllocator *allocator, ObPathArgNode* arg, ObSeekVector &res); + static int get_parser_type(ObIMulModeBase *doc, ObParserType& parser_type); + static int char_to_filter_type(const ObXpathFilterChar& ch, ObFilterType& type); + static int pop_char_stack(ObFilterCharPointers& char_stack); + static int pop_node_stack(ObPathVectorPointers& node_stack, ObPathNode*& top_node); + static int alloc_seek_result(common::ObIAllocator *allocator, ObIMulModeBase* base, ObSeekResult*& result); + static int alloc_seek_result(ObIAllocator *allocator, ObPathArgNode* arg, ObSeekResult*& res); + static int alloc_num_arg(ObMulModeMemCtx *ctx, ObPathArgNode*& arg, ObParserType parser_type, double num); + static int alloc_boolean_arg(ObMulModeMemCtx *ctx, ObPathArgNode*& arg, ObParserType parser_type, bool ans); + static int trans_scalar_to_base(ObIAllocator *allocator, ObPathArgNode* arg, ObIMulModeBase*& base); + static int filter_compare(ObPathCtx &ctx, + ObNodeSetVector &left, ObArgType left_type, + ObNodeSetVector &right, ObArgType right_type, + ObFilterType op, ObSeekResult& res); + static int 
filter_calculate(ObPathCtx &ctx, + ObNodeSetVector &left, ObArgType left_type, + ObNodeSetVector &right, ObArgType right_type, + ObFilterType op, ObSeekVector &res); + static int logic_compare_rule(ObPathCtx &ctx, ObPathNode *path_node, bool &ret_bool); + static int filter_logic_compare(ObPathCtx &ctx, ObPathNode* left_node, ObPathNode* right_node, ObFilterType op, ObSeekResult &res); + static int filter_union(ObPathCtx &ctx, ObPathNode* left_node, ObPathNode* right_node, ObFilterType op, ObSeekVector &res); + static int filter_single_node(ObPathCtx &ctx, ObPathNode* filter_node, ObSeekVector &res); + static int get_seek_vec(ObPathCtx &ctx, ObPathNode *from_node, ObSeekVector &res); + static int get_filter_node_result(ObPathCtx &ctx, ObLibTreeNodeBase* filter_node_base, ObPathNode* &res); + static int seek_res_to_boolean(ObSeekResult& filter, bool &res); + static int get_seek_iterator(common::ObIAllocator *allocator, ObPathLocationNode* loc, ObSeekIterator*& ada); + static int alloc_binary(common::ObIAllocator *allocator, ObXmlBin*& ada); + static int alloc_iterator(common::ObIAllocator *allocator, ObSeekIterator*& ada); + static int alloc_complex_iterator(common::ObIAllocator *allocator, ObSeekComplexIterator*& ada); + static int alloc_ancestor_iterator(common::ObIAllocator *allocator, ObSeekAncestorIterator*& ada); + static int alloc_path_node(common::ObIAllocator *allocator, ObPathNode*& node); + static int alloc_node_content_info(common::ObIAllocator *allocator, ObArgNodeContent *content, ObArgType type, ObNodeTypeAndContent *&res); + static int alloc_node_content_info(common::ObIAllocator *allocator, ObString *str, ObNodeTypeAndContent *&res); + static int alloc_node_set_vector(ObPathCtx &ctx, ObPathNode *path_node, ObArgType& arg_type, ObNodeSetVector &node_vec); + static int get_arg_type(ObArgType& arg_type, ObPathNode *path_node); + static int release_seek_vector(ObPathCtx &ctx, ObSeekVector& seek_vector); + static int add_ns_if_need(ObPathCtx &ctx, 
ObIMulModeBase*& res); + static int collect_ancestor_ns(ObIMulModeBase* extend, ObStack &ancestor_record, ObXmlElement::NsMap &ns_map, ObArray &ns_vec, common::ObIAllocator* tmp_alloc); +}; + +class ObXmlPathFilter : public ObMulModeFilter +{ +public: + ObXmlPathFilter() {} + ObXmlPathFilter(ObPathLocationNode* path, ObPathCtx* path_ctx) : path_(path), path_ctx_(path_ctx) {} + ObXmlPathFilter(const ObXmlPathFilter& from) : path_(from.path_), path_ctx_(from.path_ctx_){} + ~ObXmlPathFilter() {} + int operator()(ObIMulModeBase* doc, bool& filtered); + ObPathLocationNode* path_; + ObPathCtx* path_ctx_; +}; + + +// basic Iterator, used for following axis: child, parent, attribute, namespace +class ObSeekIterator +{ +public: + ObSeekIterator() : iter_(nullptr), is_seeked_(false) {} + ObSeekIterator(const ObSeekIterator& src) : ada_root_(src.ada_root_), + seek_info_(src.seek_info_), axis_(src.axis_), iter_(src.iter_), is_seeked_(src.is_seeked_) {} + virtual ~ObSeekIterator() {} + int init(ObPathCtx &ctx, ObPathLocationNode* location, ObIMulModeBase* ada_root); + virtual int next(ObPathCtx &ctx, ObIMulModeBase*& res); + virtual void reset() {is_seeked_ = false;} + void reset(ObIMulModeBase* new_ada_root); + void set_root(ObIMulModeBase* new_ada_root); + int close(); +protected: + int next_child(ObPathCtx &ctx, ObIMulModeBase*& res); + int next_self(ObPathCtx &ctx, ObIMulModeBase*& res); + int next_parent(ObPathCtx &ctx, ObIMulModeBase*& res); + int next_attribute(ObPathCtx &ctx, ObIMulModeBase*& res); + int next_namespace(ObPathCtx &ctx, ObIMulModeBase*& res); + int filter_ans(ObIMulModeBase* ans, bool& filtered); + ObIMulModeBase* ada_root_; // root node + ObPathSeekInfo seek_info_; // node filter for reaser + ObPathNodeAxis axis_; // axis info + ObMulModeReader iter_; // iter + bool is_seeked_; +}; + +// complex Iterator, used for following axis: descendant, descendant-or-self and '//' +class ObSeekComplexIterator : public ObSeekIterator +{ +public: + 
ObSeekComplexIterator (ObIAllocator *alloc): ObSeekIterator() , iter_stack_(alloc) {} + + ObSeekComplexIterator(const ObSeekComplexIterator& src) : + ObSeekIterator(src), iter_stack_(src.iter_stack_) {} + ~ObSeekComplexIterator() {} + virtual int next(ObPathCtx &ctx, ObIMulModeBase*& res); + virtual void reset() {is_seeked_ = false; iter_stack_.reset();} +protected: + int next_descendant(ObPathCtx &ctx, bool include_self, ObIMulModeBase*& res); + int ellipsis_inner_next(ObPathCtx &ctx, ObIMulModeBase*& res); + ObStack iter_stack_; +}; + +// Ancestor Iterator, used for following axis: ancestor, ancestor-or-self +class ObSeekAncestorIterator : public ObSeekIterator +{ +public: + ObSeekAncestorIterator (ObIAllocator *alloc): ObSeekIterator() , anc_stack_(alloc) {} + + ObSeekAncestorIterator(const ObSeekAncestorIterator& src) : + ObSeekIterator(src), anc_stack_(src.anc_stack_) {} + ~ObSeekAncestorIterator() {} + virtual int next(ObPathCtx &ctx, ObIMulModeBase*& res); + virtual void reset() {is_seeked_ = false; anc_stack_.reset();} +protected: + int next_ancestor(ObPathCtx &ctx, bool include_self, ObIMulModeBase*& res); + int ancestor_inner_next(ObPathCtx &ctx, ObIMulModeBase*& res); + int anc_stack_push(ObPathCtx &ctx, ObIMulModeBase* push_node); + void anc_stack_pop(ObPathCtx &ctx); + ObStack anc_stack_; +}; + +static const int64_t DEFAULT_DUP_SIZE = 8; +class ObPathExprIter +{ +public: + ObPathExprIter(ObIAllocator *allocator, ObIAllocator *tmp_allocator = nullptr) + : page_allocator_(*allocator, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR), + mode_arena_(PATH_DEFAULT_PAGE_SIZE, page_allocator_), + path_ctx_(allocator), + dup_(DEFAULT_DUP_SIZE, &mode_arena_, common::ObModIds::OB_MODULE_PAGE_ALLOCATOR), + is_inited_(0), need_record_(0), is_open_(0), add_ns_(0), reserved_(0) + { + tmp_allocator_ = (tmp_allocator == nullptr) ? 
allocator : tmp_allocator; + } + ~ObPathExprIter() {close();} + int init(ObMulModeMemCtx* ctx, ObString& path, ObString& default_ns, ObIMulModeBase* doc, ObPathVarObject* pass_var, bool add_namespace = true); + int open(); // begin to parse and seek + int get_next_node(ObIMulModeBase*& res); + + int get_first_node(ObPathNode*& loc); + int get_first_axis(ObPathNodeAxis& first_axis); + int get_first_seektype(ObSeekType& first_seektype); + ObIMulModeBase* get_cur_res_parent(); + bool is_first_init() { return !is_open_; } + bool get_add_ns() {return add_ns_;} + void set_add_ns(bool add_ns); + int set_tmp_alloc(ObIAllocator *tmp_allocator); + ObString& get_path_str() { return path_; } + int close(); + int reset(); + int reset(ObIMulModeBase* doc, ObIAllocator *tmp_allocator); +private: + common::ObIAllocator *allocator_; + common::ObIAllocator *tmp_allocator_; + ModulePageAllocator page_allocator_; + ModeBaseModuleArena mode_arena_; + ObMulModeMemCtx* ctx_; + ObPathCtx path_ctx_; + ObIMulModeBase* doc_; + ObPathVarObject* pass_var_; + ObPathNode* path_node_; + ObString path_; + ObString default_ns_; + ObIBaseSortedVector dup_; + uint8_t is_inited_ : 1; + uint8_t need_record_ : 1; + uint8_t is_open_ : 1; + uint8_t add_ns_ : 1; + uint8_t reserved_ : 4; +}; + +} +} +#endif // OCEANBASE_SQL_OB_XPATH \ No newline at end of file diff --git a/deps/oblib/src/rpc/obmysql/ob_mysql_util.cpp b/deps/oblib/src/rpc/obmysql/ob_mysql_util.cpp index 9d8bad035d..d8fce1f75b 100644 --- a/deps/oblib/src/rpc/obmysql/ob_mysql_util.cpp +++ b/deps/oblib/src/rpc/obmysql/ob_mysql_util.cpp @@ -25,10 +25,8 @@ #include "lib/json_type/ob_json_bin.h" #include "lib/json_type/ob_json_base.h" #include "lib/geo/ob_geo_bin.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_util.h" #include "lib/xml/ob_xml_bin.h" -#endif using namespace oceanbase::common; namespace oceanbase @@ -1083,7 +1081,6 @@ int ObMySQLUtil::urowid_cell_str(char *buf, const int64_t len, const ObURowIDDat int 
ObMySQLUtil::sql_utd_cell_str(uint64_t tenant_id, char *buf, const int64_t len, const ObString &val, int64_t &pos) { INIT_SUCC(ret); -#ifdef OB_BUILD_ORACLE_XML lib::ObMemAttr mem_attr(tenant_id, "XMLModule"); lib::ObMallocHookAttrGuard malloc_guard(mem_attr); ObArenaAllocator allocator(mem_attr); @@ -1120,9 +1117,6 @@ int ObMySQLUtil::sql_utd_cell_str(uint64_t tenant_id, char *buf, const int64_t l ret = OB_SIZE_OVERFLOW; } } -#else - ret = OB_NOT_SUPPORTED; -#endif return ret; } @@ -1134,6 +1128,7 @@ int ObMySQLUtil::json_cell_str(uint64_t tenant_id, char *buf, const int64_t len, ObJsonBin j_bin(val.ptr(), val.length(), &allocator); ObIJsonBase *j_base = &j_bin; ObJsonBuffer jbuf(&allocator); + static_cast(j_base)->set_seek_flag(true); if (OB_ISNULL(buf)) { ret = OB_INVALID_ARGUMENT; OB_LOG(WARN, "invalid input args", K(ret), KP(buf)); diff --git a/src/logservice/libobcdc/src/ob_cdc_lob_aux_meta_storager.cpp b/src/logservice/libobcdc/src/ob_cdc_lob_aux_meta_storager.cpp index 2c0c339574..dd6f79c01a 100644 --- a/src/logservice/libobcdc/src/ob_cdc_lob_aux_meta_storager.cpp +++ b/src/logservice/libobcdc/src/ob_cdc_lob_aux_meta_storager.cpp @@ -380,7 +380,6 @@ int ObCDCLobAuxMetaStorager::del( int64_t commit_version = OB_INVALID_VERSION; const uint64_t tenant_id = lob_data_out_row_ctx_list.get_tenant_id(); const transaction::ObTransID &trans_id = lob_data_out_row_ctx_list.get_trans_id(); - const uint64_t aux_lob_meta_tid = lob_data_out_row_ctx_list.get_aux_lob_meta_table_id(); ObLobDataGetCtxList &lob_data_get_ctx_list = lob_data_out_row_ctx_list.get_lob_data_get_ctx_list(); ObLobDataGetCtx *cur_lob_data_get_ctx = lob_data_get_ctx_list.head_; @@ -392,8 +391,9 @@ int ObCDCLobAuxMetaStorager::del( } while (OB_SUCC(ret) && ! 
stop_flag && cur_lob_data_get_ctx) { - if (OB_FAIL(del_lob_col_value_(commit_version, tenant_id, trans_id, aux_lob_meta_tid, *cur_lob_data_get_ctx, stop_flag))) { - LOG_ERROR("[OBCDC][LOB_AUX][DEL][COL] del_lob_col_value_ failed", KR(ret), K(tenant_id), K(trans_id), K(aux_lob_meta_tid)); + const uint64_t table_id = lob_data_out_row_ctx_list.get_table_id_of_lob_aux_meta_key(*cur_lob_data_get_ctx); + if (OB_FAIL(del_lob_col_value_(commit_version, tenant_id, trans_id, table_id, *cur_lob_data_get_ctx, stop_flag))) { + LOG_ERROR("[OBCDC][LOB_AUX][DEL][COL] del_lob_col_value_ failed", KR(ret), K(tenant_id), K(trans_id), K(table_id)); } else { cur_lob_data_get_ctx = cur_lob_data_get_ctx->get_next(); } @@ -426,7 +426,7 @@ int ObCDCLobAuxMetaStorager::del_lob_col_value_( const uint64_t seq_no_start = lob_data_out_row_ctx->seq_no_st_; const uint32_t seq_no_cnt = lob_data_out_row_ctx->seq_no_cnt_; auto seq_no = transaction::ObTxSEQ::cast_from_int(seq_no_start); - const ObLobId &lob_id = lob_data_get_ctx.get_new_lob_data()->id_; + const ObLobId lob_id = lob_data_get_ctx.get_lob_id(); for (int64_t idx = 0; OB_SUCC(ret) && idx < seq_no_cnt; ++idx, ++seq_no) { LobAuxMetaKey key(commit_version, tenant_id, trans_id, aux_lob_meta_tid, lob_id, seq_no); @@ -435,7 +435,6 @@ int ObCDCLobAuxMetaStorager::del_lob_col_value_( } } // for } - return ret; } diff --git a/src/logservice/libobcdc/src/ob_cdc_lob_ctx.cpp b/src/logservice/libobcdc/src/ob_cdc_lob_ctx.cpp index b459aeff1f..12851e1325 100644 --- a/src/logservice/libobcdc/src/ob_cdc_lob_ctx.cpp +++ b/src/logservice/libobcdc/src/ob_cdc_lob_ctx.cpp @@ -72,6 +72,7 @@ int ObLobColCtx::set_col_value( void ObLobDataGetCtx::reset() { + type_ = ObLobDataGetTaskType::FULL_LOB; host_ = nullptr; column_id_ = common::OB_INVALID_ID; dml_flag_ = blocksstable::ObDmlFlag::DF_MAX; @@ -93,9 +94,20 @@ void ObLobDataGetCtx::reset( column_id_ = column_id; dml_flag_ = dml_flag; new_lob_data_ = new_lob_data; + + // set task type according to 
ObLobDataOutRowCtx::op + const ObLobDataOutRowCtx *lob_data_out_row_ctx = nullptr; + if (OB_ISNULL(new_lob_data_)) { + LOG_DEBUG("new_lob_data_ is null", K(column_id), K(dml_flag), KP(host)); + } else if (OB_ISNULL(lob_data_out_row_ctx = reinterpret_cast(new_lob_data_->buffer_))) { + LOG_DEBUG("lob_data_out_row_ctx is null", K(column_id), K(dml_flag), KP(host), KPC(new_lob_data_)); + } else if (lob_data_out_row_ctx->is_diff()) { + type_ = ObLobDataGetTaskType::EXT_INFO_LOG; + LOG_DEBUG("lob_data_out_row_ctx is diff", K(column_id), KPC(new_lob_data_), KPC(lob_data_out_row_ctx)); + } } -int ObLobDataGetCtx::get_lob_out_row_ctx(const ObLobDataOutRowCtx *&lob_data_out_row_ctx) +int ObLobDataGetCtx::get_lob_out_row_ctx(const ObLobDataOutRowCtx *&lob_data_out_row_ctx) const { int ret = OB_SUCCESS; @@ -108,6 +120,55 @@ int ObLobDataGetCtx::get_lob_out_row_ctx(const ObLobDataOutRowCtx *&lob_data_out return ret; } +ObLobId ObLobDataGetCtx::get_lob_id() const +{ + ObLobId lob_id; + switch (get_type()) { + case ObLobDataGetTaskType::FULL_LOB: + if (OB_NOT_NULL(new_lob_data_)) { + lob_id = new_lob_data_->id_; + } else { + LOG_DEBUG("new_lob_data_ is null", KPC(this)); + } + break; + default: + break; + } + return lob_id; +} + +int ObLobDataGetCtx::get_data_length(const bool is_new_col, uint64_t &data_length) const +{ + int ret = OB_SUCCESS; + switch (get_type()) { + case ObLobDataGetTaskType::FULL_LOB: { + const ObLobData *lob_data = nullptr; + if (OB_ISNULL(lob_data = get_lob_data(is_new_col))) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("new_lob_data is nullptr", KR(ret), KPC(this)); + } else { + data_length = lob_data->byte_size_; + } + break; + } + case ObLobDataGetTaskType::EXT_INFO_LOG: { + const ObLobDataOutRowCtx *lob_data_out_row_ctx = nullptr; + if (! 
is_new_col) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("is_new_col must be true", KR(ret), KPC(this)); + } else if (OB_FAIL(get_lob_out_row_ctx(lob_data_out_row_ctx))) { + LOG_ERROR("get_lob_out_row_ctx failed", KR(ret), KPC(this)); + } else { + data_length = lob_data_out_row_ctx->modified_len_; + } + break; + } + default: + break; + } + return ret; +} + int ObLobDataGetCtx::set_col_value(const bool is_new_col, const char *buf, const uint64_t buf_len) { int ret = OB_SUCCESS; @@ -128,6 +189,10 @@ void ObLobDataGetCtx::inc_lob_col_value_count(bool &is_lob_col_value_handle_done if (is_insert()) { total_value_count = 1; + // if current outrow data is ext_info_log or partial_json + // there is no before-image output. so total_value_count is one. + } else if (is_ext_info_log()) { + total_value_count = 1; } else if (is_update()) { if (nullptr != old_lob_data_ && old_lob_data_->byte_size_ > 0) { total_value_count += 1; @@ -157,9 +222,9 @@ int64_t ObLobDataGetCtx::to_string(char *buf, const int64_t buf_len) const } (void)common::databuff_printf(buf, buf_len, pos, - "column_id=%ld, dml=%s, ref_cnt[new=%d, old=%d], handle_cnt=%d, ", + "column_id=%ld, dml=%s, ref_cnt[new=%d, old=%d], handle_cnt=%d, type=%d, ", column_id_, print_dml_flag(dml_flag_), new_lob_col_ctx_.get_col_ref_cnt(), - old_lob_col_ctx_.get_col_ref_cnt(), lob_col_value_handle_done_count_); + old_lob_col_ctx_.get_col_ref_cnt(), lob_col_value_handle_done_count_, type_); if (nullptr != new_lob_data_) { (void)common::databuff_printf(buf, buf_len, pos, @@ -258,6 +323,45 @@ int ObLobDataOutRowCtxList::get_lob_column_value( return ret; } +int ObLobDataOutRowCtxList::get_lob_data_get_ctx( + const uint64_t column_id, + ObLobDataGetCtx *&result) +{ + int ret = OB_SUCCESS; + bool is_found = false; + ObLobDataGetCtx *lob_data_get_ctx = lob_data_get_ctxs_.head_; + if (OB_UNLIKELY(! 
is_all_lob_callback_done())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("lob_ctx_cols is_all_lob_callback_done is false, not expected", KR(ret), KPC(this)); + } else { + while (OB_SUCC(ret) && ! is_found && nullptr != lob_data_get_ctx) { + if (column_id == lob_data_get_ctx->column_id_) { + is_found = true; + result = lob_data_get_ctx; + } + lob_data_get_ctx = lob_data_get_ctx->get_next(); + } /* while */ + } + + if (OB_SUCC(ret) && ! is_found) { /* report a miss only when the scan ran; do not mask the OB_ERR_UNEXPECTED set above */ + ret = OB_ENTRY_NOT_EXIST; + } + return ret; +} + +uint64_t ObLobDataOutRowCtxList::get_table_id_of_lob_aux_meta_key(const ObLobDataGetCtx &lob_data_get_ctx) const +{ + uint64_t table_id = 0; + switch (lob_data_get_ctx.get_type()) { + case ObLobDataGetTaskType::FULL_LOB: + table_id = get_aux_lob_meta_table_id(); + break; + default: + break; + } + return table_id; +} + int64_t ObLobDataOutRowCtxList::to_string(char *buf, const int64_t buf_len) const { int64_t pos = 0; diff --git a/src/logservice/libobcdc/src/ob_cdc_lob_ctx.h b/src/logservice/libobcdc/src/ob_cdc_lob_ctx.h index 962d88a2fd..dede51f540 100644 --- a/src/logservice/libobcdc/src/ob_cdc_lob_ctx.h +++ b/src/logservice/libobcdc/src/ob_cdc_lob_ctx.h @@ -57,6 +57,15 @@ struct ObLobColCtx uint32_t col_ref_cnt_; }; +// indicate what data the task gets +// 1. FULL_LOB means getting all lob data +// 2.
EXT_INFO_LOG means get ext info log, not lob data +enum class ObLobDataGetTaskType +{ + FULL_LOB = 0, + EXT_INFO_LOG = 1, +}; + struct ObLobDataGetCtx { ObLobDataGetCtx() { reset(); } @@ -73,8 +82,9 @@ struct ObLobDataGetCtx bool is_insert() const { return blocksstable::ObDmlFlag::DF_INSERT == dml_flag_; } bool is_update() const { return blocksstable::ObDmlFlag::DF_UPDATE == dml_flag_; } bool is_delete() const { return blocksstable::ObDmlFlag::DF_DELETE == dml_flag_; } + bool is_ext_info_log() const { return ObLobDataGetTaskType::EXT_INFO_LOG == type_; } - const common::ObLobData *get_lob_data(const bool is_new_col) + const common::ObLobData *get_lob_data(const bool is_new_col) const { const common::ObLobData *lob_data_ptr; @@ -88,8 +98,9 @@ struct ObLobDataGetCtx } const common::ObLobData *get_new_lob_data() { return new_lob_data_; } const common::ObLobData *get_old_lob_data() { return old_lob_data_; } - int get_lob_out_row_ctx(const ObLobDataOutRowCtx *&lob_data_out_row_ctx); - + int get_lob_out_row_ctx(const ObLobDataOutRowCtx *&lob_data_out_row_ctx) const; + ObLobId get_lob_id() const; + int get_data_length(const bool is_new_col, uint64_t &data_length) const; common::ObString **get_fragment_cb_array(const bool is_new_col) { common::ObString **res_str = nullptr; @@ -126,6 +137,10 @@ struct ObLobDataGetCtx int64_t to_string(char *buf, const int64_t buf_len) const; + ObLobDataGetTaskType get_type() const { return type_; } + void set_type(ObLobDataGetTaskType type) { type_ = type; } + + ObLobDataGetTaskType type_; void *host_; // ObLobDataOutRowCtxList uint64_t column_id_; blocksstable::ObDmlFlag dml_flag_; @@ -246,6 +261,9 @@ public: const uint64_t column_id, const bool is_new_col, common::ObString *&col_str); + int get_lob_data_get_ctx( + const uint64_t column_id, + ObLobDataGetCtx *&result); bool is_all_lob_callback_done() const { return get_total_lob_count() == ATOMIC_LOAD(&lob_col_get_succ_count_); } void inc_lob_col_count(bool &is_all_lob_col_handle_done) @@ 
-253,6 +271,8 @@ public: is_all_lob_col_handle_done = (get_total_lob_count() == ATOMIC_AAF(&lob_col_get_succ_count_, 1)); } + uint64_t get_table_id_of_lob_aux_meta_key(const ObLobDataGetCtx &lob_data_get_ctx) const; + public: int64_t to_string(char *buf, const int64_t buf_len) const; diff --git a/src/logservice/libobcdc/src/ob_cdc_lob_data_merger.cpp b/src/logservice/libobcdc/src/ob_cdc_lob_data_merger.cpp index 371978c25c..769998ff4a 100644 --- a/src/logservice/libobcdc/src/ob_cdc_lob_data_merger.cpp +++ b/src/logservice/libobcdc/src/ob_cdc_lob_data_merger.cpp @@ -17,6 +17,7 @@ #include "ob_log_instance.h" // TCTX #include "ob_log_formatter.h" // IObLogFormatter #include "ob_log_trace_id.h" // ObLogTraceIdGuard +#include "storage/lob/ob_ext_info_callback.h" // ObExtInfoLog using namespace oceanbase::common; @@ -424,10 +425,10 @@ int ObCDCLobDataMerger::handle_task_( const int64_t commit_version = part_trans_task.get_trans_commit_version(); const uint64_t tenant_id = lob_data_out_row_ctx_list->get_tenant_id(); const transaction::ObTransID &trans_id = lob_data_out_row_ctx_list->get_trans_id(); - const uint64_t aux_lob_meta_tid = lob_data_out_row_ctx_list->get_aux_lob_meta_table_id(); - const ObLobId &lob_id = new_lob_data->id_; + const uint64_t table_id = lob_data_out_row_ctx_list->get_table_id_of_lob_aux_meta_key(lob_data_get_ctx); + const ObLobId lob_id = lob_data_get_ctx.get_lob_id(); const uint32_t idx = task.idx_; - LobAuxMetaKey lob_aux_meta_key(commit_version, tenant_id, trans_id, aux_lob_meta_tid, lob_id, task.seq_no_); + LobAuxMetaKey lob_aux_meta_key(commit_version, tenant_id, trans_id, table_id, lob_id, task.seq_no_); const char *lob_data_ptr = nullptr; int64_t lob_data_len = 0; ObIAllocator &allocator = lob_data_out_row_ctx_list->get_allocator(); @@ -437,18 +438,18 @@ int ObCDCLobDataMerger::handle_task_( lob_data_ptr, lob_data_len); if (OB_SUCC(ret)) { - LOG_DEBUG("lob_aux_meta_storager get succ", K(lob_aux_meta_key), K(lob_data_len), K(task)); + 
LOG_DEBUG("lob_aux_meta_storager get succ", K(lob_aux_meta_key), K(lob_data_len), K(task), K(lob_data_get_ctx), KPC(lob_data_out_row_ctx_list)); fragment_cb_array[idx]->assign_ptr(lob_data_ptr, lob_data_len); uint32_t col_ref_cnt = lob_data_get_ctx.dec_col_ref_cnt(is_new_col); if (0 == col_ref_cnt) { - if (OB_FAIL(handle_when_all_lob_col_fragment_progress_done_( + if (OB_FAIL(handle_when_outrow_log_fragment_progress_done_( task, lob_data_get_ctx, *lob_data_out_row_ctx_list, stop_flag))) { - LOG_ERROR("handle_when_all_lob_col_fragment_progress_done_ failed", KR(ret)); + LOG_ERROR("handle_when_outrow_log_fragment_progress_done_ failed", KR(ret)); } } } else if (OB_IN_STOP_STATE != ret) { - LOG_ERROR("lob_aux_meta_storager get failed", KR(ret), K(lob_aux_meta_key)); + LOG_ERROR("lob_aux_meta_storager get failed", KR(ret), K(lob_aux_meta_key), K(lob_data_get_ctx), KPC(lob_data_out_row_ctx_list)); } } } @@ -460,39 +461,82 @@ int ObCDCLobDataMerger::handle_task_( return ret; } -int ObCDCLobDataMerger::handle_when_all_lob_col_fragment_progress_done_( +int ObCDCLobDataMerger::handle_when_outrow_log_fragment_progress_done_( LobColumnFragmentCtx &task, ObLobDataGetCtx &lob_data_get_ctx, ObLobDataOutRowCtxList &lob_data_out_row_ctx_list, volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + ObString src_data; + ObString output_data; + const bool is_new_col = task.is_new_col_; + if (OB_FAIL(merge_fragments_(task, lob_data_get_ctx, lob_data_out_row_ctx_list, src_data))) { + LOG_ERROR("merge_fragments_ fail", KR(ret), K(task), K(lob_data_get_ctx), K(lob_data_out_row_ctx_list)); + } else { + ObLobDataGetTaskType task_type = lob_data_get_ctx.get_type(); + LOG_DEBUG("handle", K(task_type), K(task), K(lob_data_get_ctx), K(lob_data_out_row_ctx_list), K(src_data)); + switch (task_type) { + case ObLobDataGetTaskType::FULL_LOB: + output_data = src_data; + break; + case ObLobDataGetTaskType::EXT_INFO_LOG: + if (OB_FAIL(handle_ext_info_log_(lob_data_get_ctx, lob_data_out_row_ctx_list, 
src_data, output_data))) { + LOG_ERROR("handle ext info log fail", KR(ret), K(task_type), K(task), K(lob_data_get_ctx), K(lob_data_out_row_ctx_list)); + } + break; + default: + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid task type", KR(ret), K(task_type), K(task), K(lob_data_get_ctx), K(lob_data_out_row_ctx_list)); + break; + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(lob_data_get_ctx.set_col_value(is_new_col, output_data.ptr(), output_data.length()))) { + LOG_ERROR("lob_data_get_ctx set_col_value failed", KR(ret), K(src_data), K(output_data)); + } else if (OB_FAIL(after_fragment_progress_done_(lob_data_get_ctx, lob_data_out_row_ctx_list, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("after_fragment_progress_done_ failed", KR(ret), K(task), K(lob_data_get_ctx), K(lob_data_out_row_ctx_list)); + } + } + } + return ret; +} + +int ObCDCLobDataMerger::merge_fragments_( + LobColumnFragmentCtx &task, + ObLobDataGetCtx &lob_data_get_ctx, + ObLobDataOutRowCtxList &lob_data_out_row_ctx_list, + ObString &data) { int ret = OB_SUCCESS; const ObLobData *lob_data = nullptr; const ObLobDataOutRowCtx *lob_data_out_row_ctx = nullptr; const bool is_new_col = task.is_new_col_; ObString **fragment_cb_array= lob_data_get_ctx.get_fragment_cb_array(is_new_col); + uint64_t data_len = 0; if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_ERROR("ObCDCLobDataMerger has not been initialized", KR(ret)); } else if (OB_ISNULL(lob_data = lob_data_get_ctx.get_lob_data(is_new_col))) { ret = OB_ERR_UNEXPECTED; - LOG_ERROR("new_lob_data is nullptr", KR(ret), K(task)); + LOG_ERROR("lob_data is nullptr", KR(ret), K(is_new_col), K(lob_data_get_ctx), K(task), K(lob_data_out_row_ctx_list)); } else if (OB_FAIL(lob_data_get_ctx.get_lob_out_row_ctx(lob_data_out_row_ctx))) { - LOG_ERROR("lob_data_get_ctx get_lob_out_row_ctx failed", KR(ret), K(lob_data_get_ctx)); + LOG_ERROR("lob_data_get_ctx get_lob_out_row_ctx failed", KR(ret), K(lob_data_get_ctx), K(task), K(lob_data_out_row_ctx_list)); + } else 
if (OB_FAIL(lob_data_get_ctx.get_data_length(is_new_col, data_len))) { + LOG_ERROR("lob_data_get_ctx get_data_length failed", KR(ret), K(is_new_col), K(lob_data_get_ctx), K(task), K(lob_data_out_row_ctx_list)); } else { - LOG_DEBUG("lob_aux_meta_storager handle last fragment", K(task), K(lob_data_get_ctx)); + LOG_DEBUG("lob_aux_meta_storager handle last fragment", K(data_len), K(task), K(lob_data_get_ctx), K(lob_data_out_row_ctx_list)); const bool is_new_col = task.is_new_col_; const uint32_t seq_no_cnt = task.ref_cnt_; - const uint64_t lob_total_len = lob_data->byte_size_; char *buf = nullptr; - if (OB_UNLIKELY(0 >= lob_total_len)) { + if (OB_UNLIKELY(0 >= data_len)) { ret = OB_ERR_UNEXPECTED; - LOG_ERROR("lob_data_len is 0, there should be no outrow lob_col_value", K(task), K(lob_data)); - } else if (OB_ISNULL(buf = static_cast(lob_data_out_row_ctx_list.get_allocator().alloc(sizeof(char) * (lob_total_len + 1))))) { + LOG_ERROR("lob_data_len is 0, there should be no outrow lob_col_value", K(task), K(data_len), K(lob_data)); + } else if (OB_ISNULL(buf = static_cast(lob_data_out_row_ctx_list.get_allocator().alloc(sizeof(char) * (data_len + 1))))) { ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("buf is nullptr", KR(ret), K(is_new_col), K(task), K(lob_data)); + LOG_ERROR("buf is nullptr", KR(ret), K(is_new_col), K(task), K(lob_data), K(data_len)); } else { uint64_t pos = 0; bool is_lob_col_value_handle_done = false; @@ -503,9 +547,9 @@ int ObCDCLobDataMerger::handle_when_all_lob_col_fragment_progress_done_( const int64_t len = str_ptr->length(); const char *ptr = str_ptr->ptr(); - if (pos + len > lob_total_len) { + if (pos + len > data_len) { ret = OB_BUF_NOT_ENOUGH; - LOG_ERROR("buf not enough, not expected", KR(ret), K(pos), K(len), K(lob_total_len)); + LOG_ERROR("buf not enough, not expected", KR(ret), K(pos), K(len), K(data_len)); } else { MEMCPY(buf + pos, ptr, len); pos += len; @@ -514,43 +558,37 @@ int 
ObCDCLobDataMerger::handle_when_all_lob_col_fragment_progress_done_( if (OB_SUCC(ret)) { buf[pos] = '\0'; - - if (OB_FAIL(lob_data_get_ctx.set_col_value(is_new_col, buf, pos))) { - LOG_ERROR("lob_data_get_ctx set_col_value failed", KR(ret), K(pos)); - } else { - lob_data_get_ctx.inc_lob_col_value_count(is_lob_col_value_handle_done); - - if (is_lob_col_value_handle_done) { - lob_data_out_row_ctx_list.inc_lob_col_count(is_all_lob_col_handle_done); - } - - // TODO debug remove - /* - int64_t buf_len = strlen(buf); - int char_len = sizeof(char_len); - LOG_INFO("handle_when_all_lob_col_fragment_progress_done_", "md5", calc_md5_cstr(buf, pos), - K(buf_len), K(pos), - K(char_len), K(lob_total_len)); - */ - // remove - - if (is_all_lob_col_handle_done) { - if (OB_FAIL(try_to_push_task_into_formatter_(lob_data_out_row_ctx_list, stop_flag))) { - if (OB_IN_STOP_STATE != ret) { - LOG_ERROR("try_to_push_task_into_formatter_ failed", KR(ret)); - } - } - } - } + data.assign_ptr(buf, pos); } - - LOG_DEBUG("handle_when_all_lob_col_fragment_progress_done_", K(is_lob_col_value_handle_done), K(is_all_lob_col_handle_done), K(seq_no_cnt), K(pos)); } } return ret; } +int ObCDCLobDataMerger::after_fragment_progress_done_( + ObLobDataGetCtx &lob_data_get_ctx, + ObLobDataOutRowCtxList &lob_data_out_row_ctx_list, + volatile bool &stop_flag) +{ + int ret = OB_SUCCESS; + bool is_lob_col_value_handle_done = false; + bool is_all_lob_col_handle_done = false; + lob_data_get_ctx.inc_lob_col_value_count(is_lob_col_value_handle_done); + if (is_lob_col_value_handle_done) { + lob_data_out_row_ctx_list.inc_lob_col_count(is_all_lob_col_handle_done); + } + LOG_DEBUG("after_fragment_progress_done_", K(is_lob_col_value_handle_done), K(is_all_lob_col_handle_done), K(lob_data_get_ctx), K(lob_data_out_row_ctx_list)); + if (is_all_lob_col_handle_done) { + if (OB_FAIL(try_to_push_task_into_formatter_(lob_data_out_row_ctx_list, stop_flag))) { + if (OB_IN_STOP_STATE != ret) { + 
LOG_ERROR("try_to_push_task_into_formatter_ failed", KR(ret)); + } + } + } + return ret; +} + int ObCDCLobDataMerger::try_to_push_task_into_formatter_( ObLobDataOutRowCtxList &lob_data_out_row_ctx_list, volatile bool &stop_flag) @@ -596,5 +634,54 @@ void ObCDCLobDataMerger::print_task_count_() } } +int ObCDCLobDataMerger::handle_ext_info_log_( + ObLobDataGetCtx &lob_data_get_ctx, + ObLobDataOutRowCtxList &lob_data_out_row_ctx_list, + const ObString &src_data, + ObString &output_data) +{ + int ret = OB_SUCCESS; + storage::ObExtInfoLogHeader ext_info_log_header; + int64_t pos = 0; + ObIAllocator &allocator = lob_data_out_row_ctx_list.get_allocator(); + + if (src_data.empty()) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("buf is empty", KR(ret), K(src_data)); + } else if (OB_FAIL(ext_info_log_header.deserialize(src_data.ptr(), src_data.length(), pos))) { + LOG_ERROR("deserialize ext info log header fail", KR(ret), K(lob_data_get_ctx), K(src_data)); + } else { + storage::ObExtInfoLogType type = ext_info_log_header.get_type(); + LOG_DEBUG("handle ext info log", K(ext_info_log_header), K(pos), K(src_data), K(lob_data_get_ctx), K(lob_data_out_row_ctx_list)); + switch (type) { + case storage::OB_JSON_DIFF_EXT_INFO_LOG: + if (OB_FAIL(handle_json_diff_ext_info_log_(allocator, src_data.ptr(), src_data.length(), pos, output_data))) { + LOG_ERROR("handle_json_diff_ext_info_log_ fail", KR(ret), K(src_data), K(ext_info_log_header), K(lob_data_get_ctx), K(lob_data_out_row_ctx_list)); + } + break; + default: + ret = OB_NOT_SUPPORTED; + LOG_ERROR("unsupport ext info log type", KR(ret), K(type), K(ext_info_log_header), K(lob_data_get_ctx), K(lob_data_out_row_ctx_list)); + break; + } + } + return ret; +} + +int ObCDCLobDataMerger::handle_json_diff_ext_info_log_( + ObIAllocator &allocator, + const char *buf, uint64_t len, int64_t pos, + ObString &output_data) +{ + int ret = OB_SUCCESS; + storage::ObJsonDiffLog json_diff_log; + if (OB_FAIL(json_diff_log.deserialize(buf, len, pos))) { + 
LOG_ERROR("deserialize json diff log fail", KR(ret), K(len), K(pos)); + } else if (OB_FAIL(json_diff_log.to_string(allocator, output_data))) { + LOG_ERROR("json diff log to_string failed", KR(ret), K(len), K(pos)); + } + return ret; +} + } // namespace libobcdc } // namespace oceanbase diff --git a/src/logservice/libobcdc/src/ob_cdc_lob_data_merger.h b/src/logservice/libobcdc/src/ob_cdc_lob_data_merger.h index 96959b8ce8..ee190805bd 100644 --- a/src/logservice/libobcdc/src/ob_cdc_lob_data_merger.h +++ b/src/logservice/libobcdc/src/ob_cdc_lob_data_merger.h @@ -100,7 +100,7 @@ private: LobColumnFragmentCtx &task, const int64_t thread_index, volatile bool &stop_flag); - int handle_when_all_lob_col_fragment_progress_done_( + int handle_when_outrow_log_fragment_progress_done_( LobColumnFragmentCtx &task, ObLobDataGetCtx &lob_data_get_ctx, ObLobDataOutRowCtxList &lob_data_out_row_ctx_list, @@ -108,6 +108,25 @@ private: int try_to_push_task_into_formatter_( ObLobDataOutRowCtxList &lob_data_out_row_ctx_list, volatile bool &stop_flag); + int merge_fragments_( + LobColumnFragmentCtx &task, + ObLobDataGetCtx &lob_data_get_ctx, + ObLobDataOutRowCtxList &lob_data_out_row_ctx_list, + ObString &data); + int after_fragment_progress_done_( + ObLobDataGetCtx &lob_data_get_ctx, + ObLobDataOutRowCtxList &lob_data_out_row_ctx_list, + volatile bool &stop_flag); + // ext info log handle + int handle_ext_info_log_( + ObLobDataGetCtx &lob_data_get_ctx, + ObLobDataOutRowCtxList &lob_data_out_row_ctx_list, + const ObString &src_data, + ObString &format_data); + int handle_json_diff_ext_info_log_( + ObIAllocator &allocator, + const char *buf, uint64_t len, int64_t pos, + ObString &format_data); bool is_in_stop_status(volatile bool stop_flag) const { return stop_flag || LobDataMergerThread::is_stoped(); } // TODO diff --git a/src/logservice/libobcdc/src/ob_log_formatter.cpp b/src/logservice/libobcdc/src/ob_log_formatter.cpp index e721416f35..73422352f7 100644 --- 
a/src/logservice/libobcdc/src/ob_log_formatter.cpp +++ b/src/logservice/libobcdc/src/ob_log_formatter.cpp @@ -63,6 +63,7 @@ void ObLogFormatter::RowValue::reset() (void)memset(is_rowkey_, 0, sizeof(is_rowkey_)); (void)memset(is_changed_, 0, sizeof(is_changed_)); (void)memset(is_null_lob_old_columns_, 0, sizeof(is_null_lob_old_columns_)); + (void)memset(is_diff_, 0, sizeof(is_diff_)); } int ObLogFormatter::RowValue::init(const int64_t column_num, const bool contain_old_column) @@ -79,6 +80,7 @@ int ObLogFormatter::RowValue::init(const int64_t column_num, const bool contain_ (void)memset(is_rowkey_, 0, column_num * sizeof(is_rowkey_[0])); (void)memset(is_changed_, 0, column_num * sizeof(is_changed_[0])); (void)memset(is_null_lob_old_columns_, 0, column_num * sizeof(is_null_lob_old_columns_[0])); + (void)memset(is_diff_, 0, column_num * sizeof(is_diff_[0])); } return OB_SUCCESS; @@ -1294,15 +1296,26 @@ int ObLogFormatter::fill_normal_cols_( if (! cv->is_out_row_) { rv->new_columns_[usr_column_idx] = &cv->string_value_; } else { + ObLobDataGetCtx *lob_data_get_ctx = nullptr; ObString *new_col_str = nullptr; - if (OB_FAIL(lob_ctx_cols.get_lob_column_value(column_id, true/*is_new_col*/, new_col_str))) { + if (OB_FAIL(lob_ctx_cols.get_lob_data_get_ctx(column_id, lob_data_get_ctx))) { if (OB_ENTRY_NOT_EXIST != ret) { LOG_ERROR("get_lob_column_value failed", KR(ret), K(column_id)); } + } else { + new_col_str = &(lob_data_get_ctx->get_new_lob_column_value()); } if (OB_SUCC(ret)) { - if (cv->is_json() || cv->is_geometry()) { + if (lob_data_get_ctx->is_ext_info_log()) { + if (cv->is_json()) { + rv->new_columns_[usr_column_idx] = new_col_str; + rv->is_diff_[usr_column_idx] = true; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("not support ext info log type", KR(ret), K(is_new_value), KPC(lob_data_get_ctx), KPC(cv)); + } + } else if (cv->is_json() || cv->is_geometry()) { const common::ObObjType obj_type = cv->get_obj_type(); cv->value_.set_string(obj_type, *new_col_str); @@ 
-1317,7 +1330,7 @@ int ObLogFormatter::fill_normal_cols_( } LOG_DEBUG("fill_normal_cols_", K(is_new_value), K(column_id), KPC(cv), K(lob_ctx_cols), "md5", calc_md5_cstr(new_col_str->ptr(), new_col_str->length()), - "buf_len", new_col_str->length()); + "buf_len", new_col_str->length(), KPC(lob_data_get_ctx), "is_diff", rv->is_diff_[usr_column_idx]); } else if (OB_ENTRY_NOT_EXIST == ret) { ret = OB_SUCCESS; rv->new_columns_[usr_column_idx] = nullptr; @@ -1329,15 +1342,28 @@ int ObLogFormatter::fill_normal_cols_( if (! cv->is_out_row_) { rv->old_columns_[usr_column_idx] = &cv->string_value_; } else { + ObLobDataGetCtx *lob_data_get_ctx = nullptr; ObString *old_col_str = nullptr; - if (OB_FAIL(lob_ctx_cols.get_lob_column_value(column_id, false/*is_new_col*/, old_col_str))) { + if (OB_FAIL(lob_ctx_cols.get_lob_data_get_ctx(column_id, lob_data_get_ctx))) { if (OB_ENTRY_NOT_EXIST != ret) { LOG_ERROR("get_lob_column_value failed", KR(ret), K(column_id)); } + } else { + old_col_str = &(lob_data_get_ctx->get_old_lob_column_value()); } if (OB_SUCC(ret)) { - if (cv->is_json() || cv->is_geometry()) { + if (lob_data_get_ctx->is_ext_info_log()) { + if (cv->is_json()) { + // old data isn't passed when data is partial json + // so need set is_null_lob_old_columns_ + rv->old_columns_[usr_column_idx] = nullptr; + rv->is_null_lob_old_columns_[usr_column_idx] = true; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("not support ext info log type", KR(ret), K(is_new_value), KPC(lob_data_get_ctx), KPC(cv)); + } + } else if (cv->is_json() || cv->is_geometry()) { const common::ObObjType obj_type = cv->get_obj_type(); cv->value_.set_string(obj_type, *old_col_str); @@ -1353,7 +1379,7 @@ int ObLogFormatter::fill_normal_cols_( // TODO remove LOG_DEBUG("fill_normal_cols_", K(is_new_value), K(column_id), KPC(cv), K(lob_ctx_cols), "md5", calc_md5_cstr(old_col_str->ptr(), old_col_str->length()), - "buf_len", old_col_str->length()); + "buf_len", old_col_str->length(), KPC(lob_data_get_ctx)); } 
else if (OB_ENTRY_NOT_EXIST == ret) { ret = OB_SUCCESS; rv->old_columns_[usr_column_idx] = nullptr; @@ -1836,6 +1862,9 @@ int ObLogFormatter::format_dml_update_(IBinlogRecord *br_data, const RowValue *r ret = OB_ERR_UNEXPECTED; LOG_ERROR("changed column new value is NULL", KR(ret), K(i), "column_num", row_value->column_num_); + } else if (row_value->is_diff_[i]) { + br_data->putNewDiff(str_val->ptr(), str_val->length()); + LOG_DEBUG("putNewDiff", K(i), KPC(str_val)); } else { br_data->putNew(str_val->ptr(), str_val->length()); } @@ -2091,6 +2120,8 @@ int ObLogFormatter::parse_aux_lob_meta_table_( if (OB_FAIL(parse_aux_lob_meta_table_insert_(*log_entry_task, stmt_task, *new_cols))) { LOG_ERROR("parse_aux_lob_meta_table_insert_ failed", KR(ret)); } + } else if (stmt_task.is_update()) { + // lob meta update data isn't used, just skip } else if (stmt_task.is_delete()) { if (OB_FAIL(parse_aux_lob_meta_table_delete_(*log_entry_task, stmt_task, *old_cols))) { LOG_ERROR("parse_aux_lob_meta_table_delete_ failed", KR(ret)); diff --git a/src/logservice/libobcdc/src/ob_log_formatter.h b/src/logservice/libobcdc/src/ob_log_formatter.h index 76a0c6d3fe..fd1cb404aa 100644 --- a/src/logservice/libobcdc/src/ob_log_formatter.h +++ b/src/logservice/libobcdc/src/ob_log_formatter.h @@ -138,6 +138,7 @@ private: bool is_rowkey_[common::OB_MAX_COLUMN_NUMBER]; bool is_changed_[common::OB_MAX_COLUMN_NUMBER]; bool is_null_lob_old_columns_[common::OB_MAX_COLUMN_NUMBER]; + bool is_diff_[common::OB_MAX_COLUMN_NUMBER]; // invoke before handle format stmt task // incase of usage of column_num but row doesn't contain valid column and column_num is not set diff --git a/src/logservice/libobcdc/src/ob_log_part_trans_parser.cpp b/src/logservice/libobcdc/src/ob_log_part_trans_parser.cpp index 3d59038d60..8f9c6b6244 100644 --- a/src/logservice/libobcdc/src/ob_log_part_trans_parser.cpp +++ b/src/logservice/libobcdc/src/ob_log_part_trans_parser.cpp @@ -350,6 +350,20 @@ int 
ObLogPartTransParser::parse_stmts_( ++row_index; } } // need_ignore_row=false + } else if (MutatorType::MUTATOR_ROW_EXT_INFO == mutator_type) { + if (OB_FAIL(handle_mutator_ext_info_log_( + tenant, + tablet_id, + redo_data, + redo_data_len, + pos, + task, + redo_log_entry_task))) { + LOG_ERROR("handle_mutator_ext_info_log_ failed", KR(ret), + "tls_id", task.get_tls_id(), + "trans_id", task.get_trans_id(), + K(tablet_id), K(redo_log_entry_task), K(row_index)); + } } else { ret = OB_NOT_SUPPORTED; LOG_ERROR("not support mutator type", KR(ret), K(mutator_type)); @@ -921,5 +935,104 @@ const transaction::ObTxSEQ &ObLogPartTransParser::get_row_seq_(PartTransTask &ta return row.seq_no_; } +int ObLogPartTransParser::parse_ext_info_log_mutator_row_( + ObLogTenant *tenant, + const char *redo_data, + const int64_t redo_data_len, + int64_t &pos, + PartTransTask &part_trans_task, + ObLogEntryTask &redo_log_entry_task, + MutatorRow *&row, + bool &is_ignored) +{ + int ret = OB_SUCCESS; + is_ignored = false; + row = nullptr; + bool need_rollback = false; + + if (OB_FAIL(alloc_mutator_row_(part_trans_task, redo_log_entry_task, row))) { + LOG_ERROR("alloc_mutator_row_ failed", KR(ret), K(part_trans_task), K(redo_log_entry_task)); + } else if (OB_FAIL(row->deserialize(redo_data, redo_data_len, pos))) { + LOG_ERROR("deserialize mutator row fail", KR(ret), KPC(row), K(redo_data_len), K(pos)); + } else if (OB_FAIL(check_row_need_rollback_(part_trans_task, *row, need_rollback))) { + LOG_ERROR("check_row_need_rollback_ failed", KR(ret), K(part_trans_task), K(redo_log_entry_task), KPC(row)); + } else if (need_rollback) { + LOG_DEBUG("rollback row by RollbackToSavepoint", + "tls_id", part_trans_task.get_tls_id(), + "trans_id", part_trans_task.get_trans_id(), + "row_seq_no", row->seq_no_); + } else if (part_trans_task.is_ddl_trans()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("part tans task is ddl not expected", KR(ret), K(part_trans_task)); + } + + if (OB_SUCC(ret)) { + is_ignored = 
need_rollback; + } + + if (OB_FAIL(ret) || is_ignored) { + free_mutator_row_(part_trans_task, redo_log_entry_task, row); + row = nullptr; + } else if (OB_ISNULL(row)) { + ret = OB_INVALID_DATA; + } + + return ret; +} + +int ObLogPartTransParser::handle_mutator_ext_info_log_( + ObLogTenant *tenant, + const ObTabletID &tablet_id, + const char *redo_data, + const int64_t redo_data_len, + int64_t &pos, + PartTransTask &part_trans_task, + ObLogEntryTask &redo_log_entry_task) +{ + int ret = OB_SUCCESS; + bool is_ignored = false; + MutatorRow *row = nullptr; + if (OB_FAIL(parse_ext_info_log_mutator_row_( + tenant, + redo_data, + redo_data_len, + pos, + part_trans_task, + redo_log_entry_task, + row, + is_ignored))) { + LOG_ERROR("parse_mutator_row_ failed", KR(ret), + "tls_id", part_trans_task.get_tls_id(), + "trans_id", part_trans_task.get_trans_id(), + K(tablet_id)); + } else if (! is_ignored) { + const int64_t commit_version = part_trans_task.get_trans_commit_version(); + const uint64_t tenant_id = tenant->get_tenant_id(); + const transaction::ObTransID &trans_id = part_trans_task.get_trans_id(); + const uint64_t table_id = 0; + ObLobId lob_id; // empty + transaction::ObTxSEQ row_seq_no = row->seq_no_; + ObString ext_info_log; + ObCDCLobAuxMetaStorager &lob_aux_meta_storager = TCTX.lob_aux_meta_storager_; + LobAuxMetaKey lob_aux_meta_key(commit_version, tenant_id, trans_id, table_id, lob_id, row_seq_no); + if (OB_FAIL(row->parse_ext_info_log(ext_info_log))) { + LOG_WARN("parse_ext_info_log fail", KR(ret)); + } else if (OB_FAIL(lob_aux_meta_storager.put(lob_aux_meta_key, "ext_info_log", ext_info_log.ptr(), ext_info_log.length()))) { + LOG_ERROR("lob_aux_meta_storager put failed", KR(ret), K(lob_aux_meta_key)); + } else { + LOG_DEBUG("put ext info log success", K(lob_aux_meta_key), "log_length", ext_info_log.length()); + } + } else { + LOG_INFO("ext info log is ignored", K(tablet_id), + "tenant_id", tenant->get_tenant_id(), + "tls_id", part_trans_task.get_tls_id(), + 
"trans_id", part_trans_task.get_trans_id()); + } + if (OB_NOT_NULL(row)) { + free_mutator_row_(part_trans_task, redo_log_entry_task, row); + } + return ret; +} + } } diff --git a/src/logservice/libobcdc/src/ob_log_part_trans_parser.h b/src/logservice/libobcdc/src/ob_log_part_trans_parser.h index 3a4cddcc07..76aa4de4c0 100644 --- a/src/logservice/libobcdc/src/ob_log_part_trans_parser.h +++ b/src/logservice/libobcdc/src/ob_log_part_trans_parser.h @@ -159,6 +159,24 @@ private: MutatorRow *&row, ObCDCTableInfo &table_info, bool &is_ignored); + int handle_mutator_ext_info_log_( + ObLogTenant *tenant, + const ObTabletID &tablet_id, + const char *redo_data, + const int64_t redo_data_len, + int64_t &pos, + PartTransTask &part_trans_task, + ObLogEntryTask &redo_log_entry_task); + int parse_ext_info_log_mutator_row_( + ObLogTenant *tenant, + const char *redo_data, + const int64_t redo_data_len, + int64_t &pos, + PartTransTask &part_trans_task, + ObLogEntryTask &redo_log_entry_task, + MutatorRow *&row, + bool &is_ignored); + private: bool inited_; IObLogBRPool *br_pool_; diff --git a/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp b/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp index 1c94720a4f..a543749e35 100644 --- a/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp +++ b/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp @@ -24,6 +24,7 @@ #include "storage/memtable/ob_memtable_mutator.h" // ObMemtableMutatorMeta #include "storage/memtable/ob_memtable_context.h" // ObTransRowFlag #include "storage/blocksstable/ob_row_reader.h" // ObRowReader +#include "storage/lob/ob_ext_info_callback.h" // ObExtInfoLog #include "ob_log_binlog_record.h" // ObLogBR #include "ob_log_binlog_record_pool.h" // ObLogBRPool @@ -976,6 +977,37 @@ int MutatorRow::get_cols( return ret; } +int MutatorRow::parse_ext_info_log(ObString &ext_info_log) +{ + int ret = OB_SUCCESS; + blocksstable::ObRowReader row_reader; + blocksstable::ObDatumRow datum_row; + bool is_found = false; + 
if (OB_UNLIKELY(cols_parsed_)) { + ret = OB_STATE_NOT_MATCH; + LOG_ERROR("columns has been parsed", KR(ret), K(cols_parsed_)); + } else if (OB_UNLIKELY(! deserialized_)) { + ret = OB_STATE_NOT_MATCH; + LOG_ERROR("row has not been deserialized", KR(ret)); + } else if (OB_ISNULL(new_row_.data_) || OB_UNLIKELY(new_row_.size_ <= 0)) { + LOG_WARN("new row data is empty", K(new_row_), + "mutator_row", (const ObMemtableMutatorRow &)(*this)); + new_cols_.reset(); + } else if (OB_UNLIKELY(new_cols_.num_ > 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("column value list is not reseted", KR(ret), K(new_cols_)); + } else if (OB_FAIL(row_reader.read_row(new_row_.data_, new_row_.size_, nullptr, datum_row))) { + LOG_ERROR("Failed to read datum row", K(ret)); + } else if (datum_row.get_column_count() != storage::ObExtInfoCallback::OB_EXT_INFO_MUTATOR_ROW_COUNT) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("ext info mutator column count invalid", KR(ret), "column_count", datum_row.get_column_count()); + } else { + ext_info_log = datum_row.storage_datums_[storage::ObExtInfoCallback::OB_EXT_INFO_MUTATOR_ROW_VALUE_IDX].get_string(); + cols_parsed_ = true; + } + return ret; +} + //////////////////////////////////////// DmlStmtUniqueID /////////////////////////////////////////////// int64_t DmlStmtUniqueID::get_dml_unique_id_length() const { diff --git a/src/logservice/libobcdc/src/ob_log_part_trans_task.h b/src/logservice/libobcdc/src/ob_log_part_trans_task.h index fe15d80f70..489e749b64 100644 --- a/src/logservice/libobcdc/src/ob_log_part_trans_task.h +++ b/src/logservice/libobcdc/src/ob_log_part_trans_task.h @@ -256,6 +256,8 @@ public: ObLobDataOutRowCtxList &get_new_lob_ctx_cols() { return new_lob_ctx_cols_; } + int parse_ext_info_log(ObString &ext_info_log); + public: TO_STRING_KV( "Row", static_cast(*this), diff --git a/src/logservice/libobcdc/src/ob_obj2str_helper.cpp b/src/logservice/libobcdc/src/ob_obj2str_helper.cpp index 6f616f48ba..4e29d21f9a 100644 --- 
a/src/logservice/libobcdc/src/ob_obj2str_helper.cpp +++ b/src/logservice/libobcdc/src/ob_obj2str_helper.cpp @@ -21,9 +21,7 @@ #include "sql/engine/expr/ob_datum_cast.h" // padding_char_for_cast #include "lib/alloc/ob_malloc_allocator.h" #include "lib/geo/ob_geo_utils.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_util.h" -#endif #include "sql/engine/expr/ob_expr_uuid.h" #include "sql/engine/expr/ob_expr_operator.h" #include "sql/engine/expr/ob_expr_res_type_map.h" @@ -453,12 +451,8 @@ int ObObj2strHelper::convert_xmltype_to_text_( common::ObString &str, common::ObIAllocator &allocator) const { -#ifdef OB_BUILD_ORACLE_XML const ObString &data = obj.get_string(); return ObXmlUtil::xml_bin_to_text(allocator, data, str); -#else - return OB_NOT_SUPPORTED; -#endif } bool ObObj2strHelper::need_padding_(const lib::Worker::CompatMode &compat_mode, diff --git a/src/logservice/libobcdc/tests/ob_binlog_record_printer.cpp b/src/logservice/libobcdc/tests/ob_binlog_record_printer.cpp index 2a1bdb0348..d220d84969 100644 --- a/src/logservice/libobcdc/tests/ob_binlog_record_printer.cpp +++ b/src/logservice/libobcdc/tests/ob_binlog_record_printer.cpp @@ -553,6 +553,7 @@ int ObBinlogRecordPrinter::output_data_file_column_data(IBinlogRecord *br, ObStringBuffer enum_set_values_str(&str_allocator); bool is_geometry = is_geometry_type(ctype); bool is_xml = is_xml_type(ctype); + bool is_diff = (index < new_cols_count) && new_cols[index].m_diff_val; constexpr int64_t string_print_md5_threshold = 4L << 10; const bool is_type_for_md5_printing = is_lob || is_json || is_geometry || is_xml || (is_string && col_data_length >= string_print_md5_threshold); @@ -567,6 +568,15 @@ int ObBinlogRecordPrinter::output_data_file_column_data(IBinlogRecord *br, ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_is_signed:%s", column_index, is_signed); ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_encoding:%s", column_index, encoding); ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_is_not_null:%s", 
column_index, is_not_null); + if (is_diff) { + if (is_json) { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] is_json_diff:true", column_index); + } else { + ROW_PRINTF(ptr, size, pos, ri, "[C%ld] unkonwn_diff", column_index); + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("unkonwn_diff", K(ret), K(index), K(column_index), K(ctype)); + } + } if (enable_print_detail) { if (is_hidden_row_key_column) { ROW_PRINTF(ptr, size, pos, ri, "[C%ld] column_is_hidden_rowkey:%d", column_index, is_hidden_row_key_column); diff --git a/src/objit/include/objit/common/ob_item_type.h b/src/objit/include/objit/common/ob_item_type.h index 275dbbc272..92fc80abec 100755 --- a/src/objit/include/objit/common/ob_item_type.h +++ b/src/objit/include/objit/common/ob_item_type.h @@ -1132,7 +1132,6 @@ typedef enum ObItemType T_DIAGNOSTICS, T_JSON_TABLE_EXPRESSION, T_JSON_TABLE_COLUMN, - //pseudo_column T_PSEUDO_COLUMN, T_LEVEL, @@ -2520,6 +2519,7 @@ typedef enum ObOutlineType || ((op) == T_FUN_SYS_JSON_OVERLAPS) \ || ((op) == T_FUN_SYS_JSON_CONTAINS) \ || ((op) == T_FUN_SYS_JSON_CONTAINS_PATH) \ + || ((op) == T_FUN_SYS_JSON_SCHEMA_VALID) \ || ((op) == T_FUN_SYS_JSON_EQUAL) \ || ((op) == T_FUN_SYS_IS_JSON) \ || ((op) == T_FUN_SYS_JSON_EXISTS) \ diff --git a/src/observer/mysql/ob_query_driver.cpp b/src/observer/mysql/ob_query_driver.cpp index 05ca5c8586..a2861423c8 100644 --- a/src/observer/mysql/ob_query_driver.cpp +++ b/src/observer/mysql/ob_query_driver.cpp @@ -25,11 +25,9 @@ #include "lib/charset/ob_charset.h" #include "sql/engine/expr/ob_expr_sql_udt_utils.h" #include "observer/mysql/obmp_stmt_prexecute.h" -#include "sql/engine/expr/ob_expr_xml_func_helper.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_multi_mode_interface.h" #include "lib/xml/ob_xml_util.h" -#endif +#include "sql/engine/expr/ob_expr_xml_func_helper.h" namespace oceanbase { diff --git a/src/observer/ob_server.cpp b/src/observer/ob_server.cpp index bab86ceb3b..fe02cd3c23 100644 --- a/src/observer/ob_server.cpp +++ 
b/src/observer/ob_server.cpp @@ -120,9 +120,7 @@ #ifdef OB_BUILD_TDE_SECURITY #include "share/ob_master_key_getter.h" #endif -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_libxml2_sax_handler.h" -#endif #include "ob_check_params.h" using namespace oceanbase::lib; @@ -664,11 +662,9 @@ void ObServer::destroy() sql_engine_.destroy(); FLOG_INFO("sql engine destroyed"); -#ifdef OB_BUILD_ORACLE_XML FLOG_INFO("begin to destroy xml ctx"); ObLibXml2SaxHandler::destroy(); FLOG_INFO("xml ctx destroyed"); -#endif FLOG_INFO("begin to destroy pl engine"); pl_engine_.destory(); @@ -2579,11 +2575,9 @@ int ObServer::init_sql() } } -#ifdef OB_BUILD_ORACLE_XML if (OB_SUCC(ret)) { ObLibXml2SaxHandler::init(); } -#endif if (OB_SUCC(ret)) { LOG_INFO("init sql done"); diff --git a/src/pl/ob_pl_interface_pragma.h b/src/pl/ob_pl_interface_pragma.h index 9d0a4d4955..e221c06ab5 100644 --- a/src/pl/ob_pl_interface_pragma.h +++ b/src/pl/ob_pl_interface_pragma.h @@ -61,11 +61,10 @@ #include "pl/sys_package/ob_sdo_geometry.h" #include "pl/sys_package/ob_dbms_mview.h" #include "pl/sys_package/ob_dbms_mview_stats.h" -#endif -#include "pl/sys_package/ob_pl_dbms_resource_manager.h" -#ifdef OB_BUILD_ORACLE_XML +#include "pl/sys_package/ob_json_array_type.h" #include "pl/sys_package/ob_xml_type.h" #endif +#include "pl/sys_package/ob_pl_dbms_resource_manager.h" #include "pl/sys_package/ob_dbms_session.h" #include "pl/sys_package/ob_dbms_workload_repository.h" #include "pl/sys_package/ob_dbms_mview_mysql.h" @@ -466,7 +465,6 @@ INTERFACE_DEF(INTERFACE_SDO_GEOMETRY_GET_GEOJSON, "SDO_GEOMETRY_GET_GEOJSON", (ObSdoGeometry::get_geojson)) //end of sdo_geometry -#ifdef OB_BUILD_ORACLE_XML //start of xmltype INTERFACE_DEF(INTERFACE_XML_TYPE_TRANSFORM, "XML_TYPE_TRANSFORM", (ObXmlType::transform)) INTERFACE_DEF(INTERFACE_XML_TYPE_GETCLOBVAL, "XML_TYPE_GETCLOBVAL", (ObXmlType::getclobval)) @@ -478,7 +476,6 @@ //start of dbms_xmlgen INTERFACE_DEF(INTERFACE_DBMS_XMLGEN_CONVERT, "DBMS_XMLGEN_CONVERT", 
(ObDbmsXmlGen::convert)) //end of dbms_xmlgen -#endif //start of dbms_crypto INTERFACE_DEF(INTERFACE_DBMS_CRYPTO_ENCRYPT, "DBMS_CRYPTO_ENCRYPT", (ObDbmsCrypto::encrypt)) @@ -685,6 +682,15 @@ INTERFACE_DEF(INTERFACE_JSON_OBJECT_RENAME_KEY, "JSON_OBJECT_RENAME_KEY", (ObPlJsonObject::rename_key)) INTERFACE_DEF(INTERFACE_JSON_OBJECT_CLONE, "JSON_OBJECT_CLONE", (ObPlJsonObject::clone)) // end of json_object_t + + // start of json_array_t + INTERFACE_DEF(INTERFACE_JSON_ARRAY_ON_ERROR, "JSON_ARRAY_ON_ERROR", (ObPlJsonArray::set_on_error)) + INTERFACE_DEF(INTERFACE_JSON_ARRAY_PARSE, "JSON_ARRAY_PARSE", (ObPlJsonArray::parse)) + INTERFACE_DEF(INTERFACE_JSON_ARRAY_GET, "JSON_ARRAY_GET", (ObPlJsonArray::get)) + INTERFACE_DEF(INTERFACE_JSON_ARRAY_GET_TYPE, "JSON_ARRAY_GET_TYPE", (ObPlJsonArray::get_type)) + INTERFACE_DEF(INTERFACE_JSON_ARRAY_CONSTRUCTOR, "JSON_ARRAY_CONSTRUCTOR", (ObPlJsonArray::constructor)) + INTERFACE_DEF(INTERFACE_JSON_ARRAY_CLONE, "JSON_ARRAY_CLONE", (ObPlJsonArray::clone)) + // end of json_array_t #endif #ifdef OB_BUILD_ORACLE_PL diff --git a/src/pl/ob_pl_package_manager.cpp b/src/pl/ob_pl_package_manager.cpp index 98858d3251..a871b7d0fe 100644 --- a/src/pl/ob_pl_package_manager.cpp +++ b/src/pl/ob_pl_package_manager.cpp @@ -260,6 +260,8 @@ static ObSysPackageFile oracle_sys_package_file_table[] = { {"json_object_t", "json_object_type.sql", "json_object_type_body.sql"}, {"dbms_mview", "dbms_mview.sql", "dbms_mview_body.sql"}, {"dbms_mview_stats", "dbms_mview_stats.sql", "dbms_mview_stats_body.sql"}, + {"json_array_t", "json_array_type.sql", "json_array_type_body.sql"}, + {"xmlsequence", "xml_sequence_type.sql", "xml_sequence_type_body.sql"}, {"sdo_geometry", "sdo_geometry.sql", "sdo_geometry_body.sql"}, #endif }; diff --git a/src/pl/ob_pl_resolver.cpp b/src/pl/ob_pl_resolver.cpp index f23ec57c1f..82cbfbf1ea 100644 --- a/src/pl/ob_pl_resolver.cpp +++ b/src/pl/ob_pl_resolver.cpp @@ -7073,6 +7073,24 @@ int ObPLResolver::check_in_param_type_legal(const 
ObIRoutineParam *param_info, } } } + if (OB_FAIL(ret)) { + } else if (actually_type.get_user_type_id() != expected_type.get_user_type_id()) { +#ifdef OB_BUILD_ORACLE_PL + if (ObPlJsonUtil::is_pl_jsontype(actually_type.get_user_type_id())) { + OZ (check_composite_compatible(current_block_->get_namespace(), + expected_type.get_user_type_id(), + actually_type.get_user_type_id(), + is_legal)); + } else { +#endif + OZ (check_composite_compatible(current_block_->get_namespace(), + actually_type.get_user_type_id(), + expected_type.get_user_type_id(), + is_legal)); + } +#ifdef OB_BUILD_ORACLE_PL + } +#endif } else if (actually_type.is_composite_type() || expected_type.is_composite_type()) { if (actually_type.is_obj_type() && ObExtendType == actually_type.get_data_type()->get_obj_type()) { @@ -9507,7 +9525,8 @@ bool ObPLResolver::is_json_type_compatible(const ObUserDefinedType *left, const { #ifdef OB_BUILD_ORACLE_PL return (ObPlJsonUtil::is_pl_json_element_type(left->get_user_type_id()) - && ObPlJsonUtil::is_pl_json_object_type(right->get_user_type_id())) ; + && (ObPlJsonUtil::is_pl_json_object_type(right->get_user_type_id()) + || ObPlJsonUtil::is_pl_json_array_type(right->get_user_type_id()))) ; #else return false; #endif @@ -9788,7 +9807,8 @@ int ObPLResolver::resolve_expr(const ParseNode *node, #ifdef OB_BUILD_ORACLE_PL // error code compiltable with oracle if (ObPlJsonUtil::is_pl_json_object_type(expected_type->get_user_type_id()) - && ObPlJsonUtil::is_pl_json_element_type(expr->get_result_type().get_udt_id())) { + && ObPlJsonUtil::is_pl_json_element_type(expr->get_result_type().get_udt_id()) + && ObPlJsonUtil::is_pl_json_array_type(expr->get_result_type().get_udt_id())) { ret = OB_ERR_EXPRESSION_WRONG_TYPE; } #endif @@ -11471,7 +11491,7 @@ int ObPLResolver::resolve_record_construct(const ObQualifiedName &q_name, } if (OB_SUCC(ret)) { int64_t param_cnt = udf_info.ref_expr_->get_param_exprs().count(); - int64_t member_cnt = object_type->get_member_count(); + int64_t 
member_cnt = object_type->is_opaque_type() ? 0 : object_type->get_member_count(); bool is_opaque_cons_and_no_self_param = object_type->is_opaque_type() && (param_cnt - 2) == member_cnt && udf_info.is_udf_udt_cons(); diff --git a/src/pl/sys_package/ob_json_array_type.h b/src/pl/sys_package/ob_json_array_type.h new file mode 100644 index 0000000000..b3b09cab19 --- /dev/null +++ b/src/pl/sys_package/ob_json_array_type.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 OceanBase Technology Co.,Ltd. + * OceanBase is licensed under Mulan PubL v1. + * You can use this software according to the terms and conditions of the Mulan PubL v1. + * You may obtain a copy of Mulan PubL v1 at: + * http://license.coscl.org.cn/MulanPubL-1.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v1 for more details. + */ + +#ifndef OCEANBASE_SRC_PL_SYS_PACKAGE_JSON_ARRAY_TYPE_H_ +#define OCEANBASE_SRC_PL_SYS_PACKAGE_JSON_ARRAY_TYPE_H_ + +#include "lib/json_type/ob_json_base.h" +#include "sql/engine/ob_exec_context.h" +#include "sql/session/ob_sql_session_info.h" +#include "pl/ob_pl_user_type.h" +#include "pl/sys_package/ob_json_pl_utils.h" +namespace oceanbase +{ +namespace pl +{ +class ObPlJsonArray { +public: + static int constructor(sql::ObExecContext &ctx, sql::ParamStore ¶ms, common::ObObj &result); + static int parse(sql::ObExecContext &ctx, sql::ParamStore ¶ms, common::ObObj &result); + static int get(sql::ObExecContext &ctx, sql::ParamStore ¶ms, common::ObObj &result); + static int get_type(sql::ObExecContext &ctx, sql::ParamStore ¶ms, common::ObObj &result); + static int clone(sql::ObExecContext &ctx, sql::ParamStore ¶ms, common::ObObj &result); + static int set_on_error(sql::ObExecContext &ctx, sql::ParamStore ¶ms, common::ObObj &result); +private: + static int get_array_value(sql::ObExecContext &ctx, + 
sql::ParamStore ¶ms, + ObJsonNode *&json_val, + int& error_behavior, + int expect_param_nums = 2); +}; + +} // end pl +} // end oceanbase +#endif \ No newline at end of file diff --git a/src/pl/sys_package/ob_json_pl_utils.h b/src/pl/sys_package/ob_json_pl_utils.h index 454ea59e7b..db22ffc43a 100644 --- a/src/pl/sys_package/ob_json_pl_utils.h +++ b/src/pl/sys_package/ob_json_pl_utils.h @@ -56,6 +56,7 @@ public: ObPLJsonBaseType *&jsontype); static int print_decimal(number::ObNumber &num, ObScale scale, ObJsonBuffer &j_buf); static int get_json_object(sql::ObExecContext &ctx, ObJsonNode*& json_val); + static int get_json_array(sql::ObExecContext &ctx, ObJsonNode*& json_val); static int get_json_boolean(sql::ObExecContext &ctx, ObObj &data, ObJsonNode*& json_val); static int get_json_null(sql::ObExecContext &ctx, ObJsonNode*& json_val); static int set_on_error(sql::ObExecContext &ctx, sql::ParamStore ¶ms, common::ObObj &result); diff --git a/src/rootserver/ob_ddl_service.cpp b/src/rootserver/ob_ddl_service.cpp index a441c6feab..3246043891 100755 --- a/src/rootserver/ob_ddl_service.cpp +++ b/src/rootserver/ob_ddl_service.cpp @@ -8730,6 +8730,7 @@ int ObDDLService::fill_new_column_attributes( new_column_schema.set_extended_type_info(alter_column_schema.get_extended_type_info()); new_column_schema.set_srs_id(alter_column_schema.get_srs_id()); new_column_schema.set_skip_index_attr(alter_column_schema.get_skip_index_attr().get_packed_value()); + new_column_schema.set_lob_chunk_size(alter_column_schema.get_lob_chunk_size()); if (OB_FAIL(new_column_schema.get_local_session_var().deep_copy(alter_column_schema.get_local_session_var()))) { LOG_WARN("deep copy local session vars failed", K(ret)); } diff --git a/src/share/CMakeLists.txt b/src/share/CMakeLists.txt index 96cebd0b71..1f07c263b3 100644 --- a/src/share/CMakeLists.txt +++ b/src/share/CMakeLists.txt @@ -250,6 +250,7 @@ ob_set_subtarget(ob_share common_mixed system_variable/ob_system_variable_factory.cpp 
system_variable/ob_system_variable_init.cpp ob_lob_access_utils.cpp + ob_json_access_utils.cpp table/ob_table.cpp table/ob_table_rpc_struct.cpp table/ob_table_load_define.cpp diff --git a/src/share/datum/ob_datum_cmp_func_def.h b/src/share/datum/ob_datum_cmp_func_def.h index f5b56c67c5..bee82eedb2 100644 --- a/src/share/datum/ob_datum_cmp_func_def.h +++ b/src/share/datum/ob_datum_cmp_func_def.h @@ -365,8 +365,8 @@ struct ObDatumJsonCmp : public ObDefined<> } else if (OB_FAIL(r_instr_iter.get_full_data(r_data))) { COMMON_LOG(WARN, "Lob: get right lob str iter full data failed ", K(ret), K(r_instr_iter)); } else { - ObJsonBin j_bin_l(l_data.ptr(), l_data.length()); - ObJsonBin j_bin_r(r_data.ptr(), r_data.length()); + ObJsonBin j_bin_l(l_data.ptr(), l_data.length(), &allocator); + ObJsonBin j_bin_r(r_data.ptr(), r_data.length(), &allocator); ObIJsonBase *j_base_l = &j_bin_l; ObIJsonBase *j_base_r = &j_bin_r; diff --git a/src/share/datum/ob_datum_funcs.cpp b/src/share/datum/ob_datum_funcs.cpp index 7c1aeb9df3..b3e3f2c244 100644 --- a/src/share/datum/ob_datum_funcs.cpp +++ b/src/share/datum/ob_datum_funcs.cpp @@ -523,7 +523,7 @@ struct DatumJsonHashCalculator : public DefHashMethod } else if (OB_FAIL(str_iter.get_full_data(j_bin_str))) { LOG_WARN("Lob: str iter get full data failed ", K(ret), K(str_iter)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &allocator); ObIJsonBase *j_base = &j_bin; if (j_bin_str.length() == 0) { res = seed; diff --git a/src/share/inner_table/sys_package/json_object_type.sql b/src/share/inner_table/sys_package/json_object_type.sql index 1dd7ec2f4a..56a27a0062 100644 --- a/src/share/inner_table/sys_package/json_object_type.sql +++ b/src/share/inner_table/sys_package/json_object_type.sql @@ -52,6 +52,7 @@ CREATE OR REPLACE TYPE JSON_OBJECT_T OID '300024' AS OPAQUE MEMBER PROCEDURE put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value BOOLEAN), MEMBER PROCEDURE 
put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value JSON_OBJECT_T), MEMBER PROCEDURE put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value JSON_ELEMENT_T), + MEMBER PROCEDURE put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value JSON_ARRAY_T), MEMBER PROCEDURE put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value VARCHAR2), MEMBER PROCEDURE put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value NUMBER), MEMBER PROCEDURE put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value DATE), diff --git a/src/share/inner_table/sys_package/json_object_type_body.sql b/src/share/inner_table/sys_package/json_object_type_body.sql index 8cd941ca95..4b96535ee0 100644 --- a/src/share/inner_table/sys_package/json_object_type_body.sql +++ b/src/share/inner_table/sys_package/json_object_type_body.sql @@ -104,22 +104,22 @@ CREATE OR REPLACE TYPE BODY JSON_OBJECT_T AS MEMBER FUNCTION get_Boolean(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2) return BOOLEAN; PRAGMA INTERFACE(c, JSON_OBJECT_GET_BOOLEAN); - MEMBER FUNCTION get_Clob(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2) return CLOB; + MEMBER FUNCTION get_Clob(self IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2) return CLOB; PRAGMA INTERFACE(c, JSON_OBJECT_GET_CLOB); - MEMBER FUNCTION get_Blob(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2) return BLOB; + MEMBER FUNCTION get_Blob(self IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2) return BLOB; PRAGMA INTERFACE(c, JSON_OBJECT_GET_BLOB); - MEMBER FUNCTION get_Object(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2) return JSON_OBJECT_T; + MEMBER FUNCTION get_Object(self IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2) return JSON_OBJECT_T; PRAGMA INTERFACE(c, JSON_OBJECT_GET_OBJECT); - MEMBER FUNCTION get(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2) return JSON_ELEMENT_T; + MEMBER FUNCTION get(self IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2) return JSON_ELEMENT_T; PRAGMA INTERFACE(c, JSON_OBJECT_GET_ELEMENT); - MEMBER PROCEDURE get_Clob(SELF IN OUT NOCOPY JSON_OBJECT_T, key 
VARCHAR2, c IN OUT CLOB); + MEMBER PROCEDURE get_Clob(self IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, c IN OUT CLOB); PRAGMA INTERFACE(c, JSON_OBJECT_GET_CLOB_PROC); - MEMBER PROCEDURE get_Blob(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, c IN OUT BLOB); + MEMBER PROCEDURE get_Blob(self IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, c IN OUT BLOB); PRAGMA INTERFACE(c, JSON_OBJECT_GET_BLOB_PROC); MEMBER PROCEDURE put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value BOOLEAN); @@ -131,6 +131,9 @@ CREATE OR REPLACE TYPE BODY JSON_OBJECT_T AS MEMBER PROCEDURE put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value JSON_ELEMENT_T); PRAGMA INTERFACE(c, JSON_OBJECT_PUT); + MEMBER PROCEDURE put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value JSON_ARRAY_T); + PRAGMA INTERFACE(c, JSON_OBJECT_PUT); + MEMBER PROCEDURE put(SELF IN OUT NOCOPY JSON_OBJECT_T, key VARCHAR2, value VARCHAR2); PRAGMA INTERFACE(c, JSON_OBJECT_PUT_VARCHAR); diff --git a/src/share/inner_table/sys_package/xml_sequence_type.sql b/src/share/inner_table/sys_package/xml_sequence_type.sql index 4ff8ca6763..b7408fefea 100644 --- a/src/share/inner_table/sys_package/xml_sequence_type.sql +++ b/src/share/inner_table/sys_package/xml_sequence_type.sql @@ -1,3 +1,3 @@ CREATE OR REPLACE TYPE XMLSEQUENCE OID '300026' AS VARRAY(32767) OF xmltype; -// \ No newline at end of file +// diff --git a/src/share/ob_admin_dump_helper.cpp b/src/share/ob_admin_dump_helper.cpp index 544dd448d1..c80e83a539 100644 --- a/src/share/ob_admin_dump_helper.cpp +++ b/src/share/ob_admin_dump_helper.cpp @@ -209,6 +209,7 @@ void ObLogStat::reset() total_tx_redo_log_count_ = 0; normal_row_count_ = 0; table_lock_count_ = 0; + ext_info_log_count_ = 0; } int64_t ObLogStat::total_size() const diff --git a/src/share/ob_admin_dump_helper.h b/src/share/ob_admin_dump_helper.h index 1f8758b029..5b83ac39ab 100644 --- a/src/share/ob_admin_dump_helper.h +++ b/src/share/ob_admin_dump_helper.h @@ -137,6 +137,7 @@ struct ObLogStat int64_t 
total_tx_redo_log_count_; int64_t normal_row_count_; int64_t table_lock_count_; + int64_t ext_info_log_count_; TO_STRING_KV( @@ -154,7 +155,8 @@ struct ObLogStat K(total_tx_log_count_), K(total_tx_redo_log_count_), K(normal_row_count_), - K(table_lock_count_)); + K(table_lock_count_), + K(ext_info_log_count_)); }; struct ObAdminMutatorStringArg diff --git a/src/share/ob_errno.cpp b/src/share/ob_errno.cpp index a86b21205c..fe370b0412 100644 --- a/src/share/ob_errno.cpp +++ b/src/share/ob_errno.cpp @@ -21495,8 +21495,8 @@ static const _error _error_OB_ERR_INVALID_XPATH_EXPRESSION = { .error_solution = "Contact OceanBase Support", .mysql_errno = ER_UNKNOWN_ERROR, .sqlstate = "42000", - .str_error = "invalid xpath expression", - .str_user_error = "invalid xpath expression", + .str_error = "XPATH syntax error: ''", + .str_user_error = "XPATH syntax error: ''", .oracle_errno = 31013, .oracle_str_error = "ORA-31013: invalid xpath expression", .oracle_str_user_error = "ORA-31013: invalid xpath expression" @@ -21555,8 +21555,8 @@ static const _error _error_OB_XPATH_EXPRESSION_UNSUPPORTED = { .error_solution = "Contact OceanBase Support", .mysql_errno = ER_UNKNOWN_ERROR, .sqlstate = "42000", - .str_error = "Given XPATH expression not supported", - .str_user_error = "Given XPATH expression not supported", + .str_error = "Only constant XPATH queries are supported", + .str_user_error = "Only constant XPATH queries are supported", .oracle_errno = 31012, .oracle_str_error = "ORA-31012: Given XPATH expression not supported", .oracle_str_user_error = "ORA-31012: Given XPATH expression not supported" @@ -21843,11 +21843,11 @@ static const _error _error_OB_ERR_XML_PARENT_ALREADY_CONTAINS_CHILD = { .error_solution = "Contact OceanBase Support", .mysql_errno = -1, .sqlstate = "42000", - .str_error = "Parent %.*s already contains child entry %.*s", - .str_user_error = "Parent %.*s already contains child entry %.*s", + .str_error = "Parent %.*s already contains child entry %s%.*s", + 
.str_user_error = "Parent %.*s already contains child entry %s%.*s", .oracle_errno = 31003, - .oracle_str_error = "ORA-31003: Parent %.*s already contains child entry %.*s", - .oracle_str_user_error = "ORA-31003: Parent %.*s already contains child entry %.*s" + .oracle_str_error = "ORA-31003: Parent %.*s already contains child entry %s%.*s", + .oracle_str_user_error = "ORA-31003: Parent %.*s already contains child entry %s%.*s" }; static const _error _error_OB_SERVER_IS_INIT = { .error_name = "OB_SERVER_IS_INIT", @@ -23871,8 +23871,8 @@ static const _error _error_OB_ERR_XML_PARSE = { .error_solution = "Contact OceanBase Support", .mysql_errno = ER_WRONG_VALUE, .sqlstate = "HY000", - .str_error = "XML parsing failed", - .str_user_error = "XML parsing failed", + .str_error = "Incorrect XML value", + .str_user_error = "Incorrect XML value", .oracle_errno = 31011, .oracle_str_error = "ORA-31011: XML parsing failed", .oracle_str_user_error = "ORA-31011: XML parsing failed" diff --git a/src/share/ob_errno.def b/src/share/ob_errno.def index d039e76432..dac5531d60 100755 --- a/src/share/ob_errno.def +++ b/src/share/ob_errno.def @@ -2082,12 +2082,12 @@ DEFINE_ORACLE_ERROR_DEP(OB_ERR_VALUE_NOT_ALLOWED, -7297, -1, "HY000", "value not DEFINE_ORACLE_ERROR_EXT(OB_ERR_INVALID_XML_DATATYPE, -7402, -1, "22000", "Invalid data type for the operation", "inconsistent datatypes: expected %s got %s", 932, "inconsistent datatypes", "inconsistent datatypes: expected %s got %s"); DEFINE_ORACLE_ERROR(OB_ERR_XML_MISSING_COMMA, -7403, -1, "42000", "missing comma", 917, "missing comma"); -DEFINE_ORACLE_ERROR(OB_ERR_INVALID_XPATH_EXPRESSION, -7404, ER_UNKNOWN_ERROR, "42000", "invalid xpath expression", 31013, "invalid xpath expression"); +DEFINE_ORACLE_ERROR(OB_ERR_INVALID_XPATH_EXPRESSION, -7404, ER_UNKNOWN_ERROR, "42000", "XPATH syntax error: ''", 31013, "invalid xpath expression"); DEFINE_ORACLE_ERROR(OB_ERR_EXTRACTVALUE_MULTI_NODES, -7405, -1, "42000", "EXTRACTVALUE cannot extract values of 
multiple nodes", 19025, "EXTRACTVALUE cannot extract values of multiple nodes"); DEFINE_ORACLE_ERROR(OB_ERR_XML_FRAMENT_CONVERT, -7406, -1, "42000", "Cannot convert XML fragment to the required datatype", 19012, "Cannot convert XML fragment to the required datatype"); DEFINE_ORACLE_ERROR(OB_INVALID_PRINT_OPTION, -7407, -1, "42000", "The specified printing option is invalid", 31188, "The specified printing option is invalid"); DEFINE_ORACLE_ERROR(OB_XML_CHAR_LEN_TOO_SMALL, -7408, -1, "42000", "character length specified for XMLSerialize is too small.", 19044, "character length specified for XMLSerialize is too small."); -DEFINE_ORACLE_ERROR(OB_XPATH_EXPRESSION_UNSUPPORTED, -7409, ER_UNKNOWN_ERROR, "42000", "Given XPATH expression not supported", 31012, "Given XPATH expression not supported"); +DEFINE_ORACLE_ERROR(OB_XPATH_EXPRESSION_UNSUPPORTED, -7409, ER_UNKNOWN_ERROR, "42000", "Only constant XPATH queries are supported", 31012, "Given XPATH expression not supported"); DEFINE_ORACLE_ERROR(OB_EXTRACTVALUE_NOT_LEAF_NODE, -7410, -1, "42000", "EXTRACTVALUE can only retrieve value of leaf node", 19026, "EXTRACTVALUE can only retrieve value of leaf node"); DEFINE_ORACLE_ERROR(OB_XML_INSERT_FRAGMENT, -7411, -1, "42000", "Cannot insert XML fragments", 19010, "Cannot insert XML fragments"); DEFINE_ORACLE_ERROR(OB_ERR_NO_ORDER_MAP_SQL, -7412, -1, "42000", "cannot ORDER objects without MAP or ORDER method", 22950, "cannot ORDER objects without MAP or ORDER method"); @@ -2111,7 +2111,7 @@ DEFINE_ORACLE_ERROR(OB_ERR_INVALID_XML_CHILD_NAME, -7429, -1, "42000", "The docu DEFINE_ORACLE_ERROR(OB_ERR_XML_NOT_SUPPORT_OPERATION, -7430, -1, "42000", "XML node '' (type=%s) does not support this operation", 31195, "XML node '' (type=%s) does not support this operation"); DEFINE_ORACLE_ERROR_EXT_DEP(OB_ERR_DUP_DEF_NAMESPACE, -7431, -1, "42000", "XQST0066 - duplicate default namespace definition - string", "XQST0066 - duplicate default namespace definition - %s.", 19118, "XQST0066 - 
duplicate default namespace definition - string.", "XQST0066 - duplicate default namespace definition - %s."); DEFINE_ORACLE_ERROR(OB_ERR_COMPARE_VARRAY_LOB_ATTR, -7432, -1, "42000", "cannot compare VARRAY or LOB attributes of an object type", 22901, "cannot compare VARRAY or LOB attributes of an object type"); -DEFINE_ORACLE_ERROR(OB_ERR_XML_PARENT_ALREADY_CONTAINS_CHILD, -7433, -1, "42000", "Parent %.*s already contains child entry %.*s", 31003, "Parent %.*s already contains child entry %.*s"); +DEFINE_ORACLE_ERROR(OB_ERR_XML_PARENT_ALREADY_CONTAINS_CHILD, -7433, -1, "42000", "Parent %.*s already contains child entry %s%.*s", 31003, "Parent %.*s already contains child entry %s%.*s"); // 余留位置 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -2360,7 +2360,7 @@ DEFINE_ORACLE_ERROR(OB_ERR_WHEN_CLAUSE, -9545, -1, "HY000", "WHEN clause cannot DEFINE_ORACLE_ERROR(OB_ERR_NEW_OLD_REFERENCES, -9546, -1, "HY000", "NEW or OLD references not allowed in table level triggers", 4082, "NEW or OLD references not allowed in table level triggers"); DEFINE_PLS_ERROR_EXT(OB_ERR_TYPE_DECL_ILLEGAL, -9547, -1, "HY000", "%%TYPE must be applied to a variable, column, field or attribute, not to other", "%%TYPE must be applied to a variable, column, field or attribute, not to '%.*s'", 206, "%%TYPE must be applied to a variable, column, field or attribute, not to other", "%%TYPE must be applied to a variable, column, field or attribute, not to '%.*s'"); DEFINE_PLS_ERROR_EXT(OB_ERR_OBJECT_INVALID, -9548, -1, "HY000", "object is invalid", "object '%.*s' is invalid", 905, "object is invalid", "object '%.*s' is invalid"); -DEFINE_ORACLE_ERROR_DEP(OB_ERR_XML_PARSE, -9549, ER_WRONG_VALUE, "HY000", "XML parsing failed", 31011, "XML parsing failed"); +DEFINE_ORACLE_ERROR_DEP(OB_ERR_XML_PARSE, -9549, ER_WRONG_VALUE, "HY000", "Incorrect XML value", 31011, "XML parsing failed"); DEFINE_PLS_ERROR_EXT(OB_ERR_EXP_NOT_ASSIGNABLE, 
-9550, -1, "HY000", "expression cannot be used as an assignment", "expression '%.*s' cannot be used as an assignment", 363, "expression cannot be used as an assignment", "expression '%.*s' cannot be used as an assignment"); DEFINE_ORACLE_ERROR(OB_ERR_CURSOR_CONTAIN_BOTH_REGULAR_AND_ARRAY, -9551, -1, "HY000", "Cursor contains both regular and array defines which is illegal", 29256, "Cursor contains both regular and array defines which is illegal"); DEFINE_PLS_ERROR(OB_ERR_STATIC_BOOL_EXPR, -9552, -1, "HY000", "a static boolean expression must be used", 174, "a static boolean expression must be used"); diff --git a/src/share/ob_errno.h b/src/share/ob_errno.h index 820f129860..e763fae41e 100644 --- a/src/share/ob_errno.h +++ b/src/share/ob_errno.h @@ -3665,12 +3665,12 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_ERR_VALUE_NOT_ALLOWED__USER_ERROR_MSG "value not allowed" #define OB_ERR_INVALID_XML_DATATYPE__USER_ERROR_MSG "inconsistent datatypes: expected %s got %s" #define OB_ERR_XML_MISSING_COMMA__USER_ERROR_MSG "missing comma" -#define OB_ERR_INVALID_XPATH_EXPRESSION__USER_ERROR_MSG "invalid xpath expression" +#define OB_ERR_INVALID_XPATH_EXPRESSION__USER_ERROR_MSG "XPATH syntax error: ''" #define OB_ERR_EXTRACTVALUE_MULTI_NODES__USER_ERROR_MSG "EXTRACTVALUE cannot extract values of multiple nodes" #define OB_ERR_XML_FRAMENT_CONVERT__USER_ERROR_MSG "Cannot convert XML fragment to the required datatype" #define OB_INVALID_PRINT_OPTION__USER_ERROR_MSG "The specified printing option is invalid" #define OB_XML_CHAR_LEN_TOO_SMALL__USER_ERROR_MSG "character length specified for XMLSerialize is too small." 
-#define OB_XPATH_EXPRESSION_UNSUPPORTED__USER_ERROR_MSG "Given XPATH expression not supported" +#define OB_XPATH_EXPRESSION_UNSUPPORTED__USER_ERROR_MSG "Only constant XPATH queries are supported" #define OB_EXTRACTVALUE_NOT_LEAF_NODE__USER_ERROR_MSG "EXTRACTVALUE can only retrieve value of leaf node" #define OB_XML_INSERT_FRAGMENT__USER_ERROR_MSG "Cannot insert XML fragments" #define OB_ERR_NO_ORDER_MAP_SQL__USER_ERROR_MSG "cannot ORDER objects without MAP or ORDER method" @@ -3694,7 +3694,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_ERR_XML_NOT_SUPPORT_OPERATION__USER_ERROR_MSG "XML node '' (type=%s) does not support this operation" #define OB_ERR_DUP_DEF_NAMESPACE__USER_ERROR_MSG "XQST0066 - duplicate default namespace definition - %s." #define OB_ERR_COMPARE_VARRAY_LOB_ATTR__USER_ERROR_MSG "cannot compare VARRAY or LOB attributes of an object type" -#define OB_ERR_XML_PARENT_ALREADY_CONTAINS_CHILD__USER_ERROR_MSG "Parent %.*s already contains child entry %.*s" +#define OB_ERR_XML_PARENT_ALREADY_CONTAINS_CHILD__USER_ERROR_MSG "Parent %.*s already contains child entry %s%.*s" #define OB_SERVER_IS_INIT__USER_ERROR_MSG "Server is initializing" #define OB_SERVER_IS_STOPPING__USER_ERROR_MSG "Server is stopping" #define OB_PACKET_CHECKSUM_ERROR__USER_ERROR_MSG "Packet checksum error" @@ -3863,7 +3863,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_ERR_NEW_OLD_REFERENCES__USER_ERROR_MSG "NEW or OLD references not allowed in table level triggers" #define OB_ERR_TYPE_DECL_ILLEGAL__USER_ERROR_MSG "%%TYPE must be applied to a variable, column, field or attribute, not to '%.*s'" #define OB_ERR_OBJECT_INVALID__USER_ERROR_MSG "object '%.*s' is invalid" -#define OB_ERR_XML_PARSE__USER_ERROR_MSG "XML parsing failed" +#define OB_ERR_XML_PARSE__USER_ERROR_MSG "Incorrect XML value" #define OB_ERR_EXP_NOT_ASSIGNABLE__USER_ERROR_MSG "expression '%.*s' cannot be used as an assignment" #define 
OB_ERR_CURSOR_CONTAIN_BOTH_REGULAR_AND_ARRAY__USER_ERROR_MSG "Cursor contains both regular and array defines which is illegal" #define OB_ERR_STATIC_BOOL_EXPR__USER_ERROR_MSG "a static boolean expression must be used" @@ -5942,7 +5942,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_ERR_XML_NOT_SUPPORT_OPERATION__ORA_USER_ERROR_MSG "ORA-31195: XML node '' (type=%s) does not support this operation" #define OB_ERR_DUP_DEF_NAMESPACE__ORA_USER_ERROR_MSG "ORA-19118: XQST0066 - duplicate default namespace definition - %s." #define OB_ERR_COMPARE_VARRAY_LOB_ATTR__ORA_USER_ERROR_MSG "ORA-22901: cannot compare VARRAY or LOB attributes of an object type" -#define OB_ERR_XML_PARENT_ALREADY_CONTAINS_CHILD__ORA_USER_ERROR_MSG "ORA-31003: Parent %.*s already contains child entry %.*s" +#define OB_ERR_XML_PARENT_ALREADY_CONTAINS_CHILD__ORA_USER_ERROR_MSG "ORA-31003: Parent %.*s already contains child entry %s%.*s" #define OB_SERVER_IS_INIT__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -8001, Server is initializing" #define OB_SERVER_IS_STOPPING__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -8002, Server is stopping" #define OB_PACKET_CHECKSUM_ERROR__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -8003, Packet checksum error" diff --git a/src/share/ob_json_access_utils.cpp b/src/share/ob_json_access_utils.cpp new file mode 100644 index 0000000000..02be210f50 --- /dev/null +++ b/src/share/ob_json_access_utils.cpp @@ -0,0 +1,53 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PubL v2 for more details. + * This file contains implementation for lob_access_utils. + */ + +#define USING_LOG_PREFIX SHARE + +#include "share/ob_json_access_utils.h" +#include "share/ob_cluster_version.h" +#include "share/rc/ob_tenant_base.h" +#include "lib/json_type/ob_json_base.h" + +namespace oceanbase +{ +using namespace common; +namespace share +{ + +int ObJsonWrapper::get_raw_binary(ObIJsonBase *j_base, ObString &result, ObIAllocator *allocator) +{ + INIT_SUCC(ret); + uint64_t tenant_data_version = 0; + uint64_t tenant_id = MTL_ID(); + if (tenant_id == 0) { + tenant_id = OB_SYS_TENANT_ID; + LOG_INFO("get tenant id zero"); + } + if (OB_ISNULL(allocator) || OB_ISNULL(j_base)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("allocator or j_base is null", K(ret), KP(allocator), KP(j_base)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret)); + } else if (! ((DATA_VERSION_4_2_2_0 <= tenant_data_version && tenant_data_version < DATA_VERSION_4_3_0_0) || tenant_data_version >= DATA_VERSION_4_3_1_0)) { + if (OB_FAIL(j_base->get_raw_binary_v0(result, allocator))) { + LOG_WARN("get raw binary fail", K(ret), K(tenant_data_version), K(tenant_id)); + } + } else if (OB_FAIL(j_base->get_raw_binary(result, allocator))) { + LOG_WARN("get raw binary fail", K(ret), K(tenant_data_version), K(tenant_id)); + } + return ret; +} + + +} // end namespace share +} // end namespace oceanbase diff --git a/src/share/ob_json_access_utils.h b/src/share/ob_json_access_utils.h new file mode 100644 index 0000000000..492ac1d702 --- /dev/null +++ b/src/share/ob_json_access_utils.h @@ -0,0 +1,35 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation for lob_access_utils. + */ + +#ifndef OCEANBASE_SHARE_OB_JSON_ACCESS_UTILS_ +#define OCEANBASE_SHARE_OB_JSON_ACCESS_UTILS_ + +#include "lib/string/ob_string.h" + +namespace oceanbase +{ +namespace common { +class ObIJsonBase; +class ObIAllocator; +} +namespace share +{ +class ObJsonWrapper +{ +public: + static int get_raw_binary(common::ObIJsonBase *j_base, common::ObString &result, common::ObIAllocator *allocator); +}; + +} // end namespace share +} // end namespace oceanbase +#endif diff --git a/src/share/ob_lob_access_utils.cpp b/src/share/ob_lob_access_utils.cpp index e46b0719a3..45c44b0fff 100644 --- a/src/share/ob_lob_access_utils.cpp +++ b/src/share/ob_lob_access_utils.cpp @@ -89,7 +89,7 @@ int ObTextStringIter::init(uint32_t buffer_len, ret = OB_ERR_UNEXPECTED; COMMON_LOG(WARN,"Lob: invalid lob", K(ret)); } else if (FALSE_IT(is_outrow_ = !locator.has_inrow_data())) { - } else if (!is_outrow_) { // inrow lob always get full data, no need ctx_ + } else if (!is_outrow_ && !locator.is_delta_temp_lob()) { // inrow lob always get full data, no need ctx_ } else if (OB_ISNULL(res_allocator)) { ret = OB_INVALID_ARGUMENT; COMMON_LOG(WARN, "Lob: iter with null allocator", K(ret)); @@ -130,6 +130,9 @@ static int init_lob_access_param(storage::ObLobAccessParam ¶m, } else if (!lob_iter_ctx->locator_.is_persist_lob()) { ret = OB_NOT_IMPLEMENT; COMMON_LOG(WARN, "Lob: outrow temp lob is not supported", K(ret), K(lob_iter_ctx->locator_)); + } else if (lob_iter_ctx->locator_.is_delta_temp_lob()) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "Lob: is delta lob", K(ret), 
K(lob_iter_ctx->locator_)); } else if (OB_FAIL(lob_iter_ctx->locator_.get_disk_locator(disk_loc_str))) { COMMON_LOG(WARN, "Lob: get disk locator failed.", K(ret)); } else if (FALSE_IT(disk_loc = reinterpret_cast(disk_loc_str.ptr()))){ @@ -238,6 +241,81 @@ int ObTextStringIter::get_outrow_lob_full_data(ObIAllocator *allocator /*nullptr return ret; } +int ObTextStringIter::get_delta_lob_full_data(ObLobLocatorV2& lob_locator, ObIAllocator *allocator, ObString &data_str) +{ + int ret = OB_SUCCESS; + ObLobCommon *lob_common = nullptr; + ObLobDiffHeader *diff_header = nullptr; + if (! ob_is_json(type_)) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "only json support", K(ret), K(type_)); + } else if (OB_FAIL(lob_locator.get_disk_locator(lob_common))) { + COMMON_LOG(WARN, "get disk locator failed.", K(ret), K(lob_locator)); + } else if (! lob_common->in_row_) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "Unsupport out row delta tmp lob locator", K(ret), KPC(lob_common)); + } else if (OB_ISNULL(diff_header = reinterpret_cast(lob_common->buffer_))) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "diff_header is null", K(ret), KPC(lob_common)); + } else { + char *buf = diff_header->data_; + int64_t data_len = diff_header->persist_loc_size_; + int64_t pos = 0; + ObLobPartialData partial_data; + if (OB_FAIL(partial_data.init())) { + COMMON_LOG(WARN, "map create fail", K(ret)); + } else if (OB_FAIL(partial_data.deserialize(buf, data_len, pos))) { + COMMON_LOG(WARN, "deserialize partial data fail", K(ret), K(data_len), K(pos)); + } else { + storage::ObLobManager* lob_mngr = MTL(storage::ObLobManager*); + storage::ObLobAccessParam param; + ctx_->locator_ = partial_data.locator_; + if (OB_FAIL(init_lob_access_param(param, ctx_, cs_type_, allocator))) { + COMMON_LOG(WARN, "init_lob_access_param fail", K(ret)); + } else if (!param.ls_id_.is_valid() || !param.tablet_id_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "Lob: invalid param.", K(ret), K(param)); + } 
else if ((param.len_ = param.byte_size_) <= 0) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN,"Lob: calc byte size is negative.", K(ret), K(param)); + } else if (param.byte_size_ > OB_MAX_LONGTEXT_LENGTH) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN,"Lob: unable to read full data over 512M lob.", K(ret), K(param)); + } else if (partial_data.data_length_ > OB_MAX_LONGTEXT_LENGTH) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN,"Lob: unable to read full data over 512M lob.", K(ret), K(param), K(partial_data)); + } else { + ctx_->total_byte_len_ = partial_data.data_length_; + ctx_->buff_byte_len_ = static_cast(partial_data.data_length_); + ctx_->buff_ = static_cast(ctx_->alloc_->alloc(ctx_->buff_byte_len_)); + ObString output_data; + if (OB_ISNULL(ctx_->buff_)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + COMMON_LOG(WARN,"Lob: failed to alloc output buffer", + K(ret), KP(ctx_->buff_), K(ctx_->buff_byte_len_)); + } else { + output_data.assign_buffer(ctx_->buff_, ctx_->buff_byte_len_); + if (OB_FAIL(lob_mngr->query(param, output_data))) { + COMMON_LOG(WARN,"Lob: falied to query lob tablets.", K(ret), K(param)); + } else { + output_data.set_length(static_cast(partial_data.data_length_)); + for(int32_t i = 0; OB_SUCC(ret) && i < partial_data.index_.count(); ++i) { + ObLobChunkIndex &idx = partial_data.index_[i]; + if (1 == idx.is_modified_ || 1 == idx.is_add_) { + ObLobChunkData &chunk_data = partial_data.data_[idx.data_idx_]; + MEMCPY(output_data.ptr() + idx.offset_, chunk_data.data_.ptr() + idx.pos_, idx.byte_len_); + } + } + ctx_->content_byte_len_ = output_data.length(); + data_str = output_data; + } + } + } + } + } + return ret; +} + int ObTextStringIter::get_outrow_prefix_data(uint32_t prefix_char_len) { int ret = OB_SUCCESS; @@ -304,6 +382,7 @@ int ObTextStringIter::get_current_block(ObString &str) int ObTextStringIter::get_full_data(ObString &data_str) { int ret = OB_SUCCESS; + ObLobLocatorV2 loc(datum_str_, has_lob_header_); if (!is_init_ || state_ != 
TEXTSTRING_ITER_INIT) { ret = OB_INVALID_ARGUMENT; COMMON_LOG(WARN, "Lob: iter state error", K(ret), K(is_init_), K(state_)); @@ -312,8 +391,11 @@ int ObTextStringIter::get_full_data(ObString &data_str) COMMON_LOG(DEBUG, "Lob: iter with null input", K(ret), K(*this)); } else if (!is_lob_ || !has_lob_header_) { // string types or 4.0 compatiable text data_str.assign_ptr(datum_str_.ptr(), datum_str_.length()); + } else if (loc.is_delta_temp_lob()) { + if (OB_FAIL(get_delta_lob_full_data(loc, tmp_alloc_, data_str))) { + COMMON_LOG(WARN, "get_delta_lob_full_data fail", K(ret), K(loc)); + } } else if (!is_outrow_) { // inrow lob - ObLobLocatorV2 loc(datum_str_, has_lob_header_); if (OB_FAIL(loc.get_inrow_data(data_str))) { COMMON_LOG(WARN, "Lob: get lob inrow data failed", K(ret)); } @@ -1344,5 +1426,112 @@ int ObTextStringResult::ob_convert_datum_temporay_lob(ObDatum &datum, return ret; } +int64_t ObDeltaLob::get_serialize_size() const +{ + int64_t size = 0; + // header size + size += get_header_serialize_size(); + // updated lob size + size += get_partial_data_serialize_size(); + // lob diff size + size += get_lob_diff_serialize_size(); + return size; +} + +int64_t ObDeltaLob::get_header_serialize_size() const +{ + int64_t size = 0; + // ObMemLobCommon + size += sizeof(ObMemLobCommon); + // ObLobCommon; + size += sizeof(ObLobCommon); + // ObLobDiffHeader + size += sizeof(ObLobDiffHeader); + return size; +} + +int ObDeltaLob::serialize(char* buf, const int64_t buf_len, int64_t& pos) const +{ + int ret = OB_SUCCESS; + ObLobDiffHeader *diff_header = nullptr; + if (OB_FAIL(serialize_header(buf, buf_len, pos, diff_header))) { + LOG_WARN("serialize_header fail", KR(ret), K(buf_len), K(pos), KP(buf)); + } else if (OB_FAIL(serialize_partial_data(buf, buf_len, pos))) { + LOG_WARN("serialize_partial_data fail", KR(ret), K(buf_len), K(pos), KP(buf)); + } else if (OB_FAIL(serialize_lob_diffs(buf, buf_len, diff_header))) { + LOG_WARN("serialize_lob_diffs fail", KR(ret), 
K(buf_len), K(pos), KP(buf)); + } + return ret; +} + +int ObDeltaLob::serialize_header(char* buf, const int64_t buf_len, int64_t& pos, ObLobDiffHeader *&diff_header) const +{ + int ret = OB_SUCCESS; + int64_t size = get_header_serialize_size(); + if (pos + size > buf_len) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("buffer not enough", KR(ret), K(pos), K(size), K(buf_len), KP(buf)); + } else { + ObMemLobCommon *mem_common = new (buf + pos) ObMemLobCommon(ObMemLobType::TEMP_DELTA_LOB, false); + ObLobCommon *lob_common = new (mem_common->data_) ObLobCommon(); + diff_header = new (lob_common->buffer_) ObLobDiffHeader(); + diff_header->diff_cnt_ = get_lob_diff_cnt(); + diff_header->persist_loc_size_ = static_cast(get_partial_data_serialize_size()); + pos += size; + } + return ret; +} + +int ObDeltaLob::deserialize(const ObLobLocatorV2 &lob_locator) +{ + int ret = OB_SUCCESS; + ObLobCommon *lob_common = nullptr; + ObLobDiffHeader *diff_header = nullptr; + if (! lob_locator.is_delta_temp_lob()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("input not delta tmp lob", KR(ret), K(lob_locator)); + } else if (OB_FAIL(lob_locator.get_disk_locator(lob_common))) { + LOG_WARN("get disk locator failed.", K(ret), K(lob_locator)); + } else if (OB_ISNULL(diff_header = reinterpret_cast(lob_common->buffer_))){ + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(deserialize_partial_data(diff_header))) { + LOG_WARN("deserialize_partial_data fail", KR(ret), K(lob_locator), KPC(diff_header)); + } else if (OB_FAIL(deserialize_lob_diffs(lob_locator.ptr_, lob_locator.size_, diff_header))) { + LOG_WARN("deserialize_lob_diffs fail", KR(ret), K(lob_locator), KPC(diff_header)); + } + return ret; +} + +int ObDeltaLob::has_diff(const ObLobLocatorV2 &locator, int64_t &res) +{ + int ret = OB_SUCCESS; + bool bres = false; + if (OB_FAIL(has_diff(locator, bres))) { + LOG_WARN("fail", KR(ret), K(locator)); + } else { + res = bres; + } + return ret; +} + +int ObDeltaLob::has_diff(const ObLobLocatorV2 &locator, bool 
&res) +{ + int ret = OB_SUCCESS; + ObLobCommon *lob_common = nullptr; + if (! locator.is_delta_temp_lob()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not delta lob", K(ret), K(locator)); + } else if (OB_FAIL(locator.get_disk_locator(lob_common))) { + LOG_WARN("get disk locator failed.", KR(ret), K(locator)); + } else if (! lob_common->in_row_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unsupport out row delta tmp lob locator", KR(ret), KPC(lob_common), K(locator)); + } else { + ObLobDiffHeader *diff_header = reinterpret_cast(lob_common->buffer_); + res = diff_header->diff_cnt_ > 0; + } + return ret; +} + } } diff --git a/src/share/ob_lob_access_utils.h b/src/share/ob_lob_access_utils.h index 334b1a5c58..5c4848c9ab 100644 --- a/src/share/ob_lob_access_utils.h +++ b/src/share/ob_lob_access_utils.h @@ -29,6 +29,7 @@ class ObBasicSessionInfo; namespace storage { class ObLobQueryIter; + class ObLobDiffHeader; } // namespace storage namespace common { @@ -129,6 +130,7 @@ public: if (is_lob_storage(type)) { validate_has_lob_header(has_lob_header_); } + cs_type_ = ob_is_json(type) ? CS_TYPE_BINARY : cs_type_; } ObTextStringIter(const ObObj &obj) : @@ -139,6 +141,7 @@ public: if (is_lob_storage(obj.get_type())) { validate_has_lob_header(has_lob_header_); } + cs_type_ = ob_is_json(obj.get_type()) ? 
CS_TYPE_BINARY : cs_type_; } ~ObTextStringIter(); @@ -193,6 +196,7 @@ public: private: int get_outrow_lob_full_data(ObIAllocator *allocator = nullptr); + int get_delta_lob_full_data(ObLobLocatorV2& lob_locator, ObIAllocator *allocator, ObString &data); int get_first_block(ObString &str); int get_next_block_inner(ObString &str); int get_outrow_prefix_data(uint32_t prefix_char_len); @@ -329,6 +333,28 @@ OB_INLINE bool ob_is_empty_lob(const ObObj &obj) return bret; } +class ObDeltaLob { +public: + static int has_diff(const ObLobLocatorV2 &locator, int64_t &res); + static int has_diff(const ObLobLocatorV2 &locator, bool &res); + +public: + int64_t get_serialize_size() const; + int64_t get_header_serialize_size() const; + virtual int64_t get_partial_data_serialize_size() const = 0; + virtual int64_t get_lob_diff_serialize_size() const = 0; + virtual uint32_t get_lob_diff_cnt() const = 0; + + int serialize(char* buf, const int64_t buf_len, int64_t& pos) const; + int serialize_header(char* buf, const int64_t buf_len, int64_t& pos, storage::ObLobDiffHeader *&diff_header) const; + virtual int serialize_partial_data(char* buf, const int64_t buf_len, int64_t& pos) const = 0; + virtual int serialize_lob_diffs(char* buf, const int64_t buf_len, storage::ObLobDiffHeader *diff_header) const = 0; + + int deserialize(const ObLobLocatorV2 &delta_lob); + virtual int deserialize_partial_data(storage::ObLobDiffHeader *diff_header) = 0; + virtual int deserialize_lob_diffs(char* buf, const int64_t buf_len, storage::ObLobDiffHeader *diff_header) = 0; +}; + } // end namespace common } // end namespace oceanbase diff --git a/src/share/object/ob_obj_cast.cpp b/src/share/object/ob_obj_cast.cpp index 73134bbf5f..c6ffd46027 100644 --- a/src/share/object/ob_obj_cast.cpp +++ b/src/share/object/ob_obj_cast.cpp @@ -22,6 +22,7 @@ #include "lib/worker.h" #include "share/object/ob_obj_cast_util.h" #include "share/rc/ob_tenant_base.h" +#include "share/ob_json_access_utils.h" #include 
"common/sql_mode/ob_sql_mode_utils.h" #include "observer/omt/ob_tenant_srs.h" #include "lib/json_type/ob_json_tree.h" @@ -41,10 +42,8 @@ #ifdef OB_BUILD_ORACLE_PL #include "pl/sys_package/ob_sdo_geometry.h" #endif -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_util.h" #include "lib/xml/ob_xml_parser.h" -#endif // from sql_parser_base.h #define DEFAULT_STR_LENGTH -1 @@ -146,7 +145,7 @@ static int cast_extend_types_not_support(const ObObjType expect_type, UNUSED(out); UNUSED(cast_mode); int ret = OB_SUCCESS; -#ifdef OB_BUILD_ORACLE_XML +#ifdef OB_BUILD_ORACLE_PL if (in.is_pl_extend()) { if (pl::PL_OPAQUE_TYPE == in.get_meta().get_extend_type()) { pl::ObPLOpaque *pl_src = reinterpret_cast(in.get_ext()); @@ -1551,7 +1550,7 @@ static int int_json(const ObObjType expect_type, ObObjCastParams ¶ms, ObJsonInt j_int(in.get_int()); ObIJsonBase *j_base = &j_int; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get int json binary", K(ret), K(in), K(expect_type), K(*j_base)); } else if (OB_FAIL(set_json_bin_res(¶ms, &out, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -1974,7 +1973,7 @@ static int uint_json(const ObObjType expect_type, ObObjCastParams ¶ms, ObJsonUint j_uint(in.get_uint64()); ObIJsonBase *j_base = &j_uint; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get uint json binary", K(ret), K(in), K(expect_type), K(*j_base)); } else if (OB_FAIL(set_json_bin_res(¶ms, &out, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -2423,7 +2422,7 @@ static int float_json(const ObObjType expect_type, ObObjCastParams ¶ms, ObJsonDouble j_double(in.get_float()); ObIJsonBase *j_base = &j_double; ObString raw_bin; - if 
(OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get float json binary", K(ret), K(in), K(expect_type), K(*j_base)); } else if (OB_FAIL(set_json_bin_res(¶ms, &out, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -2896,7 +2895,7 @@ static int double_json(const ObObjType expect_type, ObObjCastParams ¶ms, ObJsonDouble j_double(in.get_double()); ObIJsonBase *j_base = &j_double; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get double json binary", K(ret), K(in), K(expect_type), K(*j_base)); } else if (OB_FAIL(set_json_bin_res(¶ms, &out, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -3472,7 +3471,7 @@ static int number_json(const ObObjType expect_type, ObObjCastParams ¶ms, ObJsonDecimal j_dec(nmb, -1, in.get_scale()); ObIJsonBase *j_base = &j_dec; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get decimal json binary", K(ret), K(in), K(expect_type), K(*j_base)); } else if (OB_FAIL(set_json_bin_res(¶ms, &out, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -3921,7 +3920,7 @@ static int datetime_json(const ObObjType expect_type, ObObjCastParams ¶ms, ObJsonDatetime j_datetime(j_type, ob_time); ObIJsonBase *j_base = &j_datetime; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get datetime json binary", K(ret), K(in), K(expect_type), K(*j_base)); } else if (OB_FAIL(set_json_bin_res(¶ms, &out, raw_bin))) { LOG_WARN("fail to fill json bin lob 
locator", K(ret)); @@ -4249,7 +4248,7 @@ static int date_json(const ObObjType expect_type, ObObjCastParams ¶ms, ObJsonDatetime j_date(ObJsonNodeType::J_DATE, ob_time); ObIJsonBase *j_base = &j_date; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get date json binary", K(ret), K(in), K(expect_type), K(*j_base)); } else if (OB_FAIL(set_json_bin_res(¶ms, &out, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -4595,7 +4594,7 @@ static int time_json(const ObObjType expect_type, ObObjCastParams ¶ms, ObJsonDatetime j_time(ObJsonNodeType::J_TIME, ob_time); ObIJsonBase *j_base = &j_time; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get time json binary", K(ret), K(in), K(expect_type), K(*j_base)); } else if (OB_FAIL(set_json_bin_res(¶ms, &out, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -4926,7 +4925,7 @@ static int year_json(const ObObjType expect_type, ObObjCastParams ¶ms, ObJsonInt j_year(full_year); ObIJsonBase *j_base = &j_year; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get year json binary", K(ret), K(in), K(expect_type), K(*j_base)); } else if (OB_FAIL(set_json_bin_res(¶ms, &out, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -6136,7 +6135,7 @@ static int string_json(const ObObjType expect_type, ObObjCastParams ¶ms, if (OB_SUCC(ret)) { ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get string json binary", 
K(ret), K(in), K(*j_base)); } else if (OB_FAIL(set_json_bin_res(¶ms, &out, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -6746,7 +6745,7 @@ static int bit_json(const ObObjType expect_type, ObObjCastParams ¶ms, ObJsonOpaque j_opaque(j_value, ObBitType); ObIJsonBase *j_base = &j_opaque; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, params.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, params.allocator_v2_))) { LOG_WARN("fail to get int json binary", K(ret), K(in), K(expect_type), K(*j_base)); } else if (OB_FAIL(copy_string(params, expect_type, raw_bin.ptr(), raw_bin.length(), out))) { LOG_WARN("fail to copy string", K(ret), K(expect_type), K(raw_bin)); @@ -8240,7 +8239,7 @@ static int json_int(const ObObjType expect_type, ObObjCastParams ¶ms, if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -8278,7 +8277,7 @@ static int json_uint(const ObObjType expect_type, ObObjCastParams ¶ms, if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -8339,7 +8338,7 @@ static int json_double(const ObObjType expect_type, ObObjCastParams ¶ms, if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get 
real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -8376,7 +8375,7 @@ static int json_number(const ObObjType expect_type, ObObjCastParams ¶ms, if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -8413,7 +8412,7 @@ static int json_datetime(const ObObjType expect_type, ObObjCastParams ¶ms, if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -8447,7 +8446,7 @@ static int json_date(const ObObjType expect_type, ObObjCastParams ¶ms, if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -8481,7 +8480,7 @@ static int json_time(const ObObjType expect_type, ObObjCastParams ¶ms, if 
(OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -8516,7 +8515,7 @@ static int json_year(const ObObjType expect_type, ObObjCastParams ¶ms, if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -8559,7 +8558,7 @@ static int json_raw(const ObObjType expect_type, ObObjCastParams ¶ms, } else { ObJsonBuffer j_buf(params.allocator_v2_); ObString j_bin_str = in.get_string(); - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -8631,7 +8630,7 @@ static int json_string(const ObObjType expect_type, ObObjCastParams ¶ms, if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -8699,7 +8698,7 @@ static int common_json_string(const ObObjType 
expect_type, if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; ObString j_str; if (OB_FAIL(j_bin.reset_iter())) { @@ -8779,7 +8778,7 @@ static int json_bit(const ObObjType expect_type, ObObjCastParams ¶ms, if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(params.allocator_v2_, in, j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), params.allocator_v2_); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { LOG_WARN("failed to reset json bin iter", K(ret), K(j_bin_str)); @@ -9297,7 +9296,7 @@ static int sql_udt_pl_extend(const ObObjType expect_type, ObObjCastParams ¶m const ObObj &in, ObObj &out, const ObCastMode cast_mode) { int ret = OB_SUCCESS; -#ifdef OB_BUILD_ORACLE_XML +#ifdef OB_BUILD_ORACLE_PL if (in.is_xml_sql_type()) { // no need to read blob full data pl::ObPLXmlType *xmltype = NULL; @@ -9366,7 +9365,6 @@ static int string_sql_udt(const ObObjType expect_type, ObObjCastParams ¶ms, const ObObj &in, ObObj &out, const ObCastMode cast_mode) { int ret = OB_SUCCESS; -#ifdef OB_BUILD_ORACLE_XML if (in.is_string_type()) { ObMulModeMemCtx* mem_ctx = nullptr; ObIAllocator &temp_allocator = *params.allocator_v2_; @@ -9425,9 +9423,6 @@ static int string_sql_udt(const ObObjType expect_type, ObObjCastParams ¶ms, ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected type to convert format", K(ret), K(expect_type), K(in)); } -#else - ret = OB_NOT_SUPPORTED; -#endif return ret; } @@ -9609,7 +9604,7 @@ static int pl_extend_string(const ObObjType expect_type, ObObjCastParams ¶ms const ObObj &in, ObObj &out, const ObCastMode cast_mode) { int ret = 
OB_SUCCESS; -#ifdef OB_BUILD_ORACLE_XML +#ifdef OB_BUILD_ORACLE_PL if (in.is_pl_extend()) { if (pl::PL_OPAQUE_TYPE == in.get_meta().get_extend_type()) { pl::ObPLOpaque *pl_src = reinterpret_cast(in.get_ext()); @@ -9689,7 +9684,6 @@ static int udt_string(const ObObjType expect_type, ObObjCastParams ¶ms, const ObObj &in, ObObj &out, const ObCastMode cast_mode) { int ret = OB_SUCCESS; -#ifdef OB_BUILD_ORACLE_XML if (in.is_xml_sql_type()) { ObString blob_data = in.get_string(); ObStringBuffer xml_plain_text(params.allocator_v2_); @@ -9717,9 +9711,6 @@ static int udt_string(const ObObjType expect_type, ObObjCastParams ¶ms, LOG_WARN_RET(OB_ERR_INVALID_TYPE_FOR_OP, "inconsistent datatypes", "expected", expect_type, "got", in.get_type(), K(in.get_udt_subschema_id())); } -#else - ret = OB_NOT_SUPPORTED; -#endif return ret; } @@ -14544,7 +14535,7 @@ int ObObjCaster::bool_to_json(const ObObjType expect_type, ObJsonBoolean j_bool(bool_val); ObIJsonBase *j_base = &j_bool; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, cast_ctx.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, cast_ctx.allocator_v2_))) { LOG_WARN("fail to get bool json binary", K(ret), K(in_obj), K(expect_type)); } else if (OB_FAIL(set_json_bin_res(&cast_ctx, &buf_obj, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -14586,7 +14577,7 @@ int ObObjCaster::enumset_to_json(const ObObjType expect_type, } else { j_base = &j_string; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, cast_ctx.allocator_v2_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, cast_ctx.allocator_v2_))) { LOG_WARN("fail to get string json binary", K(ret), K(in_obj)); } else if (OB_FAIL(set_json_bin_res(&cast_ctx, &buf_obj, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); diff --git a/src/share/schema/ob_schema_printer.cpp b/src/share/schema/ob_schema_printer.cpp index 9ae21e7eab..52303bd923 100644 --- 
a/src/share/schema/ob_schema_printer.cpp +++ b/src/share/schema/ob_schema_printer.cpp @@ -434,6 +434,9 @@ int ObSchemaPrinter::print_table_definition_columns(const ObTableSchema &table_s } } } + if (OB_SUCC(ret) && !is_oracle_mode && OB_FAIL(print_column_lob_params(*col, buf, buf_len, pos))) { + SHARE_SCHEMA_LOG(WARN, "fail to print lob params", K(ret)); + } if (OB_SUCC(ret) && !is_oracle_mode && 0 < strlen(col->get_comment())) { if (OB_FAIL(databuff_printf(buf, buf_len, pos, " COMMENT '%s'", to_cstring(ObHexEscapeSqlStr(col->get_comment_str()))))) { SHARE_SCHEMA_LOG(WARN, "fail to print comment", K(ret)); @@ -5590,6 +5593,29 @@ int ObSchemaPrinter::print_view_define_str(char* buf, return ret; }
+// Appends " CHUNK '<n>KB'" for a JSON column whose lob chunk size differs from
+// OB_DEFAULT_LOB_CHUNK_SIZE. Nothing is printed in strict compat mode or for shadow,
+// hidden or non-JSON columns; in that case OB_SUCCESS is returned unchanged.
+int ObSchemaPrinter::print_column_lob_params(const ObColumnSchemaV2 &column_schema,
+                                             char* buf,
+                                             const int64_t& buf_len,
+                                             int64_t& pos) const
+{
+  int ret = OB_SUCCESS;
+  // Short-circuit keeps the checks in their original order.
+  const bool need_print = !strict_compat_
+      && !column_schema.is_shadow_column()
+      && !column_schema.is_hidden()
+      && column_schema.is_json()
+      && column_schema.get_lob_chunk_size() != OB_DEFAULT_LOB_CHUNK_SIZE;
+  if (need_print
+      && OB_FAIL(databuff_printf(buf, buf_len, pos, " CHUNK '%ldKB'",
+                                 column_schema.get_lob_chunk_size()/ObLobDataOutRowCtx::OUTROW_LOB_CHUNK_SIZE_UNIT))) {
+    SHARE_SCHEMA_LOG(WARN, "fail to print column", K(ret), K(column_schema));
+  }
+  return ret;
+}
+
 int ObSchemaPrinter::print_table_definition_lob_params(const ObTableSchema &table_schema, char* buf, const int64_t& buf_len, diff --git a/src/share/schema/ob_schema_printer.h b/src/share/schema/ob_schema_printer.h index 79cba91c63..814b9ee303 100644 --- a/src/share/schema/ob_schema_printer.h +++ b/src/share/schema/ob_schema_printer.h @@ -487,6 +487,10 @@ public: bool is_oracle_mode, const ObString &sql) const; + int print_column_lob_params(const ObColumnSchemaV2 &column_schema, + char* buf, + const int64_t& buf_len, + int64_t& pos) const; int 
print_table_definition_lob_params(const ObTableSchema &table_schema, char* buf, const int64_t& buf_len, diff --git a/src/share/schema/ob_table_schema.cpp b/src/share/schema/ob_table_schema.cpp index 270c2ab391..d1f77df255 100644 --- a/src/share/schema/ob_table_schema.cpp +++ b/src/share/schema/ob_table_schema.cpp @@ -4342,7 +4342,7 @@ int ObTableSchema::check_alter_column_accuracy(const ObColumnSchemaV2 &src_colum if (ob_is_number_tc(src_col_type) || src_meta.is_bit() || src_meta.is_char() || src_meta.is_varchar() || src_meta.is_varbinary() || src_meta.is_text() || src_meta.is_blob() || src_meta.is_timestamp() || src_meta.is_datetime() - || src_meta.is_integer_type()) { + || src_meta.is_integer_type() || src_meta.is_json()) { // online, do nothing } else if (ob_is_decimal_int_tc(src_col_type) && dst_accuracy.get_scale() == src_accuracy.get_scale() @@ -4872,7 +4872,8 @@ int ObTableSchema::check_is_exactly_same_type(const ObColumnSchemaV2 &src_column } } else { if (src_column.is_string_type() || src_column.is_raw() - || ob_is_rowid_tc(src_column.get_data_type())) { + || ob_is_rowid_tc(src_column.get_data_type()) + || src_column.is_json()) { if (src_column.get_charset_type() == dst_column.get_charset_type() && src_column.get_collation_type() == dst_column.get_collation_type() && src_column.get_data_length() == dst_column.get_data_length() && diff --git a/src/share/schema/ob_table_sql_service.cpp b/src/share/schema/ob_table_sql_service.cpp index dbbe3d312c..8a131caff8 100644 --- a/src/share/schema/ob_table_sql_service.cpp +++ b/src/share/schema/ob_table_sql_service.cpp @@ -4059,6 +4059,11 @@ int ObTableSqlService::gen_column_dml( lib::Worker::CompatMode compat_mode = lib::Worker::CompatMode::INVALID; if (OB_FAIL(GET_MIN_DATA_VERSION(exec_tenant_id, tenant_data_version))) { LOG_WARN("get tenant data version failed", K(ret)); + } else if (! 
((DATA_VERSION_4_2_2_0 <= tenant_data_version && tenant_data_version < DATA_VERSION_4_3_0_0) || tenant_data_version >= DATA_VERSION_4_3_1_0) + && column.get_lob_chunk_size() != OB_DEFAULT_LOB_CHUNK_SIZE) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.1, lob chunk size is not supported", K(ret), K(tenant_data_version), K(column)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, lob chunk size"); } else if (OB_FAIL(ObCompatModeGetter::get_table_compat_mode( column.get_tenant_id(), column.get_table_id(), compat_mode))) { LOG_WARN("fail to get tenant mode", K(ret), K(column)); @@ -4225,6 +4230,8 @@ int ObTableSqlService::gen_column_dml( || (tenant_data_version >= DATA_VERSION_4_2_0_0 &&OB_FAIL(dml.add_column("sub_data_type", column.get_sub_data_type()))) || (tenant_data_version >= DATA_VERSION_4_3_0_0 && OB_FAIL(dml.add_column("skip_index_attr", column.get_skip_index_attr().get_packed_value()))) + || (((DATA_VERSION_4_2_2_0 <= tenant_data_version && tenant_data_version < DATA_VERSION_4_3_0_0) || tenant_data_version >= DATA_VERSION_4_3_1_0) + && OB_FAIL(dml.add_column("lob_chunk_size", column.get_lob_chunk_size()))) || (tenant_data_version >= DATA_VERSION_4_2_2_0 &&OB_FAIL(dml.add_column("local_session_vars", ObHexEscapeSqlStr(local_session_var)))) || OB_FAIL(dml.add_gmt_create()) || OB_FAIL(dml.add_gmt_modified()))) { diff --git a/src/share/system_variable/ob_system_variable.cpp b/src/share/system_variable/ob_system_variable.cpp index bb611fb2be..c92034a7f8 100644 --- a/src/share/system_variable/ob_system_variable.cpp +++ b/src/share/system_variable/ob_system_variable.cpp @@ -2079,15 +2079,27 @@ int ObSysVarOnCheckFuncs::check_log_row_value_option_is_valid(sql::ObExecContext int ret = OB_SUCCESS; ObString val = in_val.get_string(); if (!val.empty()) { - if (val.case_compare("partial_lob") != 0) { - ret = OB_ERR_PARAM_VALUE_INVALID; - LOG_USER_ERROR(OB_ERR_PARAM_VALUE_INVALID); - } else { + if 
(val.case_compare(OB_LOG_ROW_VALUE_PARTIAL_LOB) == 0) { // because not adapat obcdc, currently partial_lob is disabled // out_val = in_val; ret = OB_NOT_SUPPORTED; LOG_WARN("partial_lob is not support, please use _enable_dbms_lob_partial_update instead", K(ret), K(in_val)); LOG_USER_ERROR(OB_NOT_SUPPORTED, "partial_lob"); + } else if (val.case_compare(OB_LOG_ROW_VALUE_PARTIAL_JSON) == 0 + || val.case_compare(OB_LOG_ROW_VALUE_PARTIAL_ALL) == 0) { + uint64_t tenant_data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(MTL_ID(), tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret), K(val)); + } else if (! ((DATA_VERSION_4_2_2_0 <= tenant_data_version && tenant_data_version < DATA_VERSION_4_3_0_0) || tenant_data_version >= DATA_VERSION_4_3_1_0)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("json partial update not support in current version", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "json partial update not support in current version"); + } else { + out_val = in_val; + } + } else { + ret = OB_ERR_PARAM_VALUE_INVALID; + LOG_USER_ERROR(OB_ERR_PARAM_VALUE_INVALID); } } else { out_val = in_val; diff --git a/src/share/vector/vector_basic_op.h b/src/share/vector/vector_basic_op.h index 73294a77f5..2835307031 100644 --- a/src/share/vector/vector_basic_op.h +++ b/src/share/vector/vector_basic_op.h @@ -292,7 +292,8 @@ struct VecTCHashCalc } else if (OB_FAIL(str_iter.get_full_data(j_bin_str))) { COMMON_LOG(WARN, "Lob: str iter get full data failed", K(ret)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBinCtx ctx; + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &ctx); ObIJsonBase *j_base = &j_bin; if (j_bin_str.length() == 0) { res = seed; @@ -681,8 +682,10 @@ struct VecTCCmpCalc } else if (OB_FAIL(r_instr_iter.get_full_data(r_data))) { COMMON_LOG(WARN, "Lob: get right lob str iter full data failed ", K(ret), K(r_instr_iter)); } else { - ObJsonBin j_bin_l(l_data.ptr(), l_data.length()); - ObJsonBin 
j_bin_r(r_data.ptr(), r_data.length()); + ObJsonBinCtx ctx_l; + ObJsonBinCtx ctx_r; + ObJsonBin j_bin_l(l_data.ptr(), l_data.length(), &ctx_l); + ObJsonBin j_bin_r(r_data.ptr(), r_data.length(), &ctx_r); ObIJsonBase *j_base_l = &j_bin_l; ObIJsonBase *j_base_r = &j_bin_r; diff --git a/src/sql/CMakeLists.txt b/src/sql/CMakeLists.txt index 7e93bcb80c..50831fce25 100644 --- a/src/sql/CMakeLists.txt +++ b/src/sql/CMakeLists.txt @@ -402,6 +402,9 @@ ob_set_subtarget(ob_sql engine_expr engine/expr/ob_expr_is_serving_tenant.cpp engine/expr/ob_expr_json_func_helper.cpp engine/expr/ob_expr_json_extract.cpp + engine/expr/ob_expr_json_schema_valid.cpp + engine/expr/ob_expr_json_schema_validation_report.cpp + engine/expr/ob_expr_json_schema_validation_report.cpp engine/expr/ob_expr_json_contains.cpp engine/expr/ob_expr_json_contains_path.cpp engine/expr/ob_expr_json_depth.cpp @@ -420,6 +423,7 @@ ob_set_subtarget(ob_sql engine_expr engine/expr/ob_expr_json_search.cpp engine/expr/ob_expr_json_array.cpp engine/expr/ob_expr_json_array_append.cpp + engine/expr/ob_expr_json_append.cpp engine/expr/ob_expr_json_array_insert.cpp engine/expr/ob_expr_json_quote.cpp engine/expr/ob_expr_json_unquote.cpp @@ -683,6 +687,9 @@ ob_set_subtarget(ob_sql engine_expr engine/expr/ob_expr_extract_xml.cpp engine/expr/ob_expr_xmlcast.cpp engine/expr/ob_expr_update_xml.cpp + engine/expr/ob_expr_insert_child_xml.cpp + engine/expr/ob_expr_xml_delete_xml.cpp + engine/expr/ob_expr_xml_sequence.cpp engine/expr/ob_expr_sql_udt_utils.cpp engine/expr/ob_expr_temp_table_ssid.cpp engine/expr/ob_expr_collection_construct.cpp @@ -711,6 +718,7 @@ ob_set_subtarget(ob_sql engine_expr engine/expr/ob_expr_extract_cert_expired_time.cpp engine/expr/ob_expr_transaction_id.cpp engine/expr/ob_expr_inner_row_cmp_val.cpp + engine/expr/ob_expr_json_utils.cpp ) ob_set_subtarget(ob_sql engine_join diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index 
325f4eb79e..42eb5d0bce 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -7510,7 +7510,9 @@ int ObStaticEngineCG::generate_spec(ObLogJsonTable &op, ObJsonTableSpec &spec, UNUSED(in_root_job); ObIAllocator &alloc = phy_plan_->get_allocator(); ObRawExpr *value_raw_expr = nullptr; + ObArray ns_arr; ObExpr *value_expr = nullptr; + ObString ns_prefix_str; int ret = OB_SUCCESS; if (OB_ISNULL(op.get_stmt())) { ret = OB_ERR_UNEXPECTED; @@ -7518,7 +7520,8 @@ int ObStaticEngineCG::generate_spec(ObLogJsonTable &op, ObJsonTableSpec &spec, } else if (OB_FAIL(spec.column_exprs_.init(op.get_stmt()->get_column_size())) || OB_FAIL(spec.emp_default_exprs_.init(op.get_stmt()->get_column_size())) || OB_FAIL(spec.err_default_exprs_.init(op.get_stmt()->get_column_size())) - || OB_FAIL(spec.cols_def_.init(op.get_origin_cols_def().count()))) { + || OB_FAIL(spec.cols_def_.init(op.get_origin_cols_def().count())) + || OB_FAIL(spec.namespace_def_.init(op.get_ns_size()))) { LOG_WARN("failed to init array", K(ret)); } else if (OB_UNLIKELY(op.get_num_of_child() > 1)) { ret = OB_ERR_UNEXPECTED; @@ -7531,6 +7534,7 @@ int ObStaticEngineCG::generate_spec(ObLogJsonTable &op, ObJsonTableSpec &spec, } else { spec.has_correlated_expr_ = value_raw_expr->has_flag(CNT_DYNAMIC_PARAM); spec.value_expr_ = value_expr; + spec.table_type_ = op.get_table_type(); // table func type if (OB_FAIL(spec.dup_origin_column_defs(op.get_origin_cols_def()))) { LOG_WARN("failed to append col define", K(ret)); @@ -7542,9 +7546,24 @@ int ObStaticEngineCG::generate_spec(ObLogJsonTable &op, ObJsonTableSpec &spec, } } + if (OB_SUCC(ret)) { // deal namespace + if (OB_FAIL(op.get_namespace_arr(ns_arr))) { + LOG_WARN("fail to get ns from log table", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < ns_arr.size(); i++) { + if (OB_FAIL(ob_write_string(*(spec.namespace_def_.get_allocator()), ns_arr.at(i), ns_prefix_str))) { // need deep copy + LOG_WARN("fail 
to wirte prefix string", K(ret), K(ns_prefix_str)); + } else if (OB_FAIL(spec.namespace_def_.push_back(ns_prefix_str))) { + LOG_WARN("fail to add ns str to arr", K(ret), K(i)); + } + } + } + } + bool need_set_lob_header = get_cur_cluster_version() >= CLUSTER_VERSION_4_1_0_0; for (int64_t i = 0; OB_SUCC(ret) && i < op.get_stmt()->get_column_size(); ++i) { ObExpr *rt_expr = nullptr; + ObRawExpr* default_val = nullptr; const ColumnItem *col_item = op.get_stmt()->get_column_item(i); CK (OB_NOT_NULL(col_item)); CK (OB_NOT_NULL(col_item->expr_)); @@ -7556,7 +7575,7 @@ int ObStaticEngineCG::generate_spec(ObLogJsonTable &op, ObJsonTableSpec &spec, if (OB_SUCC(ret) && is_lob_storage(rt_expr->obj_meta_.get_type()) && need_set_lob_header) { rt_expr->obj_meta_.set_has_lob_header(); } - + ObColumnDefault* col_def; OZ (spec.column_exprs_.push_back(rt_expr)); if (OB_FAIL(ret)) { @@ -7569,20 +7588,23 @@ int ObStaticEngineCG::generate_spec(ObLogJsonTable &op, ObJsonTableSpec &spec, ObJtColInfo* col_info = spec.cols_def_.at(col_item->col_idx_); col_info->output_column_idx_ = spec.column_exprs_.count() - 1; - if (OB_NOT_NULL(col_item->default_value_expr_)) { + if (OB_ISNULL(col_def = op.get_column_param_default_val(col_item->column_id_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get default value", K(ret), K(col_item->column_id_)); + } else if (OB_NOT_NULL(default_val = col_def->default_error_expr_)) { ObExpr *err_expr = nullptr; - OZ (mark_expr_self_produced(col_item->default_value_expr_)); - OZ (generate_rt_expr(*col_item->default_value_expr_, err_expr)); + OZ (mark_expr_self_produced(default_val)); + OZ (generate_rt_expr(*default_val, err_expr)); if (OB_SUCC(ret) && is_lob_storage(err_expr->obj_meta_.get_type()) && need_set_lob_header) { err_expr->obj_meta_.set_has_lob_header(); } OX (col_info->error_expr_id_ = spec.err_default_exprs_.count()); OZ (spec.err_default_exprs_.push_back(err_expr)); } - if (OB_SUCC(ret) && OB_NOT_NULL(col_item->default_empty_expr_)) { + if 
(OB_SUCC(ret) && OB_NOT_NULL(default_val = col_def->default_empty_expr_)) { ObExpr *emp_expr = nullptr; - OZ (mark_expr_self_produced(col_item->default_empty_expr_)); - OZ (generate_rt_expr(*col_item->default_empty_expr_, emp_expr)); + OZ (mark_expr_self_produced(default_val)); + OZ (generate_rt_expr(*default_val, emp_expr)); if (OB_SUCC(ret) && is_lob_storage(emp_expr->obj_meta_.get_type()) && need_set_lob_header) { emp_expr->obj_meta_.set_has_lob_header(); } diff --git a/src/sql/engine/aggregate/ob_aggregate_processor.cpp b/src/sql/engine/aggregate/ob_aggregate_processor.cpp index b2d07cded2..36cbc26805 100644 --- a/src/sql/engine/aggregate/ob_aggregate_processor.cpp +++ b/src/sql/engine/aggregate/ob_aggregate_processor.cpp @@ -32,13 +32,12 @@ #include "share/stat/ob_hybrid_hist_estimator.h" #include "share/stat/ob_dbms_stats_utils.h" #include "sql/engine/expr/ob_expr_sys_op_opnsize.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_util.h" #include "lib/xml/ob_xml_tree.h" #include "lib/xml/ob_xml_parser.h" +#include "lib/xml/ob_binary_aggregate.h" #include "sql/engine/expr/ob_expr_xml_func_helper.h" #include "lib/alloc/malloc_hook.h" -#endif #include "pl/ob_pl_user_type.h" #include "pl/ob_pl.h" @@ -6951,6 +6950,10 @@ int ObAggregateProcessor::get_json_arrayagg_result(const ObAggrInfo &aggr_info, { int ret = OB_SUCCESS; common::ObArenaAllocator tmp_alloc(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc_back(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc_arr(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObStringBuffer value(&res_alloc_arr); if (OB_ISNULL(extra) || OB_UNLIKELY(extra->empty())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unpexcted null", K(ret), K(extra)); @@ -6961,15 +6964,15 @@ int 
ObAggregateProcessor::get_json_arrayagg_result(const ObAggrInfo &aggr_info, LOG_WARN("finish_add_row failed", KPC(extra), K(ret)); } else { const ObChunkDatumStore::StoredRow *storted_row = NULL; - ObJsonArray json_array(&tmp_alloc); bool is_bool = false; if (OB_FAIL(extra->get_bool_mark(0, is_bool))) { LOG_WARN("get_bool info failed, may not distinguish between bool and int", K(ret)); } // get type - ObObj *tmp_obj = NULL; + ObBinAggSerializer bin_agg(&res_alloc, AGG_JSON, static_cast(ObJsonNodeType::J_ARRAY), false, &res_alloc_back, &res_alloc_arr); while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row(storted_row))) { + ObObj *tmp_obj = NULL; if (OB_ISNULL(storted_row)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(storted_row)); @@ -6990,15 +6993,20 @@ int ObAggregateProcessor::get_json_arrayagg_result(const ObAggrInfo &aggr_info, ObIJsonBase *json_val = NULL; ObDatum converted_datum; converted_datum.set_datum(storted_row->cells()[0]); + bool has_lob_header = tmp_obj->has_lob_header(); // convert string charset if needed if (ob_is_string_type(val_type) && (ObCharset::charset_type_by_coll(cs_type) != CHARSET_UTF8MB4)) { ObString origin_str = converted_datum.get_string(); ObString converted_str; - if (OB_FAIL(ObExprUtil::convert_string_collation(origin_str, cs_type, converted_str, + if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(&tmp_alloc, + val_type, cs_type, tmp_obj->has_lob_header(), origin_str))) { + LOG_WARN("fail to get real data.", K(ret), K(origin_str)); + } else if (OB_FAIL(ObExprUtil::convert_string_collation(origin_str, cs_type, converted_str, CS_TYPE_UTF8MB4_BIN, tmp_alloc))) { LOG_WARN("convert string collation failed", K(ret), K(cs_type), K(origin_str.length())); } else { + has_lob_header = false; converted_datum.set_string(converted_str); cs_type = CS_TYPE_UTF8MB4_BIN; } @@ -7014,13 +7022,16 @@ int ObAggregateProcessor::get_json_arrayagg_result(const ObAggrInfo &aggr_info, } else { ObJsonBoolean *bool_node = 
(ObJsonBoolean*)new(json_node_buf)ObJsonBoolean(converted_datum.get_bool()); json_val = bool_node; + if (OB_FAIL(ObJsonBaseFactory::transform(&tmp_alloc, json_val, ObJsonInType::JSON_BIN, json_val))) { + LOG_WARN("fail to transform to tree", K(ret)); + } } } else if (ObJsonExprHelper::is_convertible_to_json(val_type)) { if (OB_FAIL(ObJsonExprHelper::transform_convertible_2jsonBase(converted_datum, val_type, &tmp_alloc, cs_type, - json_val, false, - tmp_obj->has_lob_header(), - true))) { + json_val, ObConv2JsonParam(true, + has_lob_header, + true)))) { LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type)); } } else { @@ -7028,18 +7039,25 @@ int ObAggregateProcessor::get_json_arrayagg_result(const ObAggrInfo &aggr_info, &tmp_alloc, scale, eval_ctx_.exec_ctx_.get_my_session()->get_timezone_info(), eval_ctx_.exec_ctx_.get_my_session(), - json_val, false))) { + json_val, true))) { LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type)); } } + ObString key; + ObJsonBin *jb_node = nullptr; if (OB_FAIL(ret)) { - } else if (OB_FAIL(json_array.array_append(json_val))) { - LOG_WARN("failed: json array append json value", K(ret)); - } else if (json_array.get_serialize_size() > OB_MAX_PACKET_LENGTH) { + } else if (OB_ISNULL(jb_node = static_cast(json_val))) { + ret = OB_ERR_UNDEFINED; + LOG_WARN("get binary null", K(ret)); + } else if (OB_FAIL(bin_agg.append_key_and_value(key, value, jb_node))) { + LOG_WARN("failed to append key and value", K(ret)); + } else if (bin_agg.get_approximate_length() > OB_MAX_PACKET_LENGTH * 2) { ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT; - LOG_WARN("result of json_arrayagg is too long", K(ret), K(json_array.get_serialize_size()), - K(OB_MAX_PACKET_LENGTH)); + LOG_WARN("result of json_arrayagg is too long", K(ret), K(bin_agg.get_approximate_length()), + K(OB_MAX_PACKET_LENGTH)); + } else { + tmp_alloc.reset(); } } } @@ -7048,16 +7066,15 @@ int ObAggregateProcessor::get_json_arrayagg_result(const ObAggrInfo &aggr_info, LOG_WARN("fail 
to get next row", K(ret)); } else { ret = OB_SUCCESS; - ObString str; - // output res - if (OB_FAIL(json_array.get_raw_binary(str, &aggr_alloc_))) { - LOG_WARN("get result binary failed", K(ret)); + if (OB_FAIL(bin_agg.serialize())) { + LOG_WARN("failed to serialize bin agg.", K(ret)); } else { + ObStringBuffer *buff = bin_agg.get_buffer(); ObTextStringDatumResult text_result(ObJsonType, aggr_info.expr_->obj_meta_.has_lob_header(), &concat_result); - if (OB_FAIL(text_result.init(str.length(), &aggr_alloc_))) { + if (OB_FAIL(text_result.init(buff->length(), &aggr_alloc_))) { LOG_WARN("init lob result failed"); - } else if (OB_FAIL(text_result.append(str.ptr(), str.length()))) { - LOG_WARN("failed to append realdata", K(ret), K(str), K(text_result)); + } else if (OB_FAIL(text_result.append(buff->ptr(), buff->length()))) { + LOG_WARN("failed to append realdata", K(ret), K(buff), K(text_result)); } else { text_result.set_result(); } @@ -7073,6 +7090,10 @@ int ObAggregateProcessor::get_ora_json_arrayagg_result(const ObAggrInfo &aggr_in { int ret = OB_SUCCESS; common::ObArenaAllocator tmp_alloc(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc_back(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc_arr(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObStringBuffer value(&res_alloc_arr); if (OB_ISNULL(extra) || OB_UNLIKELY(extra->empty())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unpexcted null", K(ret), K(extra)); @@ -7090,7 +7111,7 @@ int ObAggregateProcessor::get_ora_json_arrayagg_result(const ObAggrInfo &aggr_in bool is_format_json = aggr_info.format_json_; bool is_absent_on_null = !aggr_info.absent_on_null_; bool is_strict = aggr_info.strict_json_; - ObJsonArray json_array(&tmp_alloc); + ObBinAggSerializer bin_agg(&res_alloc, AGG_JSON, 
static_cast(ObJsonNodeType::J_ARRAY), false, &res_alloc_back, &res_alloc_arr); while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row(storted_row))) { if (OB_ISNULL(storted_row) || storted_row->cnt_ < 1) { @@ -7102,19 +7123,26 @@ int ObAggregateProcessor::get_ora_json_arrayagg_result(const ObAggrInfo &aggr_in ObScale scale = data_meta.get_scale(); ObCollationType cs_type = data_meta.get_collation_type(); ObIJsonBase *json_val = nullptr; + ObJsonBin *jb_node = nullptr; + ObString key; if (OB_FAIL(ObJsonExprHelper::oracle_datum2_json_val(&datum, data_meta, &tmp_alloc, - eval_ctx_.exec_ctx_.get_my_session(), json_val, false, is_format_json, is_strict, false))) { + eval_ctx_.exec_ctx_.get_my_session(), json_val, false, is_format_json, is_strict, true))) { LOG_WARN("failed to eval json val node.", K(ret), K(is_format_json), K(is_strict), K(data_meta)); } else if (is_absent_on_null && (val_type == ObNullType || json_val->json_type() == ObJsonNodeType::J_NULL)) { // do nothing , continue - } else if (OB_FAIL(json_array.append(static_cast(json_val)))) { - LOG_WARN("failed to append array node", K(ret), K(json_array.element_count())); - } else if (json_array.get_serialize_size() > OB_MAX_PACKET_LENGTH) { + } else if (OB_ISNULL(jb_node = static_cast(json_val))) { + ret = OB_ERR_UNDEFINED; + LOG_WARN("get binary null", K(ret)); + } else if (OB_FAIL(bin_agg.append_key_and_value(key, value, jb_node))) { + LOG_WARN("failed to append key and value", K(ret)); + } else if (bin_agg.get_approximate_length() > OB_MAX_PACKET_LENGTH * 2) { ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT; - LOG_WARN("result of json_arrayagg is too long", K(ret), - K(json_array.get_serialize_size()), K(OB_MAX_PACKET_LENGTH)); + LOG_WARN("result of json_arrayagg is too long", K(ret), K(bin_agg.get_approximate_length()), + K(OB_MAX_PACKET_LENGTH)); + } else { + tmp_alloc.reset(); } } } //end of while @@ -7136,14 +7164,21 @@ int ObAggregateProcessor::get_ora_json_arrayagg_result(const ObAggrInfo &aggr_in rsp_len = 
(ObAccuracy::DDL_DEFAULT_ACCURACY[ObJsonType]).get_length(); } - int64_t result_likely_size = json_array.get_serialize_size(); - ObJsonBuffer string_buffer(&tmp_alloc); - - if (ob_is_string_type(rsp_type) || ob_is_raw(rsp_type)) { - if (OB_FAIL(string_buffer.reserve(result_likely_size))) { - LOG_WARN("fail to reserve string.", K(ret), K(result_likely_size)); - } else if (OB_FAIL(json_array.print(string_buffer, true, false))) { + if (OB_FAIL(bin_agg.serialize())) { + LOG_WARN("failed to serialize bin agg.", K(ret)); + } else if (ob_is_string_type(rsp_type) || ob_is_raw(rsp_type)) { + ObIJsonBase *j_base = NULL; + ObStringBuffer *buff = bin_agg.get_buffer(); + if (OB_FAIL(string_buffer.reserve(buff->length()))) { + LOG_WARN("fail to reserve string.", K(ret), K(buff->length())); + } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&tmp_alloc, + buff->string(), + ObJsonInType::JSON_BIN, + ObJsonInType::JSON_BIN, + j_base))) { + LOG_WARN("fail to get real data.", K(ret), K(buff)); + } else if (OB_FAIL(j_base->print(string_buffer, true, false))) { LOG_WARN("failed: get json string text", K(ret)); } else if (rsp_type == ObVarcharType && string_buffer.length() > rsp_len) { char res_ptr[OB_MAX_DECIMAL_PRECISION] = {0}; @@ -7157,10 +7192,8 @@ int ObAggregateProcessor::get_ora_json_arrayagg_result(const ObAggrInfo &aggr_in LOG_WARN("fail to pack res result.", K(ret)); } } else if (ob_is_json(rsp_type)) { - ObString raw_binary_str; - if (OB_FAIL(json_array.get_raw_binary(raw_binary_str, &tmp_alloc))) { - LOG_WARN("get result binary failed", K(ret)); - } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(*aggr_info.expr_, eval_ctx_, concat_result, raw_binary_str, &aggr_alloc_))) { + ObStringBuffer *buff = bin_agg.get_buffer(); + if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(*aggr_info.expr_, eval_ctx_, concat_result, buff->string(), &aggr_alloc_))) { LOG_WARN("fail to pack res result.", K(ret)); } } else { @@ -7179,6 +7212,10 @@ int 
ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info, int ret = OB_SUCCESS; const int col_num = 2; common::ObArenaAllocator tmp_alloc(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc_back(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc_arr(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObStringBuffer value(&res_alloc_arr); if (OB_ISNULL(extra) || OB_UNLIKELY(extra->empty())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unpexcted null", K(ret), K(extra)); @@ -7189,12 +7226,12 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info, LOG_WARN("finish_add_row failed", KPC(extra), K(ret)); } else { const ObChunkDatumStore::StoredRow *storted_row = NULL; - ObJsonObject json_object(&tmp_alloc); ObObj tmp_obj[col_num]; bool is_bool = false; if (OB_FAIL(extra->get_bool_mark(1, is_bool))) { LOG_WARN("get_bool info failed, may not distinguish between bool and int", K(ret)); } + ObBinAggSerializer bin_agg(&res_alloc, AGG_JSON, static_cast(ObJsonNodeType::J_OBJECT), false, &res_alloc_back, &res_alloc_arr); while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row(storted_row))) { if (OB_ISNULL(storted_row)) { ret = OB_ERR_UNEXPECTED; @@ -7253,12 +7290,19 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info, && (ObCharset::charset_type_by_coll(cs_type1) != CHARSET_UTF8MB4)) { ObString origin_str = converted_datum.get_string(); ObString converted_str; - if (OB_FAIL(ObExprUtil::convert_string_collation(origin_str, cs_type1, converted_str, + if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(&tmp_alloc, + val_type1, + cs_type1, + tmp_obj->has_lob_header(), + origin_str))) { + LOG_WARN("fail to get real data.", K(ret), K(origin_str)); + } else if 
(OB_FAIL(ObExprUtil::convert_string_collation(origin_str, cs_type1, converted_str, CS_TYPE_UTF8MB4_BIN, tmp_alloc))) { LOG_WARN("convert string collation failed", K(ret), K(cs_type1), K(origin_str.length())); } else { converted_datum.set_string(converted_str); cs_type1 = CS_TYPE_UTF8MB4_BIN; + has_lob_header1 = false; } } @@ -7271,12 +7315,15 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info, } else { ObJsonBoolean *bool_node = (ObJsonBoolean*)new(json_node_buf)ObJsonBoolean(storted_row->cells()[1].get_bool()); json_val = bool_node; + if (OB_FAIL(ObJsonBaseFactory::transform(&tmp_alloc, json_val, ObJsonInType::JSON_BIN, json_val))) { + LOG_WARN("fail to transform to tree", K(ret)); + } } } else if (ObJsonExprHelper::is_convertible_to_json(val_type1)) { if (OB_FAIL(ObJsonExprHelper::transform_convertible_2jsonBase(converted_datum, val_type1, &tmp_alloc, cs_type1, - json_val, false, - has_lob_header1, true))) { + json_val, ObConv2JsonParam(true, + has_lob_header1, true)))) { LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type1)); } } else { @@ -7284,19 +7331,27 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info, &tmp_alloc, scale1, eval_ctx_.exec_ctx_.get_my_session()->get_timezone_info(), eval_ctx_.exec_ctx_.get_my_session(), - json_val, false))) { + json_val, true))) { LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type1)); } } + ObJsonBin *jb_node = nullptr; + if (OB_FAIL(ret)) { - } else if (OB_FAIL(json_object.add(key_data, static_cast(json_val), false, true, false))) { - LOG_WARN("failed: json object add json value", K(ret)); - } else if (json_object.get_serialize_size() > OB_MAX_PACKET_LENGTH) { + } else if (OB_ISNULL(jb_node = static_cast(json_val))) { + ret = OB_ERR_UNDEFINED; + LOG_WARN("get binary null", K(ret)); + } else if (OB_FAIL(bin_agg.append_key_and_value(key_data, value, jb_node))) { + LOG_WARN("failed to append key and value", K(ret), K(key_data)); + } else if 
(bin_agg.get_approximate_length() > OB_MAX_PACKET_LENGTH * 2) { ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT; - LOG_WARN("result of json_objectagg is too long", K(ret), K(json_object.get_serialize_size()), + LOG_WARN("result of json_arrayagg is too long", K(ret), K(bin_agg.get_approximate_length()), K(OB_MAX_PACKET_LENGTH)); + } else { + tmp_alloc.reset(); } + } } } @@ -7308,18 +7363,17 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info, LOG_WARN("fail to get next row", K(ret)); } else { ret = OB_SUCCESS; - ObString str; - json_object.stable_sort(); - json_object.unique(); + bin_agg.set_sort_and_unique(); // output res - if (OB_FAIL(json_object.get_raw_binary(str, &aggr_alloc_))) { - LOG_WARN("get result binary failed", K(ret)); + if (OB_FAIL(bin_agg.serialize())) { + LOG_WARN("failed to serialize bin agg.", K(ret)); } else { + ObStringBuffer *buff = bin_agg.get_buffer(); ObTextStringDatumResult text_result(ObJsonType, aggr_info.expr_->obj_meta_.has_lob_header(), &concat_result); - if (OB_FAIL(text_result.init(str.length(), &aggr_alloc_))) { + if (OB_FAIL(text_result.init(buff->length(), &aggr_alloc_))) { LOG_WARN("init lob result failed"); - } else if (OB_FAIL(text_result.append(str.ptr(), str.length()))) { - LOG_WARN("failed to append realdata", K(ret), K(str), K(text_result)); + } else if (OB_FAIL(text_result.append(buff->ptr(), buff->length()))) { + LOG_WARN("failed to append realdata", K(ret), K(buff), K(text_result)); } else { text_result.set_result(); } @@ -7334,7 +7388,7 @@ int ObAggregateProcessor::get_ora_xmlagg_result(const ObAggrInfo &aggr_info, ObDatum &concat_result) { int ret = OB_SUCCESS; -#ifdef OB_BUILD_ORACLE_XML +#ifdef OB_BUILD_ORACLE_PL ObString result; common::ObArenaAllocator tmp_alloc(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); ObXmlDocument *content = NULL; @@ -7374,6 +7428,7 @@ int ObAggregateProcessor::get_ora_xmlagg_result(const ObAggrInfo &aggr_info, int64_t buf_len = 0; bool 
deal_special = false; int64_t add_time = 0; + ObBinAggSerializer bin_agg(xml_mem_ctx->allocator_, ObBinAggType::AGG_XML, static_cast(M_CONTENT), true); while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row(stored_row))) { if (OB_ISNULL(stored_row)) { @@ -7479,7 +7534,8 @@ int ObAggregateProcessor::get_ora_xmlagg_result(const ObAggrInfo &aggr_info, } } } - } else { + } else if (row_count == 1 && node_type == ObMulModeNodeType::M_DOCUMENT) { + deal_special = true; if (OB_FAIL(deep_copy_ob_string(tmp_alloc, cell_string, dup_str))) { LOG_WARN("fail copy string", K(ret), K(cell_string.length())); } else if (OB_FAIL(common::ObMulModeFactory::get_xml_base(xml_mem_ctx, dup_str, @@ -7510,10 +7566,41 @@ int ObAggregateProcessor::get_ora_xmlagg_result(const ObAggrInfo &aggr_info, } } + ObString res_str; + if (OB_SUCC(ret) && OB_FAIL(ObXMLExprHelper::pack_xml_res(*aggr_info.expr_, eval_ctx_, concat_result, content, xml_mem_ctx, + M_DOCUMENT, res_str))) { + LOG_WARN("pack document failed", K(ret)); + } + } else { + ObXmlBin *bin = nullptr; add_time++; - if (OB_SUCC(ret) && content->get_serialize_size() > OB_MAX_PACKET_LENGTH) { + ObXmlBin extend(xml_mem_ctx); + ObIMulModeBase *input_base = nullptr; + if (OB_FAIL(ObMulModeFactory::get_xml_base(xml_mem_ctx, cell_string, + ObNodeMemType::BINARY_TYPE, + ObNodeMemType::BINARY_TYPE, + input_base))) { + LOG_WARN("fail to get xml base", K(ret), K(dup_str)); + } else if (OB_ISNULL(bin = static_cast(input_base))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get bin null", K(ret)); + } else if (bin->check_extend()) { + if (OB_FAIL(bin->merge_extend(extend))) { + LOG_WARN("fail to merge", K(ret)); + } else { + bin = &extend; + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(bin_agg.append_key_and_value(bin))) { + LOG_WARN("append binary failed", K(ret)); + } else if (bin_agg.get_approximate_length() > OB_MAX_PACKET_LENGTH * 2) { ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT; - LOG_WARN("result of xmlagg is too long", K(ret), K(add_time), 
K(content->get_serialize_size()), K(OB_MAX_PACKET_LENGTH)); + LOG_WARN("result of json_arrayagg is too long", K(ret), K(bin_agg.get_approximate_length()), + K(OB_MAX_PACKET_LENGTH)); + } else if (!is_unparsed) { + is_unparsed = bin->get_unparse(); } } } // end if @@ -7523,18 +7610,16 @@ int ObAggregateProcessor::get_ora_xmlagg_result(const ObAggrInfo &aggr_info, } else { ret = OB_SUCCESS; if (!deal_special) { - ObString res_str; - ObMulModeNodeType target_type = M_MAX_TYPE; - if (node_type == M_DOCUMENT && row_count == 1) { - target_type = M_DOCUMENT; - } else if (is_unparsed) { - target_type = M_UNPARSED; + bin_agg.set_header_type(is_unparsed ? M_UNPARSED : M_CONTENT); + if (OB_FAIL(bin_agg.serialize())) { + LOG_WARN("fail to serialize aggregate binary", K(ret)); + } else if (bin_agg.get_buffer()->length() > OB_MAX_PACKET_LENGTH) { + ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT; + LOG_WARN("result of xmlagg is too long", K(ret), K(add_time), K(content->get_serialize_size()), K(OB_MAX_PACKET_LENGTH)); + } else if (OB_FAIL(ObXMLExprHelper::pack_binary_res(*aggr_info.expr_, eval_ctx_, bin_agg.get_buffer()->string(), blob_locator))) { + LOG_WARN("pack binary res failed", K(ret)); } else { - target_type = M_CONTENT; - } - if (OB_FAIL(ObXMLExprHelper::pack_xml_res(*aggr_info.expr_, eval_ctx_, concat_result, content, xml_mem_ctx, - target_type, res_str))) { - LOG_WARN("pack document failed", K(ret)); + concat_result.set_string(blob_locator.ptr(), blob_locator.length()); } } } @@ -7565,6 +7650,10 @@ int ObAggregateProcessor::get_ora_json_objectagg_result(const ObAggrInfo &aggr_i { int ret = OB_SUCCESS; common::ObArenaAllocator tmp_alloc(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator res_alloc_back(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + common::ObArenaAllocator 
res_alloc_arr(ObModIds::OB_SQL_AGGR_FUNC, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObStringBuffer value(&res_alloc_arr); if (OB_ISNULL(extra) || OB_UNLIKELY(extra->empty())) { ret = OB_ERR_UNEXPECTED; @@ -7587,7 +7676,7 @@ int ObAggregateProcessor::get_ora_json_objectagg_result(const ObAggrInfo &aggr_i bool is_strict = aggr_info.strict_json_; bool is_with_unique_keys = aggr_info.with_unique_keys_; - ObJsonObject j_obj(&tmp_alloc); + ObBinAggSerializer bin_agg(&res_alloc, AGG_JSON, static_cast(ObJsonNodeType::J_OBJECT), false, &res_alloc_back, &res_alloc_arr); while (OB_SUCC(ret) && OB_SUCC(extra->get_next_row(storted_row))) { if (OB_ISNULL(storted_row) || storted_row->cnt_ < 2) { @@ -7631,16 +7720,25 @@ int ObAggregateProcessor::get_ora_json_objectagg_result(const ObAggrInfo &aggr_i } else if (!need_key_string_convert && OB_FAIL(deep_copy_ob_string(tmp_alloc, key_string, key_string))) { LOG_WARN("fail to deep copy string.", K(ret), K(key_string)); } else if (OB_FAIL(ObJsonExprHelper::oracle_datum2_json_val(&datum_value, meta_value, &tmp_alloc, - eval_ctx_.exec_ctx_.get_my_session(), json_val, false, is_format_json, is_strict, false))) { + eval_ctx_.exec_ctx_.get_my_session(), json_val, false, is_format_json, is_strict, true))) { LOG_WARN("failed to eval json val node.", K(ret), K(is_format_json), K(is_strict), K(meta_value)); } else if (is_absent_on_null && json_val->json_type() == ObJsonNodeType::J_NULL) { // do nothing , continue - } else if (OB_FAIL(j_obj.add(key_string, static_cast(json_val), false, true, false))) { - LOG_WARN("failed to append array node", K(ret), K(j_obj.element_count())); - } else if (j_obj.get_serialize_size() > OB_MAX_PACKET_LENGTH) { - ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT; - LOG_WARN("result of json_objectagg is too long", K(ret), - K(j_obj.get_serialize_size()), K(OB_MAX_PACKET_LENGTH)); + } else { + ObJsonBin *jb_node = nullptr; + if (OB_ISNULL(jb_node = static_cast(json_val))) { + ret = OB_ERR_UNDEFINED; + LOG_WARN("get binary 
null", K(ret)); + } else if (OB_FAIL(bin_agg.append_key_and_value(key_string, value, jb_node))) { + LOG_WARN("failed to append key and value", K(ret), K(key_string)); + } else if (bin_agg.get_approximate_length() > OB_MAX_PACKET_LENGTH * 2) { + ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT; + LOG_WARN("result of json_arrayagg is too long", K(ret), K(bin_agg.get_approximate_length()), + K(OB_MAX_PACKET_LENGTH)); + } else { + tmp_alloc.reset(); + } + } } } @@ -7657,7 +7755,7 @@ int ObAggregateProcessor::get_ora_json_objectagg_result(const ObAggrInfo &aggr_i ObObjType rsp_type = static_cast(parse_node.int16_values_[OB_NODE_CAST_TYPE_IDX]); ObCollationType rsp_cs_type = static_cast(parse_node.int16_values_[OB_NODE_CAST_COLL_IDX]); int32_t rsp_len = parse_node.int32_values_[OB_NODE_CAST_C_LEN_IDX]; - int64_t result_likely_size = j_obj.get_serialize_size(); + //int64_t result_likely_size = j_obj.get_serialize_size(); if (!parse_node.value_) { is_default_type = true; @@ -7667,23 +7765,31 @@ int ObAggregateProcessor::get_ora_json_objectagg_result(const ObAggrInfo &aggr_i } ObJsonBuffer string_buffer(&tmp_alloc); + ObStringBuffer *buff = nullptr; - uint64_t unsorted_count = j_obj.element_count(); if (is_strict || (ob_is_json(rsp_type) && !is_default_type) || is_with_unique_keys) { - j_obj.stable_sort(); - j_obj.unique(); + bin_agg.set_sort_and_unique(); } - uint64_t sorted_count = j_obj.element_count(); - if ((is_with_unique_keys || ob_is_json(rsp_type) || is_strict) - && (unsorted_count > sorted_count)) { + if (OB_FAIL(bin_agg.serialize())) { + LOG_WARN("failed to serialize bin agg.", K(ret)); + } else if ((is_with_unique_keys || ob_is_json(rsp_type) || is_strict) + && (bin_agg.get_key_info_count() > bin_agg.get_last_count())) { ret = OB_ERR_DUPLICATE_KEY; LOG_WARN("duplicate key", K(ret)); + } else if (OB_FALSE_IT(buff = bin_agg.get_buffer())) { } else if (ob_is_string_type(rsp_type) || ob_is_raw(rsp_type)) { - if (OB_FAIL(string_buffer.reserve(result_likely_size))) { - 
LOG_WARN("fail to reserve string.", K(ret), K(result_likely_size)); - } else if (OB_FAIL(j_obj.print(string_buffer, true, false))) { + ObIJsonBase *j_base = NULL; + if (OB_FAIL(string_buffer.reserve(buff->length()))) { + LOG_WARN("fail to reserve string.", K(ret), K(buff->length())); + } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&tmp_alloc, + buff->string(), + ObJsonInType::JSON_BIN, + ObJsonInType::JSON_BIN, + j_base))) { + LOG_WARN("fail to get real data.", K(ret), K(buff)); + } else if (OB_FAIL(j_base->print(string_buffer, true, false))) { LOG_WARN("failed: get json string text", K(ret)); } else if (rsp_type == ObVarcharType && string_buffer.length() > rsp_len) { char res_ptr[OB_MAX_DECIMAL_PRECISION] = {0}; @@ -7697,10 +7803,7 @@ int ObAggregateProcessor::get_ora_json_objectagg_result(const ObAggrInfo &aggr_i LOG_WARN("fail to pack res result.", K(ret)); } } else if (ob_is_json(rsp_type)) { - ObString raw_binary_str; - if (OB_FAIL(j_obj.get_raw_binary(raw_binary_str, &tmp_alloc))) { - LOG_WARN("get result binary failed", K(ret)); - } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(*aggr_info.expr_, eval_ctx_, concat_result, raw_binary_str, &aggr_alloc_))) { + if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(*aggr_info.expr_, eval_ctx_, concat_result, buff->string(), &aggr_alloc_))) { LOG_WARN("fail to pack res result.", K(ret)); } } else { diff --git a/src/sql/engine/basic/ob_function_table_op.cpp b/src/sql/engine/basic/ob_function_table_op.cpp index bea72f38b6..441795af65 100644 --- a/src/sql/engine/basic/ob_function_table_op.cpp +++ b/src/sql/engine/basic/ob_function_table_op.cpp @@ -173,7 +173,10 @@ int ObFunctionTableOp::inner_get_next_row_udf() pl::ObPLRecord *record = NULL; ObObj record_obj; OZ (get_current_result(record_obj)); - if (OB_SUCC(ret) && record_obj.is_pl_extend()) { + if (OB_FAIL(ret)) { + } else if (ObUserDefinedSQLType == record_obj.get_type()) { + obj_stack[0] = record_obj; + } else if (OB_SUCC(ret) && record_obj.is_pl_extend()) 
{ CK (OB_NOT_NULL(composite = reinterpret_cast(record_obj.get_ext()))); CK (composite->is_record()); OX (record = static_cast(composite)); diff --git a/src/sql/engine/basic/ob_json_table_op.cpp b/src/sql/engine/basic/ob_json_table_op.cpp index e344607546..1665924fe8 100644 --- a/src/sql/engine/basic/ob_json_table_op.cpp +++ b/src/sql/engine/basic/ob_json_table_op.cpp @@ -15,12 +15,18 @@ #include "ob_json_table_op.h" #include "share/object/ob_obj_cast_util.h" #include "share/object/ob_obj_cast.h" +#include "share/ob_json_access_utils.h" #include "common/sql_mode/ob_sql_mode_utils.h" #include "sql/ob_sql_utils.h" #include "sql/engine/expr/ob_datum_cast.h" #include "sql/session/ob_sql_session_info.h" #include "sql/engine/ob_physical_plan.h" #include "sql/engine/expr/ob_expr_json_func_helper.h" +#include "sql/engine/expr/ob_expr_json_value.h" +#include "sql/engine/expr/ob_expr_json_query.h" +#include "sql/engine/expr/ob_expr_json_exists.h" +#include "lib/xml/ob_binary_aggregate.h" +#include "lib/xml/ob_xpath.h" namespace oceanbase @@ -64,2497 +70,6 @@ const static int32_t JSN_EXIST_DEFAULT = 3; } \ } -int JtFuncHelpler::cast_to_int(ObIJsonBase *j_base, ObObjType dst_type, int64_t &val) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_FAIL(j_base->to_int(val, true))) { - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "SIGNED", "json_table"); - LOG_WARN("cast to int failed", K(ret), K(*j_base)); - } else if (dst_type < ObIntType && - OB_FAIL(int_range_check(dst_type, val, val))) { - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "SIGNED", "json_table"); - } - - return ret; -} - -int JtFuncHelpler::cast_to_uint(ObIJsonBase *j_base, ObObjType dst_type, uint64_t &val) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_FAIL(j_base->to_uint(val, true, true))) { - 
LOG_WARN("cast to uint failed", K(ret), K(*j_base)); - if (ret == OB_OPERATE_OVERFLOW) { - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "UNSIGNED", "json_table"); - } - } else if (dst_type < ObUInt64Type && - OB_FAIL(uint_upper_check(dst_type, val))) { - LOG_WARN("uint_upper_check failed", K(ret)); - } - - return ret; -} - -int JtFuncHelpler::number_range_check(const ObAccuracy &accuracy, - ObIAllocator *allocator, - number::ObNumber &val, - bool strict) -{ - INIT_SUCC(ret); - ObPrecision precision = accuracy.get_precision(); - ObScale scale = accuracy.get_scale(); - const number::ObNumber *min_check_num = NULL; - const number::ObNumber *max_check_num = NULL; - const number::ObNumber *min_num_mysql = NULL; - const number::ObNumber *max_num_mysql = NULL; - bool is_finish = false; - if (lib::is_oracle_mode()) { - if (OB_MAX_NUMBER_PRECISION >= precision - && precision >= OB_MIN_NUMBER_PRECISION - && number::ObNumber::MAX_SCALE >= scale - && scale >= number::ObNumber::MIN_SCALE) { - min_check_num = &(ObNumberConstValue::ORACLE_CHECK_MIN[precision][scale + ObNumberConstValue::MAX_ORACLE_SCALE_DELTA]); - max_check_num = &(ObNumberConstValue::ORACLE_CHECK_MAX[precision][scale + ObNumberConstValue::MAX_ORACLE_SCALE_DELTA]); - } else if (ORA_NUMBER_SCALE_UNKNOWN_YET == scale - && PRECISION_UNKNOWN_YET == precision) { - is_finish = true; - } else if (PRECISION_UNKNOWN_YET == precision - && number::ObNumber::MAX_SCALE >= scale - && scale >= number::ObNumber::MIN_SCALE) { - number::ObNumber num; - if (OB_FAIL(num.from(val, *allocator))) { - } else if (OB_FAIL(num.round(scale))) { - } else if (val.compare(num) != 0) { - ret = OB_OPERATE_OVERFLOW; - LOG_WARN("input value is out of range.", K(scale), K(val)); - } else { - is_finish = true; - } - } else { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(precision), K(scale)); - } - } else { - if (OB_UNLIKELY(precision < scale)) { - ret = OB_ERR_M_BIGGER_THAN_D; - LOG_WARN("Invalid accuracy.", K(ret), K(scale), 
K(precision)); - } else if (number::ObNumber::MAX_PRECISION >= precision - && precision >= OB_MIN_DECIMAL_PRECISION - && number::ObNumber::MAX_SCALE >= scale - && scale >= 0) { - min_check_num = &(ObNumberConstValue::MYSQL_CHECK_MIN[precision][scale]); - max_check_num = &(ObNumberConstValue::MYSQL_CHECK_MAX[precision][scale]); - min_num_mysql = &(ObNumberConstValue::MYSQL_MIN[precision][scale]); - max_num_mysql = &(ObNumberConstValue::MYSQL_MAX[precision][scale]); - } else { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(precision), K(scale)); - } - } - if (OB_SUCC(ret) && !is_finish) { - if (OB_ISNULL(min_check_num) || OB_ISNULL(max_check_num) - || (!lib::is_oracle_mode() - && (OB_ISNULL(min_num_mysql) || OB_ISNULL(max_num_mysql)))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("min_num or max_num is null", K(ret), KPC(min_check_num), KPC(max_check_num)); - } else if (val <= *min_check_num) { - if (lib::is_oracle_mode()) { - ret = OB_ERR_VALUE_LARGER_THAN_ALLOWED; - } else { - ret = OB_DATA_OUT_OF_RANGE; - } - LOG_WARN("val is out of min range check.", K(val), K(*min_check_num)); - is_finish = true; - } else if (val >= *max_check_num) { - if (lib::is_oracle_mode()) { - ret = OB_ERR_VALUE_LARGER_THAN_ALLOWED; - } else { - ret = OB_DATA_OUT_OF_RANGE; - } - LOG_WARN("val is out of max range check.", K(val), K(*max_check_num)); - is_finish = true; - } else { - ObNumStackOnceAlloc tmp_alloc; - number::ObNumber num; - if (OB_FAIL(num.from(val, tmp_alloc))) { - } else if (OB_FAIL(num.round(scale))) { - LOG_WARN("num.round failed", K(ret), K(scale)); - } else { - if (strict) { - if (num.compare(val) != 0) { - ret = OB_OPERATE_OVERFLOW; - LOG_WARN("input value is out of range.", K(scale), K(val)); - } else { - is_finish = true; - } - } else { - if (OB_ISNULL(allocator)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("allocator is null", K(ret)); - } else if (OB_FAIL(val.deep_copy_v3(num, *allocator))) { - LOG_WARN("val.deep_copy_v3 failed", K(ret), K(num)); - } 
else { - is_finish = true; - } - } - } - } - } - if (OB_SUCC(ret) && !is_finish) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected situation, res is not set", K(ret)); - } - LOG_DEBUG("number_range_check_v2 done", K(ret), K(is_finish), K(accuracy), K(val), - KPC(min_check_num), KPC(max_check_num)); - - return ret; -} - -int JtFuncHelpler::datetime_scale_check(const ObAccuracy &accuracy, int64_t &value, bool strict) -{ - INIT_SUCC(ret); - ObScale scale = accuracy.get_scale(); - - if (OB_UNLIKELY(scale > MAX_SCALE_FOR_TEMPORAL)) { - ret = OB_ERR_TOO_BIG_PRECISION; - LOG_USER_ERROR(OB_ERR_TOO_BIG_PRECISION, scale, "CAST", - static_cast(MAX_SCALE_FOR_TEMPORAL)); - } else if (OB_UNLIKELY(0 <= scale && scale < MAX_SCALE_FOR_TEMPORAL)) { - // first check zero - if (strict && - (value == ObTimeConverter::ZERO_DATE || - value == ObTimeConverter::ZERO_DATETIME)) { - ret = OB_INVALID_DATE_VALUE; - LOG_WARN("Zero datetime is invalid in json_value.", K(value)); - } else { - int64_t temp_value = value; - ObTimeConverter::round_datetime(scale, temp_value); - if (strict && temp_value != value) { - ret = OB_OPERATE_OVERFLOW; - LOG_WARN("Invalid input value.", K(value), K(scale)); - } else if (ObTimeConverter::is_valid_datetime(temp_value)) { - value = temp_value; - } else { - ret = OB_ERR_NULL_VALUE; // set null for res - LOG_DEBUG("Invalid datetime val, return set_null", K(temp_value)); - } - } - } - - return ret; -} - -int JtFuncHelpler::time_scale_check(const ObAccuracy &accuracy, int64_t &value, bool strict) -{ - INIT_SUCC(ret); - ObScale scale = accuracy.get_scale(); - - if (OB_LIKELY(0 <= scale && scale < MAX_SCALE_FOR_TEMPORAL)) { - int64_t temp_value = value; - ObTimeConverter::round_datetime(scale, temp_value); - if (strict && temp_value != value) { // round success - ret = OB_OPERATE_OVERFLOW; - LOG_WARN("Invalid input value.", K(value), K(scale)); - } else { - value = temp_value; - } - } else { - // consistent with cast process do nothing - } - - return ret; -} - -// 
padding %padding_cnt character, we also need to convert collation type here. -// eg: select cast('abc' as nchar(100)) from dual; -// the space must be in utf16, because dst_type is nchar -int JtFuncHelpler::padding_char_for_cast(int64_t padding_cnt, const ObCollationType &padding_cs_type, - ObIAllocator &alloc, ObString &padding_res) -{ - int ret = OB_SUCCESS; - padding_res.reset(); - const ObCharsetType &cs = ObCharset::charset_type_by_coll(padding_cs_type); - char padding_char = (CHARSET_BINARY == cs) ? OB_PADDING_BINARY : OB_PADDING_CHAR; - int64_t padding_str_size = sizeof(padding_char) * padding_cnt; - char *padding_str_ptr = reinterpret_cast(alloc.alloc(padding_str_size)); - if (OB_ISNULL(padding_str_ptr)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc memory failed", K(ret)); - } else if (CHARSET_BINARY == cs) { - MEMSET(padding_str_ptr, padding_char, padding_str_size); - padding_res.assign_ptr(padding_str_ptr, padding_str_size); - } else { - MEMSET(padding_str_ptr, padding_char, padding_str_size); - ObString padding_str(padding_str_size, padding_str_ptr); - if (OB_FAIL(ObExprUtil::convert_string_collation(padding_str, - ObCharset::get_system_collation(), - padding_res, - padding_cs_type, - alloc))) { - LOG_WARN("convert padding str collation faield", K(ret), K(padding_str), - K(padding_cs_type)); - } - } - LOG_DEBUG("pad char done", K(ret), K(padding_cnt), K(padding_cs_type), K(padding_res)); - return ret; -} - -int JtFuncHelpler::cast_to_string(JtColNode* node, - common::ObIAllocator *allocator, - ObIJsonBase *j_base, - ObCollationType in_cs_type, - ObCollationType dst_cs_type, - common::ObAccuracy &accuracy, - ObObjType dst_type, - ObString &val, - bool is_trunc, - bool is_quote, - bool is_const) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_ISNULL(allocator)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("allocator is null", K(ret)); - } else { - ObJsonBuffer 
j_buf(allocator); - if (OB_FAIL(j_base->print(j_buf, is_quote))) { - LOG_WARN("fail to_string as json", K(ret)); - } else { - ObObjType in_type = ObLongTextType; - ObString temp_str_val(j_buf.length(), j_buf.ptr()); - bool is_need_string_string_convert = ((CS_TYPE_BINARY == dst_cs_type) || - (ObCharset::charset_type_by_coll(in_cs_type) != - ObCharset::charset_type_by_coll(dst_cs_type))) - && !(lib::is_mysql_mode() && temp_str_val.length() == 0); - if (is_need_string_string_convert) { - if (CS_TYPE_BINARY != in_cs_type - && CS_TYPE_BINARY != dst_cs_type - && (ObCharset::charset_type_by_coll(in_cs_type) != - ObCharset::charset_type_by_coll(dst_cs_type))) { - char *buf = NULL; - const int64_t factor = 2; - int64_t buf_len = temp_str_val.length() * factor; - uint32_t result_len = 0; - buf = static_cast(allocator->alloc(buf_len)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc memory failed", K(ret)); - } else if (OB_FAIL(ObCharset::charset_convert(in_cs_type, temp_str_val.ptr(), - temp_str_val.length(), dst_cs_type, buf, - buf_len, result_len))) { - LOG_WARN("charset convert failed", K(ret)); - } else { - val.assign_ptr(buf, result_len); - } - } else { - if (CS_TYPE_BINARY == in_cs_type || CS_TYPE_BINARY == dst_cs_type) { - // just copy string when in_cs_type or out_cs_type is binary - const ObCharsetInfo *cs = NULL; - int64_t align_offset = 0; - if (CS_TYPE_BINARY == in_cs_type && (NULL != (cs = ObCharset::get_charset(dst_cs_type)))) { - if (cs->mbminlen > 0 && temp_str_val.length() % cs->mbminlen != 0) { - align_offset = cs->mbminlen - temp_str_val.length() % cs->mbminlen; - } - } - int64_t len = align_offset + temp_str_val.length(); - char *buf = reinterpret_cast(allocator->alloc(len)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory failed", K(ret)); - } else { - MEMMOVE(buf + align_offset, temp_str_val.ptr(), len - align_offset); - MEMSET(buf, 0, align_offset); - val.assign_ptr(buf, len); - } - } 
else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("same charset should not be here, just use cast_eval_arg", K(ret), - K(in_type), K(dst_type), K(in_cs_type), K(dst_cs_type)); - } - } - } else { - val.assign_ptr(temp_str_val.ptr(), temp_str_val.length()); - } - - int32_t str_len_char; - ObLength max_accuracy_len; - if (lib::is_mysql_mode()) { - str_len_char = static_cast(ObCharset::strlen_char(dst_cs_type, val.ptr(), val.length())); - max_accuracy_len = (ob_obj_type_class(dst_type) == ObTextTC - || ob_obj_type_class(dst_type) == ObJsonTC) - ? ObAccuracy::DDL_DEFAULT_ACCURACY[dst_type].get_length() - : accuracy.get_length(); - if (OB_SUCC(ret)) { - if (max_accuracy_len == DEFAULT_STR_LENGTH) { // default string len - } else if (max_accuracy_len <= 0 || str_len_char > max_accuracy_len) { - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "column", "json_table"); - LOG_WARN("length oversize", K(ret), K(str_len_char), K(max_accuracy_len)); - } - } - if (OB_SUCC(ret) && ObCharType == dst_type && CS_TYPE_BINARY == dst_cs_type) { // binary need padding - int64_t text_length = val.length(); - if (max_accuracy_len > text_length) { - int64_t padding_cnt = max_accuracy_len - text_length; - ObString padding_res; - if (OB_FAIL(JtFuncHelpler::padding_char_for_cast(padding_cnt, dst_cs_type, *allocator, - padding_res))) { - LOG_WARN("padding char failed", K(ret), K(padding_cnt), K(dst_cs_type)); - } else { - int64_t padding_size = padding_res.length() + val.length(); - char *buf = reinterpret_cast(allocator->alloc(padding_size)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory failed", K(ret)); - } else { - MEMMOVE(buf, val.ptr(), val.length()); - MEMMOVE(buf + val.length(), padding_res.ptr(), padding_res.length()); - val.assign_ptr(buf, padding_size); - } - } - } - } - } else { - ObLengthSemantics senmactics = node->col_info_.data_type_.get_length_semantics(); - // do str length check - str_len_char = 
static_cast(ObCharset::strlen_char( - senmactics == LS_BYTE ? CS_TYPE_BINARY : dst_cs_type, val.ptr(), val.length())); - max_accuracy_len = (dst_type == ObLongTextType) ? OB_MAX_LONGTEXT_LENGTH : accuracy.get_length(); - max_accuracy_len *= (senmactics == LS_BYTE ? 1 : 2); - - uint32_t byte_len = 0; - byte_len = ObCharset::charpos(senmactics == LS_BYTE ? CS_TYPE_BINARY : dst_cs_type, val.ptr(), str_len_char, max_accuracy_len); - - if (OB_SUCC(ret)) { - if (max_accuracy_len == DEFAULT_STR_LENGTH) { // default string len - } else if (is_trunc && max_accuracy_len < str_len_char) { - if (!is_const && - (node->col_info_.col_type_ == static_cast(COL_TYPE_EXISTS) - || j_base->json_type() == ObJsonNodeType::J_INT - || j_base->json_type() == ObJsonNodeType::J_UINT - || j_base->json_type() == ObJsonNodeType::J_BOOLEAN - || j_base->json_type() == ObJsonNodeType::J_DOUBLE - || j_base->json_type() == ObJsonNodeType::J_DECIMAL)) { - ret = OB_ERR_VALUE_EXCEEDED_MAX; - } else { - // bugfix: - // Q1:SELECT c1 ,jt.ww b_c1 FROM t1, json_table ( c2 columns( ww varchar2(2 char) truncate path '$.a')) jt ; - // Q2:SELECT c1 ,jt.ww b_c1 FROM t1, json_table ( c2 columns( ww varchar2(2 byte) truncate path '$.a')) jt; - // should not split in the middle of char - if (byte_len == 0) { // value has zero length - val.assign_ptr("", 0); - } else if (senmactics == LS_BYTE && dst_cs_type != CS_TYPE_BINARY) { - int64_t char_len; // not used - // zero max_accuracy_len not allowed - byte_len = ObCharset::max_bytes_charpos(dst_cs_type, val.ptr(), str_len_char, max_accuracy_len, char_len); - if (byte_len == 0) { // buffer not enough for one bytes - ret = OB_OPERATE_OVERFLOW; - } else { - val.assign_ptr(val.ptr(), byte_len); - } - } else { - val.assign_ptr(val.ptr(), byte_len); - } - } - } else if (max_accuracy_len <= 0 || str_len_char > max_accuracy_len) { - ret = OB_OPERATE_OVERFLOW; - } - } - } - } - } - - return ret; -} - -bool JtFuncHelpler::type_cast_to_string(JtColNode* node, - ObString 
&json_string, - common::ObIAllocator *allocator, - ObIJsonBase *j_base, - ObAccuracy &accuracy) -{ - INIT_SUCC(ret); - ret = cast_to_string(node, allocator, j_base, CS_TYPE_BINARY, CS_TYPE_BINARY, accuracy, ObLongTextType, json_string); - return ret == 0 ? true : false; -} - -int JtFuncHelpler::cast_to_datetime(JtColNode* node, - ObIJsonBase *j_base, - common::ObIAllocator *allocator, - const ObBasicSessionInfo *session, - ObEvalCtx *ctx, - const ObExpr *expr, - common::ObAccuracy &accuracy, - int64_t &val) -{ - INIT_SUCC(ret); - ObString json_string; - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_ISNULL(session)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("session is NULL", K(ret)); - } else { - oceanbase::common::ObTimeConvertCtx cvrt_ctx(session->get_timezone_info(), false); - if (lib::is_oracle_mode()) { - if (OB_FAIL(common_get_nls_format(session, *ctx, expr, ObDateTimeType, - true, - cvrt_ctx.oracle_nls_format_))) { - LOG_WARN("common_get_nls_format failed", K(ret)); - } else if (type_cast_to_string(node, json_string, allocator, j_base, accuracy) && json_string.length() > 0) { - ObJsonString json_str(json_string.ptr(),json_string.length()); - if (OB_FAIL(json_str.to_datetime(val, &cvrt_ctx))) { - LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); - } - } else if (OB_FAIL(j_base->to_datetime(val, &cvrt_ctx))) { - LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); - } else if (OB_FAIL(datetime_scale_check(accuracy, val))) { - LOG_WARN("datetime_scale_check failed.", K(ret)); - } - } else { - if (OB_FAIL(j_base->to_datetime(val, &cvrt_ctx))) { - LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); - } else if (OB_FAIL(datetime_scale_check(accuracy, val))) { - LOG_WARN("datetime_scale_check failed.", K(ret)); - } - } - } - - return ret; -} - -int JtFuncHelpler::cast_to_otimstamp(ObIJsonBase *j_base, - const ObBasicSessionInfo *session, - ObEvalCtx *ctx, - const ObExpr *expr, 
- common::ObAccuracy &accuracy, - ObObjType dst_type, - ObOTimestampData &out_val) -{ - INIT_SUCC(ret); - int64_t val; - - oceanbase::common::ObTimeConvertCtx cvrt_ctx(NULL, dst_type == ObTimestampType); - if (OB_ISNULL(session)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("session is NULL", K(ret)); - } else if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (is_oracle_mode() && j_base->is_json_number(j_base->json_type())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("can't cast to timestamps", K(ret)); - } else { - cvrt_ctx.tz_info_ = session->get_timezone_info(); - if (OB_FAIL(common_get_nls_format(session, *ctx, expr, ObDateTimeType, - true, - cvrt_ctx.oracle_nls_format_))) { - LOG_WARN("common_get_nls_format failed", K(ret)); - } - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(j_base->to_datetime(val, &cvrt_ctx))) { - LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); - } else if (dst_type == ObTimestampType) { - out_val.time_us_ = val; - out_val.time_ctx_.tail_nsec_ = 0; - } else { - if (OB_FAIL(ObTimeConverter::odate_to_otimestamp(val, cvrt_ctx.tz_info_, dst_type, out_val))) { - LOG_WARN("fail to timestamp_to_timestamp_tz", K(ret), K(val), K(dst_type)); - } - } - if (OB_SUCC(ret)) { - ObScale scale = accuracy.get_scale(); - if (OB_UNLIKELY(0 <= scale && scale < MAX_SCALE_FOR_ORACLE_TEMPORAL)) { - ObOTimestampData ot_data = ObTimeConverter::round_otimestamp(scale, out_val); - if (ObTimeConverter::is_valid_otimestamp(ot_data.time_us_, - static_cast(ot_data.time_ctx_.tail_nsec_))) { - out_val = ot_data; - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid otimestamp, set it null ", K(ot_data), K(scale), "orig_date", out_val); - } - } - } - - return ret; -} - -int JtFuncHelpler::cast_to_date(ObIJsonBase *j_base, int32_t &val) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_FAIL(j_base->to_date(val))) { - 
LOG_WARN("wrapper to date failed.", K(ret), K(*j_base)); - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "DATE", "json_value"); - } - - return ret; -} - -int JtFuncHelpler::cast_to_time(ObIJsonBase *j_base, - common::ObAccuracy &accuracy, - int64_t &val) -{ - INIT_SUCC(ret); - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_FAIL(j_base->to_time(val))) { - LOG_WARN("wrapper to time failed.", K(ret), K(*j_base)); - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "TIME", "json_value"); - } else if (OB_FAIL(time_scale_check(accuracy, val))) { - LOG_WARN("time_scale_check failed.", K(ret)); - } - - return ret; -} - -int JtFuncHelpler::cast_to_year(ObIJsonBase *j_base, uint8_t &val) -{ - INIT_SUCC(ret); - int64_t int_val; - const uint16 min_year = 1901; - const uint16 max_year = 2155; - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_FAIL(j_base->to_int(int_val))) { - LOG_WARN("wrapper to year failed.", K(ret), K(*j_base)); - } else if (lib::is_oracle_mode() - && (0 != int_val && (int_val < min_year || int_val > max_year))) { - // different with cast, if 0 < int val < 100, do not add base year - LOG_DEBUG("int out of year range", K(int_val)); - ret = OB_DATA_OUT_OF_RANGE; - } else if(OB_FAIL(ObTimeConverter::int_to_year(int_val, val))) { - LOG_WARN("int to year failed.", K(ret), K(int_val)); - } - - return ret; -} - -int JtFuncHelpler::cast_to_float(ObIJsonBase *j_base, ObObjType dst_type, float &val) -{ - INIT_SUCC(ret); - double tmp_val; - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_FAIL(j_base->to_double(tmp_val))) { - LOG_WARN("wrapper to date failed.", K(ret), K(*j_base)); - } else { - val = static_cast(tmp_val); - if (lib::is_mysql_mode() && OB_FAIL(real_range_check(dst_type, tmp_val, val))) { - LOG_WARN("real_range_check failed", K(ret), 
K(tmp_val)); - } - } - - return ret; -} - -int JtFuncHelpler::cast_to_double(ObIJsonBase *j_base, ObObjType dst_type, double &val) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_FAIL(j_base->to_double(val))) { - LOG_WARN("wrapper to date failed.", K(ret), K(*j_base)); - } else if (ObUDoubleType == dst_type && OB_FAIL(numeric_negative_check(val))) { - LOG_WARN("numeric_negative_check failed", K(ret), K(val)); - } - - return ret; -} - -int JtFuncHelpler::cast_to_number(common::ObIAllocator *allocator, - ObIJsonBase *j_base, - common::ObAccuracy &accuracy, - ObObjType dst_type, - number::ObNumber &val) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_FAIL(j_base->to_number(allocator, val))) { - LOG_WARN("fail to cast json as decimal", K(ret)); - } else if (ObUNumberType == dst_type && OB_FAIL(numeric_negative_check(val))) { - LOG_WARN("numeric_negative_check failed", K(ret), K(val)); - } else if (OB_FAIL(number_range_check(accuracy, allocator, val))) { - LOG_WARN("number_range_check failed", K(ret), K(val)); - } - - return ret; -} - -int JtFuncHelpler::cast_to_bit(ObIJsonBase *j_base, uint64_t &val, common::ObAccuracy &accuracy) -{ - INIT_SUCC(ret); - int64_t int_val; - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_FAIL(j_base->to_int(int_val))) { - ret = OB_ERR_INVALID_JSON_VALUE_FOR_CAST; - LOG_WARN("fail get int from json", K(ret)); - } else { - val = static_cast(int_val); - if (OB_FAIL(bit_length_check(accuracy, val))) { - LOG_WARN("fail to check bit range", K(ret)); - } - } - - return ret; -} - -int JtFuncHelpler::bit_length_check(const ObAccuracy &accuracy, - uint64_t &value) -{ - int ret = OB_SUCCESS; - int32_t bit_len = 0; - int32_t dst_bit_len = accuracy.get_precision(); - if (OB_FAIL(ObJsonBaseUtil::get_bit_len(value, 
bit_len))) { - LOG_WARN("fail to get_bit_length", K(ret), K(value), K(bit_len)); - } else if(OB_UNLIKELY(bit_len <= 0)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("bit length is negative", K(ret), K(value), K(bit_len)); - } else { - if (OB_UNLIKELY(bit_len > dst_bit_len)) { - ret = OB_ERR_DATA_TOO_LONG; - LOG_WARN("bit type length is too long", K(ret), K(bit_len), - K(dst_bit_len), K(value)); - } - } - return ret; -} - -int JtFuncHelpler::cast_to_json(common::ObIAllocator *allocator, ObIJsonBase *j_base, ObString &val) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_FAIL(j_base->get_raw_binary(val, allocator))) { - LOG_WARN("failed to get raw binary", K(ret)); - } - - return ret; -} - -int JtFuncHelpler::cast_to_res(JtScanCtx* ctx, ObIJsonBase* js_val, JtColNode& col_node, bool enable_error = true) -{ - INIT_SUCC(ret); - ObJtColInfo& col_info = col_node.get_column_def(); - bool is_truncate = static_cast(col_info.truncate_); - - ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_info.output_column_idx_); - ObDatum& res = expr->locate_datum_for_write(*ctx->eval_ctx_); - ctx->res_obj_ = &res; - ObObjType dst_type = expr->datum_meta_.type_; - - if (OB_FAIL(cast_json_to_res(ctx, js_val, col_node, res, enable_error))) { - LOG_WARN("fail to cast json to res", K(ret)); - } - LOG_DEBUG("finish cast_to_res.", K(ret), K(dst_type)); - - return ret; -} - -int JtFuncHelpler::cast_json_to_res(JtScanCtx* ctx, ObIJsonBase* js_val, JtColNode& col_node, ObDatum& res, bool enable_error) -{ - INIT_SUCC(ret); - ObJtColInfo& col_info = col_node.get_column_def(); - bool is_truncate = static_cast(col_info.truncate_); - - ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_info.output_column_idx_); - - ObObjType dst_type = expr->datum_meta_.type_; - ObCollationType coll_type = expr->datum_meta_.cs_type_; - ObAccuracy accuracy = col_info.data_type_.get_accuracy(); - ObCollationType dst_coll_type = 
col_info.data_type_.get_collation_type(); - ObCollationType in_coll_type = ctx->is_charset_converted_ - ? CS_TYPE_UTF8MB4_BIN - : ctx->spec_ptr_->value_expr_->datum_meta_.cs_type_; - ObCollationLevel dst_coll_level = col_info.data_type_.get_collation_level(); - - if (OB_ISNULL(js_val)) { - res.set_null(); - } else { - switch (dst_type) { - case ObNullType : { - res.set_null(); - break; - } - case ObTinyIntType: - case ObSmallIntType: - case ObMediumIntType: - case ObInt32Type: - case ObIntType: { - int64_t val; - ret = cast_to_int(js_val, dst_type, val); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - if (dst_type == ObIntType) { - res.set_int(val); - } else { - res.set_int32(static_cast(val)); - } - } - break; - } - case ObUTinyIntType: - case ObUSmallIntType: - case ObUMediumIntType: - case ObUInt32Type: - case ObUInt64Type: { - uint64_t val; - ret = cast_to_uint(js_val, dst_type, val); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - if (dst_type == ObUInt64Type) { - res.set_uint(val); - } else { - res.set_uint32(static_cast(val)); - } - } - break; - } - case ObDateTimeType: { - const ObBasicSessionInfo *session = ctx->exec_ctx_->get_my_session(); - int64_t val; - ret = cast_to_datetime(&col_node, js_val, &ctx->row_alloc_, session, ctx->eval_ctx_, expr, accuracy, val); - if (ret == OB_ERR_NULL_VALUE) { - res.set_null(); - } else if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - res.set_datetime(val); - } - break; - } - case ObTimestampNanoType: - case ObTimestampTZType: - case ObTimestampLTZType: - case ObTimestampType: { - const ObBasicSessionInfo 
*session = ctx->exec_ctx_->get_my_session(); - ObOTimestampData val; - ret = cast_to_otimstamp(js_val, session, ctx->eval_ctx_, expr, accuracy, dst_type, val); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - if (dst_type == ObTimestampTZType) { - res.set_otimestamp_tz(val); - } else if (dst_type == ObTimestampType) { - res.set_datetime(val.time_us_); - } else { - res.set_otimestamp_tiny(val); - } - } - break; - } - case ObDateType: { - int32_t val; - ret = cast_to_date(js_val, val); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - res.set_date(val); - } - break; - } - case ObTimeType: { - int64_t val; - ret = cast_to_time(js_val, accuracy, val); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - res.set_time(val); - } - break; - } - case ObYearType: { - uint8_t val; - ret = cast_to_year(js_val, val); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - res.set_year(val); - } - break; - } - case ObNumberFloatType: - case ObFloatType: - case ObUFloatType: { - float out_val; - ret = cast_to_float(js_val, dst_type, out_val); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - res.set_float(out_val); - } - break; - } - case ObDoubleType: - case ObUDoubleType: { - double out_val; - ret = cast_to_double(js_val, dst_type, out_val); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = 
set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - res.set_double(out_val); - } - break; - } - case ObUNumberType: - case ObNumberType: { - number::ObNumber out_val; - ret = cast_to_number(&ctx->row_alloc_, js_val, accuracy, dst_type, out_val); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - res.set_number(out_val); - } - break; - } - case ObVarcharType: - case ObRawType: - case ObNVarchar2Type: - case ObNCharType: - case ObCharType: - case ObTinyTextType: - case ObTextType : - case ObMediumTextType: - case ObHexStringType: - case ObLongTextType: { - ObString val; - bool is_quote = (col_info.col_type_ == COL_TYPE_QUERY && OB_NOT_NULL(js_val) && js_val->json_type() == ObJsonNodeType::J_STRING); - ret = cast_to_string(&col_node, &ctx->row_alloc_, js_val, in_coll_type, dst_coll_type, - accuracy, dst_type, val, is_truncate, is_quote, ctx->is_const_input_); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - res.set_string(val); - } - break; - } - case ObBitType: { - uint64_t out_val; - ret = cast_to_bit(js_val, out_val, accuracy); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - res.set_uint(out_val); - } - break; - } - case ObJsonType: { - ObString out_val; - ret = cast_to_json(&ctx->row_alloc_, js_val, out_val); - if (OB_FAIL(ret) && enable_error) { - int tmp_ret = set_error_val(ctx, col_node, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } - } else { - char *buf = static_cast(ctx->row_alloc_.alloc(out_val.length())); - if 
(OB_UNLIKELY(buf == NULL)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to alloc memory for json array result", K(ret), K(out_val.length())); - } else { - MEMCPY(buf, out_val.ptr(), out_val.length()); - out_val.assign_ptr(buf, out_val.length()); - res.set_string(out_val); - } - } - break; - } - default: { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected dst_type", K(dst_type)); - break; - } - } - } - - if (OB_SUCC(ret) && is_lob_storage(dst_type) && !res.is_null()) { - ObString val = res.get_string(); - if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(*expr, *ctx->eval_ctx_, res, val, &ctx->row_alloc_))) { - LOG_WARN("fail to pack res result.", K(ret)); - } - } - - return ret; -} - -int JtFuncHelpler::pre_default_value_check_mysql(JtScanCtx* ctx, - ObIJsonBase* js_val, - JtColNode& col_node) -{ - INIT_SUCC(ret); - ObJtColInfo& col_info = col_node.get_column_def(); - bool is_truncate = static_cast(col_info.truncate_); - - ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_info.output_column_idx_); - - ObObjType dst_type = expr->datum_meta_.type_; - ObCollationType coll_type = expr->datum_meta_.cs_type_; - ObAccuracy accuracy = col_info.data_type_.get_accuracy(); - ObCollationType dst_coll_type = col_info.data_type_.get_collation_type(); - ObCollationType in_coll_type = ctx->is_charset_converted_ - ? 
CS_TYPE_UTF8MB4_BIN - : ctx->spec_ptr_->value_expr_->datum_meta_.cs_type_; - ObCollationLevel dst_coll_level = col_info.data_type_.get_collation_level(); - - if (OB_ISNULL(js_val) - || (js_val->json_type() == ObJsonNodeType::J_NULL)) { - } else { - switch (dst_type) { - case ObNullType : { - break; - } - case ObTinyIntType: - case ObSmallIntType: - case ObMediumIntType: - case ObInt32Type: - case ObIntType: { - int64_t val; - ret = cast_to_int(js_val, dst_type, val); - break; - } - case ObUTinyIntType: - case ObUSmallIntType: - case ObUMediumIntType: - case ObUInt32Type: - case ObUInt64Type: { - uint64_t val; - ret = cast_to_uint(js_val, dst_type, val); - break; - } - case ObDateTimeType: { - const ObBasicSessionInfo *session = ctx->exec_ctx_->get_my_session(); - int64_t val; - ret = cast_to_datetime(&col_node, js_val, &ctx->row_alloc_, session, ctx->eval_ctx_, expr, accuracy, val); - break; - } - case ObTimestampNanoType: - case ObTimestampTZType: - case ObTimestampLTZType: - case ObTimestampType: { - const ObBasicSessionInfo *session = ctx->exec_ctx_->get_my_session(); - ObOTimestampData val; - ret = cast_to_otimstamp(js_val, session, ctx->eval_ctx_, expr, accuracy, dst_type, val); - break; - } - case ObDateType: { - int32_t val; - ret = cast_to_date(js_val, val); - break; - } - case ObTimeType: { - int64_t val; - ret = cast_to_time(js_val, accuracy, val); - break; - } - case ObYearType: { - uint8_t val; - ret = cast_to_year(js_val, val); - break; - } - case ObNumberFloatType: - case ObFloatType: - case ObUFloatType: { - float out_val; - ret = cast_to_float(js_val, dst_type, out_val); - break; - } - case ObDoubleType: - case ObUDoubleType: { - double out_val; - ret = cast_to_double(js_val, dst_type, out_val); - break; - } - case ObUNumberType: - case ObNumberType: { - number::ObNumber out_val; - ret = cast_to_number(&ctx->row_alloc_, js_val, accuracy, dst_type, out_val); - break; - } - case ObVarcharType: - case ObRawType: - case ObNVarchar2Type: - case 
ObNCharType: - case ObCharType: - case ObTinyTextType: - case ObTextType : - case ObMediumTextType: - case ObHexStringType: - case ObLongTextType: { - ObString val; - bool is_quote = (col_info.col_type_ == COL_TYPE_QUERY && js_val->json_type() == ObJsonNodeType::J_STRING); - ret = cast_to_string(&col_node, &ctx->row_alloc_, js_val, in_coll_type, dst_coll_type, - accuracy, dst_type, val, is_truncate, is_quote, ctx->is_const_input_); - break; - } - case ObBitType: { - uint64_t out_val; - ret = cast_to_bit(js_val, out_val, accuracy); - break; - } - case ObJsonType: { - ObString out_val; - ret = cast_to_json(&ctx->row_alloc_, js_val, out_val); - break; - } - default: { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected dst_type", K(dst_type)); - break; - } - } - } - return ret; -} - -int JtFuncHelpler::set_error_val(JtScanCtx* ctx, JtColNode& col_node, int& ret) -{ - INIT_SUCC(tmp_ret); - if (ret == OB_SUCCESS) { - } else if (lib::is_mysql_mode()) { - if (OB_FAIL(set_error_val_mysql(ctx, col_node, ret))) { - LOG_WARN("fail to resolve error val in mysql mode", K(ret)); - } - } else { - const ObJtColInfo& info = col_node.col_info_; - JtColType col_type = col_node.type(); - ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_node.col_info_.output_column_idx_); - if (col_type == COL_TYPE_VALUE) { - if (info.on_error_ == JSN_VALUE_ERROR || (info.on_error_ == JSN_VALUE_IMPLICIT && info.on_empty_ == JSN_VALUE_ERROR)) { - EVAL_COVER_CODE(ctx, ret) ; - if (OB_SUCC(ret) && ctx->is_need_end_) { - ret = OB_ITER_END; - } - } else if (info.on_error_ == JSN_VALUE_DEFAULT) { - ObExpr* default_expr = ctx->spec_ptr_->err_default_exprs_.at(col_node.col_info_.error_expr_id_); - ObDatum* err_datum = nullptr; - col_node.is_null_result_ = false; - tmp_ret = default_expr->eval(*ctx->eval_ctx_, err_datum); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed do cast to returning type.", K(tmp_ret)); - } else { - ObBasicSessionInfo *session = ctx->exec_ctx_->get_my_session(); - const ObDatum& datum 
= *err_datum; - const ObString in_str = ob_is_string_type(default_expr->datum_meta_.type_) ? datum.get_string() : ObString(); - - if (OB_SUCCESS != (tmp_ret = col_node.check_default_cast_allowed(default_expr))) { - ret = tmp_ret; - LOG_WARN("check default value can't cast return type", K(tmp_ret), K(default_expr->datum_meta_)); - } else if (OB_FAIL(ObJsonExprHelper::pre_default_value_check(col_node.col_info_.data_type_.get_obj_type(), - in_str, - default_expr->datum_meta_.type_))) { - LOG_WARN("default value pre check fail", K(ret)); - } else if (ObJsonExprHelper::is_convertible_to_json(default_expr->datum_meta_.type_)) { - if (OB_SUCCESS != (tmp_ret = ObJsonExprHelper::transform_convertible_2jsonBase(datum, - default_expr->datum_meta_.type_, - &ctx->row_alloc_, - default_expr->datum_meta_.cs_type_, - col_node.err_val_, false, - default_expr->obj_meta_.has_lob_header()))) { - LOG_WARN("failed: parse value to jsonBase", K(tmp_ret)); - } - } else if (OB_SUCCESS != (tmp_ret = ObJsonExprHelper::transform_scalar_2jsonBase(datum, - default_expr->datum_meta_.type_, - &ctx->row_alloc_, - default_expr->datum_meta_.scale_, - session->get_timezone_info(), - session, - col_node.err_val_, false))) { - LOG_WARN("failed do cast to returning type.", K(tmp_ret)); - } - } - if (tmp_ret == OB_SUCCESS) { - if (OB_FAIL(JtFuncHelpler::cast_to_res(ctx, col_node.err_val_, col_node, false))) { - LOG_WARN("failed do cast defaut value to returning type.", K(ret)); - } else { - ret = OB_SUCCESS; - } - } - } else if (info.on_error_ == JSN_VALUE_NULL - || info.on_error_ == JSN_VALUE_IMPLICIT - || info.on_empty_ == JSN_VALUE_NULL - || info.on_empty_ == JSN_VALUE_IMPLICIT) { - col_node.is_null_result_ = true; - ret = ctx->is_need_end_ ? 
OB_ITER_END : OB_SUCCESS; - } - } else if (col_type == COL_TYPE_QUERY) { - if (info.on_error_ == JSN_QUERY_EMPTY || info.on_error_ == JSN_QUERY_EMPTY_ARRAY) { - col_node.curr_ = ctx->jt_op_->get_js_array(); - col_node.is_null_result_ = false; - ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; - } else if (info.on_error_ == JSN_QUERY_EMPTY_OBJECT) { - col_node.curr_ = ctx->jt_op_->get_js_object(); - col_node.is_null_result_ = false; - ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; - } else if (info.on_error_ == JSN_QUERY_NULL || info.on_error_ == JSN_QUERY_IMPLICIT) { - if (info.on_mismatch_ == JSN_QUERY_MISMATCH_ERROR) { - ret = ctx->error_code_; - } else { - col_node.is_null_result_ = true; - ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; - } - } - } else if (col_type == COL_TYPE_EXISTS) { - int is_true = 0; - if (info.on_error_ == JSN_EXIST_ERROR) { - ret = ctx->error_code_; - if (OB_SUCC(ret) && ctx->is_need_end_) { - ret = OB_ITER_END; - } - } else if (info.on_error_ == JSN_EXIST_DEFAULT || info.on_error_ == JSN_EXIST_FALSE) { - col_node.is_null_result_ = false; - ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; - } else if (info.on_error_ == JSN_EXIST_TRUE) { - is_true = 0; - col_node.is_null_result_ = false; - ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; - } - - if (OB_FAIL(ret)) { - } else if (ob_is_string_type(info.data_type_.get_obj_type())) { - ObString value = is_true ? 
ObString("true") : ObString("false"); - void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonString)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - } else { - col_node.curr_ = static_cast(new(buf)ObJsonString(value.ptr(), value.length())); - col_node.is_null_result_ = false; - } - } else if (ob_is_number_tc(info.data_type_.get_obj_type())) { - void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonInt)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("buf allocate failed", K(ret)); - } else { - col_node.curr_ = static_cast(new(buf)ObJsonInt(is_true)); - col_node.is_null_result_ = false; - } - } else { - if (col_node.col_info_.on_error_ != JSN_EXIST_ERROR) { - col_node.curr_ = nullptr; - col_node.is_null_result_ = true; - } else { - ret = OB_ERR_NON_NUMERIC_CHARACTER_VALUE; - } - } - - if (OB_SUCC(ret) - && !col_node.is_null_result_ - && OB_FAIL(JtFuncHelpler::cast_to_res(ctx, col_node.curr_, col_node, false))) { - LOG_WARN("failed do cast defaut value to returning type.", K(ret)); - } - } - - if (OB_SUCC(ret) && col_node.is_null_result_) { - expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); - } - } - return ret; -} - -int JtFuncHelpler::set_error_val_mysql(JtScanCtx* ctx, JtColNode& col_node, int& ret) -{ - const ObJtColInfo& info = col_node.col_info_; - if (ret == OB_SUCCESS) { - } else if (OB_ISNULL(ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("ctx can not be null", K(ret)); - } else if (OB_ISNULL(ctx->spec_ptr_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("spec ptr can not be null in ctx", K(ret)); - } else { - JtColType col_type = col_node.type(); - ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_node.col_info_.output_column_idx_); - if (col_type == COL_TYPE_VALUE || col_type == COL_TYPE_QUERY) { - if (info.on_error_ == JSN_VALUE_ERROR || (info.on_error_ == JSN_VALUE_IMPLICIT && info.on_empty_ == JSN_VALUE_ERROR)) { - EVAL_COVER_CODE(ctx, ret) ; - if (OB_SUCC(ret) && ctx->is_need_end_) { - ret = OB_ITER_END; - } - } else if 
(info.on_error_ == JSN_VALUE_DEFAULT) { - ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(info.output_column_idx_); - ObObjType dst_type = expr->datum_meta_.type_; - ObExpr* default_expr = ctx->spec_ptr_->err_default_exprs_.at(col_node.col_info_.error_expr_id_); - if (OB_FAIL(col_node.get_default_value_pre_mysql(default_expr, ctx, col_node.err_val_, dst_type))) { - LOG_WARN("fail to process empty default value", K(ret), K(dst_type)); - } else if (OB_FAIL(JtFuncHelpler::cast_to_res(ctx, col_node.err_val_, col_node, false))) { - LOG_WARN("failed do cast to returning type.", K(ret)); - } else { - col_node.iter_ = col_node.curr_ = col_node.emp_val_; - } - ret = OB_SUCCESS; - } else if (info.on_error_ == JSN_VALUE_NULL - || info.on_error_ == JSN_VALUE_IMPLICIT - || info.on_empty_ == JSN_VALUE_NULL - || info.on_empty_ == JSN_VALUE_IMPLICIT) { - col_node.is_null_result_ = true; - ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; - } - } else if (col_type == COL_TYPE_EXISTS) { - int is_true = 0; - if (info.on_error_ == JSN_EXIST_ERROR) { - ret = ctx->error_code_; - if (OB_SUCC(ret) && ctx->is_need_end_) { - ret = OB_ITER_END; - } - } else if (info.on_error_ == JSN_EXIST_DEFAULT || info.on_error_ == JSN_EXIST_FALSE) { - col_node.is_null_result_ = false; - ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; - } else if (info.on_error_ == JSN_EXIST_TRUE) { - is_true = 0; - col_node.is_null_result_ = false; - ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; - } - - if (OB_FAIL(ret)) { - } else if (ob_is_string_type(info.data_type_.get_obj_type())) { - ObString value = is_true ? 
ObString("1") : ObString("0"); - void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonString)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("buf allocate failed", K(ret)); - } else { - col_node.curr_ = static_cast(new(buf)ObJsonString(value.ptr(), value.length())); - col_node.is_null_result_ = false; - } - } else { - void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonInt)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("buf allocate failed", K(ret)); - } else { - col_node.curr_ = static_cast(new(buf)ObJsonInt(is_true)); - col_node.is_null_result_ = false; - } - } - - if (OB_SUCC(ret) - && !col_node.is_null_result_ - && OB_FAIL(JtFuncHelpler::cast_to_res(ctx, col_node.curr_, col_node, false))) { - LOG_WARN("failed do cast defaut value to returning type.", K(ret)); - } - } - - if (OB_SUCC(ret) && col_node.is_null_result_) { - expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); - } - } - return ret; -} - -int JtFuncHelpler::check_default_val_accuracy(const ObAccuracy &accuracy, - const ObObjType &type, - const ObDatum *obj) -{ - INIT_SUCC(ret); - ObObjTypeClass tc = ob_obj_type_class(type); - - switch (tc) { - case ObNumberTC: { - number::ObNumber temp(obj->get_number()); - ret = number_range_check(accuracy, NULL, temp, true); - LOG_WARN("number range is invalid for json_value", K(ret)); - break; - } - case ObDateTC: { - int32_t val = obj->get_date(); - if (val == ObTimeConverter::ZERO_DATE) { - // check zero date for scale over mode - ret = OB_INVALID_DATE_VALUE; - LOG_WARN("Zero date is invalid for json_value", K(ret)); - } - break; - } - case ObTimeTC: { - int64_t val = obj->get_time(); - ret = time_scale_check(accuracy, val, true); - break; - } - case ObStringTC : - case ObTextTC : { - ObString val = obj->get_string(); - const int32_t str_len_char = static_cast(ObCharset::strlen_char(CS_TYPE_UTF8MB4_BIN, - val.ptr(), val.length())); - const ObLength max_accuracy_len = (lib::is_oracle_mode() && tc == ObTextTC) ? 
OB_MAX_LONGTEXT_LENGTH : accuracy.get_length(); - if (OB_SUCC(ret)) { - if (max_accuracy_len == DEFAULT_STR_LENGTH) { // default string len - } else if (max_accuracy_len <= 0 || str_len_char > max_accuracy_len) { - if (lib::is_mysql_mode()) { - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "STRING", "json_value"); - } else { - ret = OB_ERR_VALUE_EXCEEDED_MAX; - LOG_USER_ERROR(OB_ERR_VALUE_EXCEEDED_MAX, str_len_char, max_accuracy_len); - } - } - } - break; - } - default: - break; - } - - return ret; -} - - -int JtFuncHelpler::check_default_value_inner(JtScanCtx* ctx, - ObJtColInfo &col_info, - ObExpr* col_expr, - ObExpr* default_expr) -{ - INIT_SUCC(ret); - - ObString in_str; - ObDatum *emp_datum = nullptr; - - if (OB_FAIL(default_expr->eval(*ctx->eval_ctx_, emp_datum))) { - LOG_WARN("failed do cast to returning type.", K(ret)); - } else { - in_str.assign_ptr(emp_datum->ptr_, emp_datum->len_); - } - if (OB_FAIL(ret)) { - } else if (default_expr->datum_meta_.type_ == ObNullType && ob_is_string_type(col_info.data_type_.get_obj_type())) { - ret = OB_ERR_DEFAULT_VALUE_NOT_LITERAL; - LOG_WARN("default value not match returing type", K(ret)); - } else if ((lib::is_oracle_mode() && OB_FAIL(ObJsonExprHelper::pre_default_value_check(col_expr->datum_meta_.type_, in_str, default_expr->datum_meta_.type_))) - || (lib::is_mysql_mode() && !ob_is_string_tc(default_expr->datum_meta_.type_))) { - LOG_WARN("default value pre check fail", K(ret), K(in_str)); - } else if (ob_obj_type_class(col_expr->datum_meta_.type_) == ob_obj_type_class(default_expr->datum_meta_.type_) - && OB_FAIL(JtFuncHelpler::check_default_val_accuracy(col_info.data_type_.get_accuracy(), default_expr->datum_meta_.type_, emp_datum))) { - LOG_WARN("fail to check accuracy", K(ret)); - } - - return ret; -} - -int JtFuncHelpler::check_default_value(JtScanCtx* ctx, - ObJtColInfo &col_info, - ObExpr* expr) -{ - INIT_SUCC(ret); - if (static_cast(col_info.col_type_) == COL_TYPE_VALUE) { - if 
(col_info.on_empty_ == JSN_VALUE_DEFAULT) { - ObExpr* default_expr = ctx->spec_ptr_->emp_default_exprs_.at(col_info.empty_expr_id_); - if (OB_FAIL(check_default_value_inner(ctx, col_info, expr, default_expr))) { - LOG_WARN("fail to check empty default value", K(ret)); - } - } - - if (OB_SUCC(ret) && col_info.on_error_ == JSN_VALUE_DEFAULT) { - ObExpr* default_expr = ctx->spec_ptr_->err_default_exprs_.at(col_info.error_expr_id_); - if (OB_FAIL(check_default_value_inner(ctx, col_info, expr, default_expr))) { - LOG_WARN("fail to check error default value", K(ret)); - } - } - } - - return ret; -} - -int JtColNode::open() -{ - INIT_SUCC(ret); - - cur_pos_ = 0; - is_evaled_ = false; - curr_ = nullptr; - iter_ = nullptr; - is_sub_evaled_ = false; - ord_val_ = 0; - total_ = 0; - is_null_result_ = false; - - if (node_type_ == REG_TYPE) { - total_ = 1; - } - - return ret; -} - -int JtJoinNode::open() -{ - INIT_SUCC(ret); - if (OB_FAIL(JtColNode::open())) { - LOG_WARN("fail to open column node.", K(ret)); - } else if (left_ && OB_FAIL(left_->open())) { - LOG_WARN("fail to open left node.", K(ret)); - } else if (right_ && OB_FAIL(right_->open())) { - LOG_WARN("fail to open right node.", K(ret)); - } - return ret; -} - - -int JtScanNode::assign(const JtScanNode& other) -{ - INIT_SUCC(ret); - - if (OB_FAIL(reg_col_defs_.assign(other.reg_col_defs_))) { - LOG_WARN("fail to assign col defs.", K(ret), K(other.reg_col_defs_.count())); - } else if (OB_FAIL(child_idx_.assign(other.child_idx_))) { - LOG_WARN("fail to assign child idx defs.", K(ret), K(other.child_idx_.count())); - } else { - col_info_ = other.col_info_; - nest_col_def_ = other.nest_col_def_; - is_regular_done_ = false; - is_nested_done_ = false; - } - return ret; -} - -int JtScanNode::open() -{ - INIT_SUCC(ret); - if (OB_FAIL(JtColNode::open())) { - LOG_WARN("fail to open column node.", K(ret)); - } else { - for (int i = 0; OB_SUCC(ret) && i < reg_col_defs_.count(); ++i) { - JtColNode* node = reg_col_defs_.at(i); - if 
(OB_FAIL(node->open())) { - LOG_WARN("fail to open reg node.", K(ret)); - } - } - - if (OB_FAIL(ret)) { - } else if (reg_col_defs_.count() == 0) { - is_regular_done_ = true; - } - - if (OB_FAIL(ret)) { - } else if (nest_col_def_ && OB_FAIL(nest_col_def_->open())) { - LOG_WARN("fail to open nest def node.", K(ret)); - } - } - return ret; -} - -void JtColNode::destroy() -{ - // do nothing -} - -void JtJoinNode::destroy() -{ - if (OB_NOT_NULL(left_)) { - left_->destroy(); - } - - if (OB_NOT_NULL(right_)) { - right_->destroy(); - } -} - -void JtScanNode::destroy() -{ - for (size_t i = 0; i < reg_col_defs_.count(); ++i) { - reg_col_defs_.at(i)->destroy(); - } - - reg_col_defs_.reset(); - child_idx_.reset(); - - if (OB_NOT_NULL(nest_col_def_)) { - nest_col_def_->destroy(); - } -} - -int JtColNode::check_default_cast_allowed(ObExpr* expr) -{ - INIT_SUCC(ret); - if ((ob_is_string_type(col_info_.data_type_.get_obj_type()) && ob_is_number_tc(expr->datum_meta_.type_)) - || (col_info_.data_type_.get_obj_type() == ObJsonType && ob_is_number_tc(expr->datum_meta_.type_))) { - ret = OB_ERR_DEFAULT_VALUE_NOT_MATCH; - } - return ret; -} - -int JtColNode::check_col_res_type(JtScanCtx* ctx) -{ - INIT_SUCC(ret); - ObObjType obj_type = col_info_.data_type_.get_obj_type(); - JtColType col_type = type(); - if (col_type == COL_TYPE_EXISTS) { - if (ob_is_string_type(obj_type) - || ob_is_numeric_type(obj_type) - || ob_is_integer_type(obj_type)) { - // do nothing - } else { - if (ob_is_json_tc(obj_type)) { - ret = OB_ERR_USAGE_KEYWORD; - LOG_WARN("invalid usage of keyword EXISTS", K(ret)); - } else { - ret = OB_ERR_NON_NUMERIC_CHARACTER_VALUE; - SET_COVER_ERROR(ctx, ret); - } - curr_ = nullptr; - is_null_result_ = true; - } - } else if (col_type == COL_TYPE_QUERY ) { - // do nothing - } - - return ret; -} - -void JtColNode::proc_query_on_error(JtScanCtx* ctx, int& ret, bool& is_null) -{ - ret = OB_SUCCESS; - if (col_info_.on_error_ == JSN_QUERY_ERROR) { - is_null = true; - iter_ = curr_ = 
NULL; - LOG_WARN("result can't be returned without array wrapper", K(ret)); - } else if (col_info_.on_error_ == JSN_QUERY_EMPTY || col_info_.on_error_ == JSN_QUERY_EMPTY_ARRAY) { - iter_ = curr_ = ctx->jt_op_->get_js_array(); - is_null = false; - } else if (col_info_.on_error_ == JSN_QUERY_EMPTY_OBJECT) { - iter_ = curr_ = ctx->jt_op_->get_js_object(); - is_null = false; - } else if (col_info_.on_error_ == JSN_QUERY_NULL || col_info_.on_error_ == JSN_QUERY_IMPLICIT) { - iter_ = curr_ = NULL; - is_null = true; - } -} - -int JtColNode::set_val_on_empty(JtScanCtx* ctx, bool& need_cast_res) -{ - INIT_SUCC(ret); - JtColType col_type = type(); - - if (lib::is_mysql_mode()) { - if (OB_FAIL(set_val_on_empty_mysql(ctx, need_cast_res))) { - LOG_WARN("fail to eval mysql empty clause", K(ret)); - } - } else if (col_type == COL_TYPE_QUERY) { - switch (col_info_.on_empty_) { - case JSN_QUERY_ERROR: { - ret = OB_ERR_JSON_VALUE_NO_VALUE; - if (col_info_.on_empty_ == JSN_QUERY_ERROR) { - ctx->is_cover_error_ = 0; - } - LOG_WARN("json value seek result empty."); - break; - } - case JSN_QUERY_IMPLICIT: - case JSN_QUERY_NULL: { - iter_ = curr_ = nullptr; - is_null_result_ = true; - ret = OB_SUCCESS; - - if (col_info_.on_empty_ == JSN_QUERY_IMPLICIT) { - proc_query_on_error(ctx, ret, is_null_result_); - if (col_info_.on_error_ == JSN_QUERY_ERROR) { - ret = OB_ERR_JSON_VALUE_NO_VALUE; - } - } - break; - } - case JSN_QUERY_EMPTY: { - iter_ = curr_ = ctx->jt_op_->get_js_array(); - is_null_result_ = false; - break; - } - case JSN_QUERY_EMPTY_ARRAY: { - iter_ = curr_ = ctx->jt_op_->get_js_array(); - is_null_result_ = false; - break; - } - case JSN_QUERY_EMPTY_OBJECT: { - iter_ = curr_ = ctx->jt_op_->get_js_object(); - is_null_result_ = false; - break; - } - default: // error_type from get_on_empty_or_error has done range check, do nothing for default - break; - } - } else if (col_type == COL_TYPE_VALUE) { - switch (col_info_.on_empty_) { - case JSN_VALUE_ERROR: { - ret = 
OB_ERR_JSON_VALUE_NO_VALUE; - if (col_info_.on_empty_ == JSN_VALUE_ERROR) { - SET_COVER_ERROR(ctx, ret); - } - break; - } - case JSN_VALUE_IMPLICIT: { - if (col_info_.on_error_ == JSN_VALUE_ERROR) { - ret = OB_ERR_JSON_VALUE_NO_VALUE; - SET_COVER_ERROR(ctx, ret); - } else if (col_info_.on_error_ == JSN_VALUE_DEFAULT) { - ObExpr* default_expr = ctx->spec_ptr_->err_default_exprs_.at(col_info_.error_expr_id_); - ObDatum* err_datum = nullptr; - ret = default_expr->eval(*ctx->eval_ctx_, err_datum); - if (OB_FAIL(ret)) { - LOG_WARN("failed eval datum type.", K(ret)); - } else { - ObBasicSessionInfo *session = ctx->exec_ctx_->get_my_session(); - const ObDatum& datum = *err_datum; - const ObString in_str = ob_is_string_type(default_expr->datum_meta_.type_) ? datum.get_string() : ObString(); - - if (OB_FAIL(check_default_cast_allowed(default_expr))) { - LOG_WARN("check default value can't cast return type", K(ret), K(default_expr->datum_meta_)); - } else if (OB_FAIL(ObJsonExprHelper::pre_default_value_check(col_info_.data_type_.get_obj_type(), - in_str, - default_expr->datum_meta_.type_))) { - LOG_WARN("default value pre check fail", K(ret)); - } else if (ObJsonExprHelper::is_convertible_to_json(default_expr->datum_meta_.type_)) { - if (OB_FAIL(ObJsonExprHelper::transform_convertible_2jsonBase(datum, - default_expr->datum_meta_.type_, - &ctx->row_alloc_, - default_expr->datum_meta_.cs_type_, - err_val_, false, - default_expr->obj_meta_.has_lob_header()))) { - LOG_WARN("failed: parse value to jsonBase", K(ret)); - } else { - curr_ = iter_ = err_val_; - } - } else if (OB_FAIL(ObJsonExprHelper::transform_scalar_2jsonBase(datum, - default_expr->datum_meta_.type_, - &ctx->row_alloc_, - default_expr->datum_meta_.scale_, - session->get_timezone_info(), - session, - err_val_, false))) { - LOG_WARN("failed do cast to returning type.", K(ret)); - } else { - curr_ = iter_ = err_val_; - } - } - } else { - curr_ = nullptr; - is_null_result_ = true; - } - break; - } - case 
JSN_VALUE_NULL: { - curr_ = nullptr; - is_null_result_ = true; - ret = OB_SUCCESS; - break; - } - case JSN_VALUE_DEFAULT: { - ObExpr* default_expr = ctx->spec_ptr_->emp_default_exprs_.at(col_info_.empty_expr_id_); - ObDatum* emp_datum = nullptr; - if (OB_FAIL(default_expr->eval(*ctx->eval_ctx_, emp_datum))) { - LOG_WARN("failed do cast to returning type.", K(ret)); - } else { - ObBasicSessionInfo *session = ctx->exec_ctx_->get_my_session(); - ObIJsonBase* tmp_node = nullptr; - const ObDatum& datum = *emp_datum; - - if (OB_FAIL(check_default_cast_allowed(default_expr))) { - LOG_WARN("check default value can't cast return type", K(ret), K(default_expr->datum_meta_)); - } else if (ObJsonExprHelper::is_convertible_to_json(default_expr->datum_meta_.type_)) { - if (OB_FAIL(ObJsonExprHelper::transform_convertible_2jsonBase(datum, - default_expr->datum_meta_.type_, - &ctx->row_alloc_, - default_expr->datum_meta_.cs_type_, - emp_val_, false, - default_expr->obj_meta_.has_lob_header()))) { - LOG_WARN("failed: parse value to jsonBase", K(ret)); - } - } else if (OB_FAIL(ObJsonExprHelper::transform_scalar_2jsonBase(datum, - default_expr->datum_meta_.type_, - &ctx->row_alloc_, - default_expr->datum_meta_.scale_, - session->get_timezone_info(), - session, emp_val_, false))) { - LOG_WARN("failed do cast to returning type.", K(ret)); - } else { - iter_ = emp_val_; - curr_ = emp_val_; - } - } - - if (OB_SUCC(ret)) { - if (OB_FAIL(JtFuncHelpler::cast_to_res(ctx, emp_val_, *this, false))) { - LOG_WARN("failed do cast to returning type.", K(ret)); - } - need_cast_res = false; - } - break; - } - default: // error_type from get_on_empty_or_error has done range check, do nothing for default - break; - } - } else if (col_type == COL_TYPE_EXISTS) { - switch (col_info_.on_empty_) { - case JSN_EXIST_FALSE: - case JSN_EXIST_TRUE: - case JSN_EXIST_ERROR: - case JSN_EXIST_DEFAULT: { - if (ob_is_string_type(col_info_.data_type_.get_obj_type())) { - ObString value = "false"; - void* buf = 
ctx->row_alloc_.alloc(sizeof(ObJsonString)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - } else { - iter_ = curr_ = static_cast(new(buf)ObJsonString(value.ptr(), value.length())); - is_null_result_ = false; - } - } else { - void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonInt)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("buf allocate failed", K(ret)); - } else { - iter_ = curr_ = static_cast(new(buf)ObJsonInt(0)); - is_null_result_ = false; - } - } - break; - } - default: - break; - } - } - return ret; -} - -int JtColNode::set_val_on_empty_mysql(JtScanCtx* ctx, bool& need_cast_res) -{ - INIT_SUCC(ret); - JtColType col_type = type(); - ctx->is_cover_error_ = false; // error can not cover null value - if (col_type == COL_TYPE_QUERY || col_type == COL_TYPE_VALUE) { - switch (col_info_.on_empty_) { - case JSN_VALUE_ERROR: { - ret = OB_ERR_MISSING_JSON_VALUE; - LOG_USER_ERROR(OB_ERR_MISSING_JSON_VALUE, "json_table"); - break; - } - case JSN_VALUE_IMPLICIT: - case JSN_VALUE_NULL: { - curr_ = nullptr; - is_null_result_ = true; - ret = OB_SUCCESS; - break; - } - case JSN_VALUE_DEFAULT: { - need_cast_res = false; - ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_info_.output_column_idx_); - ObObjType dst_type = expr->datum_meta_.type_; - ObExpr* default_expr = ctx->spec_ptr_->emp_default_exprs_.at(col_info_.empty_expr_id_); - if (OB_FAIL(get_default_value_pre_mysql(default_expr, ctx, emp_val_, dst_type))) { - LOG_WARN("fail to process empty default value", K(ret), K(dst_type)); - } else if (OB_FAIL(JtFuncHelpler::cast_to_res(ctx, emp_val_, *this, false))) { - LOG_WARN("failed do cast to returning type.", K(ret)); - } else { - iter_ = curr_ = emp_val_; - } - break; - } - default: // error_type from get_on_empty_or_error has done range check, do nothing for default - break; - } - } else if (col_type == COL_TYPE_EXISTS) { - switch (col_info_.on_empty_) { - case JSN_EXIST_FALSE: - case JSN_EXIST_TRUE: - case JSN_EXIST_ERROR: - case 
JSN_EXIST_DEFAULT: { - if (ob_is_string_type(col_info_.data_type_.get_obj_type())) { - ObString value = "0"; - void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonString)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - } else { - iter_ = curr_ = static_cast(new(buf)ObJsonString(value.ptr(), value.length())); - is_null_result_ = false; - } - } else { - void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonInt)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("buf allocate failed", K(ret)); - } else { - iter_ = curr_ = static_cast(new(buf)ObJsonInt(0)); - is_null_result_ = false; - } - } - break; - } - default: - break; - } - } - return ret; -} - -int JtColNode::get_default_value_pre_mysql(ObExpr* default_expr, - JtScanCtx* ctx, - ObIJsonBase *&res, - ObObjType &dst_type) -{ - INIT_SUCC(ret); - ObDatum* tmp_datum = nullptr; - if (OB_FAIL(default_expr->eval(*ctx->eval_ctx_, tmp_datum))) { - LOG_WARN("failed do cast to returning type.", K(ret)); - } else { - ObBasicSessionInfo *session = ctx->exec_ctx_->get_my_session(); - ObIJsonBase* tmp_node = nullptr; - ObObjType val_type = default_expr->datum_meta_.type_; - ObCollationType cs_type = default_expr->datum_meta_.cs_type_; - // const ObDatum& datum = *tmp_datum; - ObDatum converted_datum; - converted_datum.set_datum(*tmp_datum); - // convert string charset if needed - if (ob_is_string_type(val_type) - && (ObCharset::charset_type_by_coll(cs_type) != CHARSET_UTF8MB4)) { - ObString origin_str = converted_datum.get_string(); - ObString converted_str; - if (OB_FAIL(ObExprUtil::convert_string_collation(origin_str, cs_type, converted_str, - CS_TYPE_UTF8MB4_BIN, ctx->row_alloc_))) { - LOG_WARN("convert string collation failed", K(ret), K(cs_type), K(origin_str.length())); - } else { - converted_datum.set_string(converted_str); - cs_type = CS_TYPE_UTF8MB4_BIN; - } - } - - if (OB_FAIL(check_default_cast_allowed(default_expr))) { - LOG_WARN("check default value can't cast return type", K(ret), 
K(default_expr->datum_meta_)); - } else if (ObJsonExprHelper::is_convertible_to_json(val_type)) { - if (OB_FAIL(ObJsonExprHelper::transform_convertible_2jsonBase(converted_datum, - val_type, - &ctx->row_alloc_, - cs_type, - res, false, - default_expr->obj_meta_.has_lob_header(), - false, lib::is_oracle_mode(), true)) - || (dst_type != ObJsonType && !res->is_json_scalar(res->json_type()))) { - ret = OB_INVALID_DEFAULT; - LOG_USER_ERROR(OB_INVALID_DEFAULT, col_info_.col_name_.length(), col_info_.col_name_.ptr()); - } - } else if (OB_FAIL(ObJsonExprHelper::transform_scalar_2jsonBase(converted_datum, - default_expr->datum_meta_.type_, - &ctx->row_alloc_, - default_expr->datum_meta_.scale_, - session->get_timezone_info(), - session, res, false, default_expr->is_boolean_)) - || (dst_type != ObJsonType && !res->is_json_scalar(res->json_type()))) { - ret = OB_INVALID_DEFAULT; - LOG_USER_ERROR(OB_INVALID_DEFAULT, col_info_.col_name_.length(), col_info_.col_name_.ptr()); - } - } - return ret; -} - -int JtColNode::process_default_value_pre_mysql(JtScanCtx* ctx) -{ - INIT_SUCC(ret); - const ObJtColInfo& info = col_info_; - ctx->is_cover_error_ = false; - ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(info.output_column_idx_); - ObObjType dst_type = expr->datum_meta_.type_; - if (is_emp_evaled_) { - } else if (col_info_.on_empty_ == JSN_VALUE_DEFAULT) { - ObExpr* default_expr = ctx->spec_ptr_->emp_default_exprs_.at(col_info_.empty_expr_id_); - if (OB_FAIL(get_default_value_pre_mysql(default_expr, ctx, emp_val_, dst_type))) { - LOG_WARN("fail to process empty default value", K(ret), K(dst_type)); - } else if (OB_FAIL(JtFuncHelpler::pre_default_value_check_mysql(ctx, emp_val_, *this))) { - ret = OB_OPERATE_OVERFLOW; - LOG_WARN("fail to cast empty default value", K(ret), K(dst_type)); - } else { - is_emp_evaled_ = true; - } - } - - if (is_err_evaled_) { - } else if (OB_SUCC(ret) && col_info_.on_error_ == JSN_VALUE_DEFAULT) { - ObExpr* default_expr = 
ctx->spec_ptr_->err_default_exprs_.at(col_info_.error_expr_id_); - if (OB_FAIL(get_default_value_pre_mysql(default_expr, ctx, err_val_, dst_type))) { - LOG_WARN("fail to process empty default value", K(ret), K(dst_type)); - } else if (OB_FAIL(JtFuncHelpler::pre_default_value_check_mysql(ctx, err_val_, *this))) { - ret = OB_OPERATE_OVERFLOW; - LOG_WARN("fail to cast error default value", K(ret), K(dst_type)); - } else { - is_err_evaled_ = true; - } - } - return ret; -} - -int JtColNode::wrapper2_json_array(JtScanCtx* ctx, ObJsonBaseVector &hit) -{ - INIT_SUCC(ret); - void* js_arr_buf = ctx->row_alloc_.alloc(sizeof(ObJsonArray)); - ObJsonArray* js_arr_ptr = nullptr; - if (OB_ISNULL(js_arr_buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate json array buf", K(ret)); - } else if (OB_ISNULL(js_arr_ptr = new (js_arr_buf) ObJsonArray(&ctx->row_alloc_))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to new json array node", K(ret)); - } else { - ObJsonNode *j_node = NULL; - ObIJsonBase *jb_node = NULL; - for (int32_t i = 0; OB_SUCC(ret) && i < hit.size(); i++) { - if (OB_FAIL(ObJsonBaseFactory::transform(&ctx->row_alloc_, hit[i], ObJsonInType::JSON_TREE, jb_node))) { // to tree - LOG_WARN("fail to transform to tree", K(ret), K(i), K(*(hit[i]))); - } else { - j_node = static_cast(jb_node); - if (OB_FAIL(js_arr_ptr->array_append(j_node->clone(&ctx->row_alloc_)))) { - LOG_WARN("failed to array append", K(ret), K(i), K(*j_node)); - } - } - } - - if (OB_SUCC(ret)) { - curr_ = js_arr_ptr; - } - } - return ret; -} - -int JtColNode::get_next_row(ObIJsonBase* in, JtScanCtx* ctx, bool& is_null_value) -{ - INIT_SUCC(ret); - JtColType col_type = type(); - ObExpr* col_expr = ctx->spec_ptr_->column_exprs_.at(col_info_.output_column_idx_); - ctx->res_obj_ = &col_expr->locate_datum_for_write(*ctx->eval_ctx_); - bool need_cast_res = true; - bool need_pro_emtpy = false; - - if (lib::is_mysql_mode() && OB_ISNULL(in)) { - in_ = in; - need_cast_res = false; - curr_ = 
iter_ = nullptr; - col_expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); - } else if (col_type == COL_TYPE_ORDINALITY) { - if (OB_ISNULL(in)) { - col_expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); - } else { - col_expr->locate_datum_for_write(*ctx->eval_ctx_).set_int(ctx->ord_val_); - } - col_expr->get_eval_info(*ctx->eval_ctx_).evaluated_ = true; - if (ctx->is_need_end_) { - ret = OB_ITER_END; - } - } else if (lib::is_oracle_mode() && OB_FAIL(check_col_res_type(ctx))) { - LOG_WARN("check column res type failed", K(ret), K(col_info_.data_type_), K(col_info_.col_type_)); - } else if (OB_FAIL(init_js_path(ctx))) { - RESET_COVER_CODE(ctx); - LOG_WARN("fail to init js path", K(ret)); - } else if (lib::is_oracle_mode() && OB_FAIL(JtFuncHelpler::check_default_value(ctx, col_info_, col_expr))) { - // json value empty need check default value first - LOG_WARN("default value check fail", K(ret)); - } else if (OB_ISNULL(in)) { - in_ = in; - is_null_result_ = true; - need_pro_emtpy = true; - EVAL_COVER_CODE(ctx, ret); - } else if (in != in_ || !is_evaled_) { - in_ = in; - is_null_result_ = false; - ObJsonBaseVector hit; - in_->set_allocator(&ctx->row_alloc_); - if (OB_FAIL(in_->seek(*js_path_, js_path_->path_node_cnt(), true, false, hit))) { - SET_COVER_ERROR(ctx, ret); - LOG_WARN("json seek failed", K(col_info_.path_), K(ret)); - } else if (lib::is_mysql_mode() && OB_FAIL(process_default_value_pre_mysql(ctx))) { - LOG_WARN("fail to resolve default value in mysql mode", K(ret)); - } else if (hit.size() == 0) { - curr_ = iter_ = nullptr; - total_ = 1; - if (OB_FAIL(set_val_on_empty(ctx, need_cast_res))) { - LOG_WARN("fail to process on empty", K(ret)); - } - } else { - is_null_result_ = false; - curr_ = hit[0]; - total_ = 1; - bool is_array_wrapper = false; - if (col_type == COL_TYPE_QUERY) { - if (col_info_.wrapper_ == JSN_QUERY_WITHOUT_WRAPPER - || col_info_.wrapper_ == JSN_QUERY_WITHOUT_ARRAY_WRAPPER - || col_info_.wrapper_ == JSN_QUERY_WRAPPER_IMPLICIT) 
{ - if (hit.size() > 1) { - proc_query_on_error(ctx, ret, is_null_result_); - if (col_info_.on_error_ == JSN_QUERY_ERROR) { - ret = OB_ERR_WITHOUT_ARR_WRAPPER; - LOG_WARN("result can't be returned without array wrapper", K(ret)); - } - SET_COVER_ERROR(ctx, ret); - } else { - if ((curr_->json_type() != ObJsonNodeType::J_ARRAY && curr_->json_type() != ObJsonNodeType::J_OBJECT) - && col_info_.allow_scalar_ == JSN_QUERY_SCALARS_DISALLOW) { - curr_ = nullptr; - is_null_result_ = true; - ret = OB_ERR_WITHOUT_ARR_WRAPPER; - LOG_WARN("result can't be returned without array wrapper"); - SET_COVER_ERROR(ctx, ret); - } - } - } else if (col_info_.wrapper_ == JSN_QUERY_WITH_WRAPPER - || col_info_.wrapper_ == JSN_QUERY_WITH_ARRAY_WRAPPER - || col_info_.wrapper_ == JSN_QUERY_WITH_UNCONDITIONAL_WRAPPER - || col_info_.wrapper_ == JSN_QUERY_WITH_UNCONDITIONAL_ARRAY_WRAPPER) { - is_array_wrapper = true; - } else if (col_info_.wrapper_ == JSN_QUERY_WITH_CONDITIONAL_WRAPPER - || col_info_.wrapper_ == JSN_QUERY_WITH_CONDITIONAL_ARRAY_WRAPPER) { - if (hit.size() == 1) { - if (col_info_.allow_scalar_ == JSN_QUERY_SCALARS_DISALLOW - && curr_->json_type() != ObJsonNodeType::J_ARRAY - && curr_->json_type() != ObJsonNodeType::J_OBJECT) { - is_array_wrapper = 1; - } else { - curr_ = hit[0]; - } - - } else { - is_array_wrapper = 1; - } - } - - if (is_array_wrapper) { - void* js_arr_buf = ctx->row_alloc_.alloc(sizeof(ObJsonArray)); - ObJsonArray* js_arr_ptr = nullptr; - if (OB_ISNULL(js_arr_buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate json array buf", K(ret)); - } else if (OB_ISNULL(js_arr_ptr = new (js_arr_buf) ObJsonArray(&ctx->row_alloc_))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to new json array node", K(ret)); - } else { - ObJsonNode *j_node = NULL; - ObIJsonBase *jb_node = NULL; - for (int32_t i = 0; OB_SUCC(ret) && i < hit.size(); i++) { - if (OB_FAIL(ObJsonBaseFactory::transform(&ctx->row_alloc_, hit[i], ObJsonInType::JSON_TREE, jb_node))) { // to 
tree - LOG_WARN("fail to transform to tree", K(ret), K(i), K(*(hit[i]))); - } else { - j_node = static_cast(jb_node); - if (OB_FAIL(js_arr_ptr->array_append(j_node->clone(&ctx->row_alloc_)))) { - LOG_WARN("failed to array append", K(ret), K(i), K(*j_node)); - } - } - } - - if (OB_SUCC(ret)) { - curr_ = js_arr_ptr; - } - } - } - } else if (col_type == COL_TYPE_VALUE) { - if (lib::is_mysql_mode() && ob_is_json(col_expr->datum_meta_.type_) && hit.size() > 1) { - if (OB_FAIL(wrapper2_json_array(ctx, hit))) { - LOG_WARN("fail to get json value", K(ret)); - } - } else if (hit.size() > 1) { - ret = OB_ERR_JSON_VALUE_NO_SCALAR; - SET_COVER_ERROR(ctx, ret); - } else if (hit[0]->json_type() == ObJsonNodeType::J_NULL) { - need_cast_res = false; - col_expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); - } else if (!ob_is_json(col_expr->datum_meta_.type_) - && (hit[0]->json_type() == ObJsonNodeType::J_ARRAY || hit[0]->json_type() == ObJsonNodeType::J_OBJECT)) { - ret = OB_ERR_JSON_VALUE_NO_SCALAR; - SET_COVER_ERROR(ctx, ret); - } else if (lib::is_oracle_mode() && curr_->json_type() == ObJsonNodeType::J_BOOLEAN && ob_is_number_tc(col_info_.data_type_.get_obj_type())) { - curr_ = nullptr; - is_null_result_ = true; - ret = OB_ERR_BOOL_CAST_NUMBER; - LOG_WARN("boolean cast number cast not support"); - SET_COVER_ERROR(ctx, ret); - } else if ((curr_->json_type() == ObJsonNodeType::J_INT - || curr_->json_type() == ObJsonNodeType::J_INT) - && (ob_is_datetime_tc(col_info_.data_type_.get_obj_type()))) { - char* res_ptr = ctx->buf; - int len = snprintf(ctx->buf, sizeof(ctx->buf), "%ld", curr_->get_int()); - if (len > 0) { - ObJsonString* j_string = nullptr; - if (OB_ISNULL(j_string = static_cast(ctx->row_alloc_.alloc(sizeof(ObJsonString))))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - RESET_COVER_CODE(ctx); - LOG_WARN("fail to allocate json string node", K(ret)); - } else { - curr_ = new(j_string) ObJsonString(ctx->buf, len); - } - } else { - ret = OB_ERR_UNEXPECTED; - 
RESET_COVER_CODE(ctx); - LOG_WARN("fail to print int value", K(ret)); - } - } - } else if (col_type == COL_TYPE_EXISTS) { - if (ob_is_string_type(col_info_.data_type_.get_obj_type())) { - ObString value("true"); - if (lib::is_mysql_mode()) { - value.assign_ptr("1", 1); - } - void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonString)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - } else { - curr_ = static_cast(new(buf)ObJsonString(value.ptr(), value.length())); - is_null_result_ = false; - } - } else { - void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonInt)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("buf allocate failed", K(ret)); - } else { - curr_ = static_cast(new(buf)ObJsonInt(1)); - is_null_result_ = false; - } - } - } - } - if (OB_SUCC(ret)) { - cur_pos_ = 0; - is_evaled_ = true; - } - } - - if (OB_FAIL(ret)) { - if (ctx->is_cover_error_) { - int tmp_ret = JtFuncHelpler::set_error_val(ctx, *this, ret); - if (tmp_ret != OB_SUCCESS) { - LOG_WARN("failed to set error val.", K(tmp_ret)); - } else if (OB_ISNULL(in_) && is_evaled_) { - ret = OB_ITER_END; - } - } - } else if (col_type == COL_TYPE_EXISTS || col_type == COL_TYPE_QUERY || col_type == COL_TYPE_VALUE) { - if (!need_cast_res) { - } else if (is_null_result_ || (curr_ && curr_->json_type() == ObJsonNodeType::J_NULL && (!curr_->is_real_json_null(curr_) || lib::is_mysql_mode()))) { - if (!need_pro_emtpy) { - col_expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); - } else if (OB_FAIL(set_val_on_empty(ctx, need_cast_res))) { - LOG_WARN("fail to process on empty", K(ret)); - } else if (OB_ISNULL(iter_)) { - col_expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); - } else if (OB_FAIL(JtFuncHelpler::cast_to_res(ctx, iter_, *this, false))) { - LOG_WARN("failed set to res type", K(ret)); - } - } else if (OB_FAIL(JtFuncHelpler::cast_to_res(ctx, curr_, *this))) { - LOG_WARN("failed to do cast to res type", K(ret)); - } - - if (OB_SUCC(ret)) { - 
col_expr->get_eval_info(*ctx->eval_ctx_).evaluated_ = true; - } - - if (is_sub_evaled_) { - ret = OB_ITER_END; - } - } - - return ret; -} - -int JtColNode::init_js_path(JtScanCtx* ctx) -{ - INIT_SUCC(ret); - if (!is_evaled_ && OB_ISNULL(js_path_)) { - void* path_buf = ctx->op_exec_alloc_->alloc(sizeof(ObJsonPath)); - if (OB_ISNULL(path_buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to allocate json path buffer", K(ret)); - } else { - js_path_ = new (path_buf) ObJsonPath(col_info_.path_, ctx->op_exec_alloc_); - if (OB_FAIL(js_path_->parse_path())) { - ret = OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR; - LOG_USER_ERROR(OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR, col_info_.path_.length(), col_info_.path_.ptr()); - } - } - } - - return ret; -} - -ObIJsonBase* container_at(ObIJsonBase* in, int32_t pos) -{ - INIT_SUCC(ret); - ObIJsonBase* res = nullptr; - - if (in->json_type() == ObJsonNodeType::J_ARRAY) { - if (OB_FAIL(in->get_array_element(pos, res))) { - LOG_WARN("fail to get array element", K(ret), K(pos)); - } - } else if (in->json_type() == ObJsonNodeType::J_OBJECT) { - if (OB_FAIL(in->get_object_value(pos, res))) { - LOG_WARN("fail to get object element", K(ret), K(pos)); - } - } - - return res; -} - -int JtScanNode::add_reg_column_node(JtColNode* node, bool add_idx) -{ - INIT_SUCC(ret); - if (add_idx && OB_FAIL(child_idx_.push_back(node->node_idx()))) { - LOG_WARN("fail to store node id", K(ret), K(child_idx_.count())); - } else if (OB_FAIL(reg_col_defs_.push_back(node))) { - LOG_WARN("fail to store node ptr", K(ret), K(reg_col_defs_.count())); - } - return ret; -} - -int JtScanNode::get_next_row(ObIJsonBase* in, JtScanCtx* ctx, bool& is_null_value) -{ - INIT_SUCC(ret); - if (OB_FAIL(init_js_path(ctx))) { - RESET_COVER_CODE(ctx); - LOG_WARN("fail to init js path", K(ret)); - } else if (!is_evaled_ || in_ != in) { - ObJsonBaseVector hit; - is_sub_evaled_ = false; - is_nested_evaled_ = false; - in_ = in; - if (OB_NOT_NULL(in_)) { - if 
(OB_ISNULL(in_->get_allocator())) { - in_->set_allocator(&ctx->row_alloc_); - } - } - if (OB_ISNULL(in)) { - total_ = 1; - is_null_result_ = is_null_value = true; - curr_ = iter_ = nullptr; - } else if (OB_FAIL(in_->seek(*js_path_, js_path_->path_node_cnt(), true, false, hit))) { - LOG_WARN("json seek failed", K(col_info_.path_), K(ret)); - SET_COVER_ERROR(ctx, ret); - } else if (hit.size() == 0) { - total_ = 1; - is_null_value = is_null_result_ = true; - curr_ = iter_ = nullptr; - if (col_info_.parent_id_ == common::OB_INVALID_ID - || (ctx->jt_op_->get_root_param() == in - && ctx->jt_op_->get_root_entry()->reg_column_count() == 0 - && ctx->jt_op_->get_root_entry() == this)) { - ret = OB_ITER_END; - } - } else if (hit.size() == 1) { - iter_ = curr_ = hit[0]; - is_null_value = is_null_result_ = false; - total_ = 1; - } else { - is_null_value = false; - void* js_arr_buf = ctx->row_alloc_.alloc(sizeof(ObJsonArray)); - ObJsonArray* js_arr_ptr = nullptr; - if (OB_ISNULL(js_arr_buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate json array buf", K(ret)); - } else if (OB_ISNULL(js_arr_ptr = new (js_arr_buf) ObJsonArray(&ctx->row_alloc_))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to new json array node", K(ret)); - } else { - ObJsonNode *j_node = NULL; - ObIJsonBase *jb_node = NULL; - for (int32_t i = 0; OB_SUCC(ret) && i < hit.size(); i++) { - if (ObJsonBaseFactory::transform(&ctx->row_alloc_, hit[i], ObJsonInType::JSON_TREE, jb_node)) { // to tree - LOG_WARN("fail to transform to tree", K(ret), K(i), K(*(hit[i]))); - } else { - j_node = static_cast(jb_node); - if (OB_FAIL(js_arr_ptr->array_append(j_node->clone(&ctx->row_alloc_)))) { - LOG_WARN("failed to array append", K(ret), K(i), K(*j_node)); - } - } - } - - if (OB_SUCC(ret)) { - curr_ = js_arr_ptr; - total_ = hit.size(); - if (OB_FAIL(js_arr_ptr->get_array_element(0, iter_))) { - LOG_WARN("failed to get array selement 0.", K(ret)); - } - } - } - } - - if (OB_SUCC(ret)) { - cur_pos_ = 0; - 
is_evaled_= true; - } - } - - if (OB_SUCC(ret)) { - uint32_t reg_count = reg_col_defs_.count(); - bool tmp_is_null = false; - for (uint32_t i = 0; OB_SUCC(ret) && i < reg_count && !is_sub_evaled_; ++i) { - JtColNode* cur_node = reg_col_defs_.at(i); - if (cur_node->type() == COL_TYPE_ORDINALITY) { - ctx->ord_val_ = cur_pos_ + 1; - } - if (OB_FAIL(cur_node->get_next_row(iter_, ctx, tmp_is_null))) { - LOG_WARN("fail to get regular column value", K(ret)); - } - } - - bool is_curr_row_valid = !is_sub_evaled_; - bool is_sub_result_null = false; - if (OB_SUCC(ret)) { - bool is_cur_end = false; - JtColNode* nest_node = nest_col_def_; - if (nest_node) { - if (OB_FAIL(nest_node->get_next_row(iter_, ctx, is_sub_result_null))) { - if (OB_FAIL(ret) && ret != OB_ITER_END) { - LOG_WARN("fail to get column value", K(ret)); - } else if (ret == OB_ITER_END) { - is_cur_end = true; - is_sub_evaled_ = true; - if (!is_nested_evaled_) { - is_nested_evaled_ = true; - ret = OB_SUCCESS; - } else { - is_curr_row_valid = false; - } - } - } else if (OB_SUCC(ret)) { - is_nested_evaled_ = true; - } - } else { - is_nested_evaled_ = true; - is_cur_end = true; - } - - if (is_cur_end || is_sub_result_null || iter_ == nullptr) { - if (cur_pos_ + 1 < total_ && !is_curr_row_valid) { - cur_pos_++; - if (OB_ISNULL(iter_ = container_at(curr_, cur_pos_))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("fail to get container element.", K(ret), K(cur_pos_)); - } else if (is_sub_evaled_) { - is_sub_evaled_ = false; - is_cur_end = false; - if (OB_FAIL(get_next_row(in_, ctx, is_sub_result_null))) { - if (ret != OB_ITER_END) { - LOG_WARN("fail to get next row.", K(ret), K(cur_pos_)); - } - } - } - } else if (is_cur_end) { - if (!is_curr_row_valid) { - reset_reg_columns(ctx); - ret = OB_ITER_END; - } - is_sub_evaled_ = true; - } - } - - if (is_sub_result_null && (cur_pos_ + 1 > total_)) { - is_null_value = is_sub_result_null; - } - } - } - - return ret; -} - -int JtJoinNode::get_next_row(ObIJsonBase* in, JtScanCtx* ctx, 
bool& is_null_value) -{ - INIT_SUCC(ret); - - JtColNode* left_node = left(); - JtColNode* right_node = right(); - - bool is_left_null = false; - if (OB_NOT_NULL(left_node)) { - ret = left_node->get_next_row(in, ctx, is_null_value); - if (OB_FAIL(ret) && ret != OB_ITER_END) { - LOG_WARN("fail to get next row", K(ret)); - } - is_left_null = is_null_value; - } else { - ret = OB_ITER_END; - } - - if (OB_SUCC(ret)) { - if (is_null_value && OB_NOT_NULL(right_node)) { - ret = right_node->get_next_row(in, ctx, is_null_value); - if (OB_FAIL(ret) && ret != OB_ITER_END) { - LOG_WARN("fail to get next row", K(ret)); - } - } - } else if (OB_NOT_NULL(right_node) && (ret == OB_ITER_END)) { - ret = right_node->get_next_row(in, ctx, is_null_value); - if (OB_FAIL(ret) && ret != OB_ITER_END) { - LOG_WARN("fail to get next row", K(ret)); - } else if (OB_SUCC(ret) && is_null_value) { - if (in_ == in) { - ret = OB_ITER_END; - } - } - } - - in_ = in; - return ret; -} - - ObJtColInfo::ObJtColInfo() : col_type_(0), truncate_(0), @@ -2732,110 +247,66 @@ int ObJtColInfo::from_JtColBaseInfo(const ObJtColBaseInfo& info) return ret; } -static int construct_jt_scan_node(ObIAllocator* allocator, - const ObJtColInfo& col_info, - JtScanNode*& jt_node) +static int construct_table_func_join_node(ObIAllocator* allocator, + const ObJtColInfo& col_info, + JoinNode*& jt_node) { INIT_SUCC(ret); void* node_buf = static_cast(jt_node); if (OB_ISNULL(node_buf)) { - node_buf = allocator->alloc(sizeof(JtScanNode)); + node_buf = allocator->alloc(sizeof(JoinNode)); if (OB_ISNULL(node_buf)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc col node buffer", K(ret)); } - jt_node = static_cast(new(node_buf)JtScanNode(col_info)); + jt_node = static_cast(new(node_buf)JoinNode()); } else { - jt_node = static_cast(new(node_buf)JtScanNode(col_info)); + jt_node = static_cast(new(node_buf)JoinNode()); } return ret; } -static int construct_jt_reg_node(ObIAllocator* allocator, - const ObJtColInfo& col_info, - 
JtColNode*& jt_node) +static int construct_table_func_reg_node(ObIAllocator* allocator, + const ObJtColInfo& col_info, + ObRegCol*& jt_node) { INIT_SUCC(ret); - void* node_buf = allocator->alloc(sizeof(JtColNode)); + void* node_buf = allocator->alloc(sizeof(ObRegCol)); if (OB_ISNULL(node_buf)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc col node buffer", K(ret)); } else { - jt_node = static_cast(new(node_buf)JtColNode(col_info)); + jt_node = static_cast(new(node_buf)ObRegCol(col_info)); } return ret; } -static int construct_jt_join_node(ObIAllocator* allocator, - const ObJtColInfo& col_info, - JtJoinNode*& jt_node) +static int construct_table_func_union_node(ObIAllocator* allocator, + const ObJtColInfo& col_info, + UnionNode*& jt_node) { INIT_SUCC(ret); - void* node_buf = allocator->alloc(sizeof(JtJoinNode)); + void* node_buf = allocator->alloc(sizeof(UnionNode)); if (OB_ISNULL(node_buf)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc col node buffer", K(ret)); } else { - jt_node = static_cast(new(node_buf)JtJoinNode(col_info)); + jt_node = static_cast(new(node_buf)UnionNode()); } return ret; } -int ObJsonTableSpec::construct_tree(common::ObArray all_nodes, JtScanNode* parent) +static int construct_table_func_scan_node(ObIAllocator* allocator, + const ObJtColInfo& col_info, + ScanNode*& jt_node) { INIT_SUCC(ret); - - for (int64_t i = 0; i < parent->child_idx_.count(); ++i) { - int64_t idx = parent->child_idx_.at(i); - JtColNode* node = all_nodes.at(idx); - if (node->node_type() == REG_TYPE) { - if (OB_FAIL(parent->add_reg_column_node(node))) { - LOG_WARN("fail to add reg column node", K(ret), K(i), K(idx)); - } - } else { - if (OB_FAIL(parent->add_nest_column_node(node))) { - LOG_WARN("fail to add nest column node", K(ret), K(i), K(idx)); - } else { - JtNodeType type = node->node_type(); - if (type == JOIN_TYPE && OB_FAIL(construct_tree(all_nodes, static_cast(node)))) { - LOG_WARN("fail to construct join node", K(ret), K(i), K(idx)); - } 
else if (type == SCAN_TYPE && OB_FAIL(construct_tree(all_nodes, static_cast(node)))) { - LOG_WARN("fail to construct scan node", K(ret), K(i), K(idx)); - } - } - } - } - - return ret; -} - -int ObJsonTableSpec::construct_tree(common::ObArray all_nodes, JtJoinNode* parent) -{ - INIT_SUCC(ret); - JtColNode* node = nullptr; - JtNodeType type; - int64_t left = parent->left_idx(); - if (left != OB_INVALID_ID) { - node = all_nodes.at(left); - type = node->node_type(); - parent->set_left(node); - if (type == JOIN_TYPE && OB_FAIL(construct_tree(all_nodes, static_cast(node)))) { - LOG_WARN("fail to construct join node", K(ret), K(left)); - } else if (type == SCAN_TYPE && OB_FAIL(construct_tree(all_nodes, static_cast(node)))) { - LOG_WARN("fail to construct scan node", K(ret), K(left)); - } - } - - int64_t right = parent->right_idx(); - if (right != OB_INVALID_ID) { - node = all_nodes.at(right); - type = node->node_type(); - parent->set_right(node); - if (type == JOIN_TYPE && OB_FAIL(construct_tree(all_nodes, static_cast(node)))) { - LOG_WARN("fail to construct join node", K(ret), K(right)); - } else if (type == SCAN_TYPE && OB_FAIL(construct_tree(all_nodes, static_cast(node)))) { - LOG_WARN("fail to construct scan node", K(ret), K(right)); - } + void* node_buf = allocator->alloc(sizeof(ScanNode)); + if (OB_ISNULL(node_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc col node buffer", K(ret)); + } else { + jt_node = static_cast(new(node_buf)ScanNode(col_info)); } return ret; } @@ -2865,6 +336,10 @@ OB_DEF_SERIALIZE(ObJsonTableSpec) const ObJtColInfo& info = *cols_def_.at(i); OB_UNIS_ENCODE(info); } + if (table_type_ == MulModeTableType::OB_ORA_XML_TABLE_TYPE) { + OB_UNIS_ENCODE(table_type_); + OB_UNIS_ENCODE(namespace_def_); + } return ret; } @@ -2885,7 +360,10 @@ OB_DEF_SERIALIZE_SIZE(ObJsonTableSpec) const ObJtColInfo& info = *cols_def_.at(i); OB_UNIS_ADD_LEN(info); } - + if (table_type_ == MulModeTableType::OB_ORA_XML_TABLE_TYPE) { + 
OB_UNIS_ADD_LEN(table_type_); + OB_UNIS_ADD_LEN(namespace_def_); + } return len; } @@ -2900,6 +378,7 @@ OB_DEF_DESERIALIZE(ObJsonTableSpec) OB_UNIS_DECODE(has_correlated_expr_); int32_t column_count = 0; + int8_t table_type_flag = OB_JSON_TABLE; OB_UNIS_DECODE(column_count); if (OB_SUCC(ret) && OB_FAIL(cols_def_.init(column_count))) { @@ -2918,39 +397,41 @@ OB_DEF_DESERIALIZE(ObJsonTableSpec) *col_info = tmp_col_info; if (OB_FAIL(cols_def_.push_back(col_info))) { LOG_WARN("fail to store col info.", K(ret), K(cols_def_.count())); + } else if (col_info->col_type_ >= COL_TYPE_VAL_EXTRACT_XML) { + table_type_flag = OB_XML_TABLE; } } } - + if (table_type_flag == OB_XML_TABLE) { + OB_UNIS_DECODE(table_type_); + OB_UNIS_DECODE(namespace_def_); + } return ret; } int ObJsonTableOp::generate_table_exec_tree(ObIAllocator* allocator, const JtColTreeNode& orig_col, - JtScanNode*& scan_col, - int64_t& node_idx) + JoinNode*& join_col) { INIT_SUCC(ret); int reg_count = orig_col.regular_cols_.count(); int nest_count = orig_col.nested_cols_.count(); + ScanNode* scan_col = nullptr; - if (OB_FAIL(construct_jt_scan_node(allocator, orig_col.col_base_info_, scan_col))) { + if (OB_FAIL(construct_table_func_join_node(allocator, orig_col.col_base_info_, join_col))) { + LOG_WARN("fail to construct join col node", K(ret)); + } else if (OB_FAIL(construct_table_func_scan_node(allocator, orig_col.col_base_info_, scan_col))) { LOG_WARN("fail to construct scan col node", K(ret)); } else { - scan_col->set_idx(node_idx++); - ObIArray& child_nodes = scan_col->child_node_ref(); - if (OB_FAIL(child_nodes.reserve(reg_count + (nest_count > 0 ? 
1 : 0)))) { - LOG_WARN("fail to reserve space for idx array", K(ret), K(reg_count)); - } + join_col->set_left(scan_col); } for (int i = 0; OB_SUCC(ret) && i < reg_count; ++i) { - JtColNode* reg_node = nullptr; - if (OB_FAIL(construct_jt_reg_node(allocator, orig_col.regular_cols_.at(i)->col_base_info_, reg_node))) { + ObRegCol* reg_node = nullptr; + if (OB_FAIL(construct_table_func_reg_node(allocator, orig_col.regular_cols_.at(i)->col_base_info_, reg_node))) { LOG_WARN("fail to construct reg col node", K(ret), K(reg_count), K(i)); } else { - reg_node->set_idx(node_idx++); if (OB_FAIL(scan_col->add_reg_column_node(reg_node))) { LOG_WARN("fail to store col node", K(ret), K(reg_count), K(i)); } @@ -2958,43 +439,36 @@ int ObJsonTableOp::generate_table_exec_tree(ObIAllocator* allocator, } if (OB_SUCC(ret) && nest_count > 0) { - common::ObArray ji_nodes; + common::ObArray ji_nodes; for (size_t i = 0; OB_SUCC(ret) && i < nest_count; ++i) { - JtJoinNode* ji_node = nullptr; - if (OB_FAIL(construct_jt_join_node(allocator, orig_col.nested_cols_.at(i)->col_base_info_, ji_node))) { + UnionNode* ji_node = nullptr; + if (OB_FAIL(construct_table_func_union_node(allocator, orig_col.nested_cols_.at(i)->col_base_info_, ji_node))) { LOG_WARN("fail to construct join col node", K(ret)); } else if (OB_FAIL(ji_nodes.push_back(ji_node))) { LOG_WARN("fail to store ji nodes in tmp array", K(ret), K(nest_count), K(i)); - } else { - ji_node->set_idx(node_idx++); } } if (OB_SUCC(ret)) { int j = 0; - JtJoinNode* last_node = nullptr; - scan_col->add_nest_column_node(ji_nodes.at(j)); - ji_nodes.at(j)->set_join_type(RIGHT_TYPE); - + UnionNode* last_node = nullptr; + join_col->set_right(ji_nodes.at(j)); last_node = ji_nodes.at(j); ++j; - while (j < nest_count) { - JtJoinNode* cur_node = ji_nodes.at(j); - - last_node->set_left(cur_node); - cur_node->set_join_type(LEFT_TYPE); + while (j < nest_count) { // nested col/union node in right child + UnionNode* cur_node = ji_nodes.at(j); + 
last_node->set_right(cur_node); last_node = cur_node; ++j; }; for (int i = 0; OB_SUCC(ret) && i < nest_count; ++i) { - JtScanNode* col_node = nullptr; - if (OB_FAIL(generate_table_exec_tree(allocator, *orig_col.nested_cols_.at(i), col_node, node_idx))) { + JoinNode* col_node = nullptr; + if (OB_FAIL(generate_table_exec_tree(allocator, *orig_col.nested_cols_.at(i), col_node))) { LOG_WARN("fail to generate sub col node", K(ret), K(i)); } else { - ji_nodes.at(i)->set_join_type(RIGHT_TYPE); - ji_nodes.at(i)->set_right(col_node); + ji_nodes.at(i)->set_left(col_node); } } } @@ -3006,10 +480,9 @@ int ObJsonTableOp::generate_table_exec_tree(ObIAllocator* allocator, int ObJsonTableOp::generate_table_exec_tree() { INIT_SUCC(ret); - int64_t node_idx = 0; if (OB_FAIL(generate_column_trees(def_root_))) { LOG_WARN("fail to generate column tree", K(ret)); - } else if (OB_FAIL(generate_table_exec_tree(allocator_, *def_root_, jt_root_, node_idx))) { + } else if (OB_FAIL(generate_table_exec_tree(allocator_, *def_root_, root_))) { LOG_WARN("fail to generate sub col node", K(ret)); } return ret; @@ -3126,8 +599,8 @@ int ObJsonTableOp::inner_open() } else if (OB_ISNULL(MY_SPEC.value_expr_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to open iter, value expr is null.", K(ret)); - } else if (OB_FAIL(jt_root_->open())) { - LOG_WARN("failed to open jt column node.", K(ret)); + } else if (OB_FAIL(root_->open(&jt_ctx_))) { + LOG_WARN("failed to open table func xml column node.", K(ret)); } else { is_evaled_ = false; } @@ -3140,11 +613,37 @@ int ObJsonTableOp::inner_rescan() int ret = OB_SUCCESS; if (OB_FAIL(ObOperator::inner_rescan())) { LOG_WARN("failed to inner rescan", K(ret)); - } else if (OB_FAIL(inner_open())) { - LOG_WARN("failed to inner open", K(ret)); } else { jt_ctx_.row_alloc_.reuse(); } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(reset_variable())) { + LOG_WARN("failed to inner open", K(ret)); + } + return ret; +} + +int ObJsonTableOp::reset_variable() +{ + INIT_SUCC(ret); + 
jt_ctx_.is_cover_error_ = false; + jt_ctx_.error_code_ = 0; + jt_ctx_.is_need_end_ = 0; + + if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(&jt_ctx_.row_alloc_, jt_ctx_.mem_ctx_))) { + LOG_WARN("fail to create tree memory context", K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(MY_SPEC.value_expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to open iter, value expr is null.", K(ret)); + } else if (OB_FAIL(root_->reset(&jt_ctx_))) { + LOG_WARN("failed to open table func xml column node.", K(ret)); + } else { + is_evaled_ = false; + } + return ret; } @@ -3154,50 +653,84 @@ int ObJsonTableOp::switch_iterator() return OB_ITER_END; } - int ObJsonTableOp::init() { INIT_SUCC(ret); if (!is_inited_) { const ObJsonTableSpec* spec_ptr = reinterpret_cast(&spec_); + jt_ctx_.spec_ptr_ = const_cast(spec_ptr); if (OB_FAIL(generate_table_exec_tree())) { LOG_WARN("fail to init json table op, as generate exec tree occur error.", K(ret)); } else { const sql::ObSQLSessionInfo *session = get_exec_ctx().get_my_session(); - uint64_t tenant_id = session ? 
common::OB_SERVER_TENANT_ID : session->get_effective_tenant_id(); - - is_inited_ = true; - jt_ctx_.spec_ptr_ = const_cast(spec_ptr); - jt_ctx_.eval_ctx_ = &eval_ctx_; - jt_ctx_.exec_ctx_ = &get_exec_ctx(); - jt_ctx_.row_alloc_.set_tenant_id(tenant_id); - jt_ctx_.op_exec_alloc_ = allocator_; - jt_ctx_.is_evaled_ = false; - jt_ctx_.is_charset_converted_ = false; - jt_ctx_.res_obj_ = nullptr; - jt_ctx_.jt_op_ = this; + uint64_t tenant_id = -1; + if (OB_ISNULL(session)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session is NULL", K(ret)); + } else { + tenant_id = session->get_effective_tenant_id(); + is_inited_ = true; + jt_ctx_.spec_ptr_ = const_cast(spec_ptr); + jt_ctx_.eval_ctx_ = &eval_ctx_; + jt_ctx_.exec_ctx_ = &get_exec_ctx(); + jt_ctx_.row_alloc_.set_tenant_id(tenant_id); + jt_ctx_.op_exec_alloc_ = allocator_; + jt_ctx_.is_evaled_ = false; + jt_ctx_.is_charset_converted_ = false; + jt_ctx_.res_obj_ = nullptr; + jt_ctx_.jt_op_ = this; + jt_ctx_.is_const_input_ = !MY_SPEC.has_correlated_expr_; + } + } + void* table_func_buf = NULL; + if (OB_FAIL(ret)) { + } else if (jt_ctx_.is_json_table_func()) { + table_func_buf = jt_ctx_.op_exec_alloc_->alloc(sizeof(JsonTableFunc)); + if (OB_ISNULL(table_func_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate table func buf", K(ret)); + } else if (OB_ISNULL(jt_ctx_.table_func_ = new (table_func_buf) JsonTableFunc())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to new json array node", K(ret)); + } + } else if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(jt_ctx_.op_exec_alloc_, jt_ctx_.xpath_ctx_))) { + LOG_WARN("fail to create xpath memory context", K(ret)); + } else if (jt_ctx_.is_xml_table_func()) { + table_func_buf = jt_ctx_.op_exec_alloc_->alloc(sizeof(XmlTableFunc)); + if (OB_ISNULL(table_func_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate table func buf", K(ret)); + } else if (OB_ISNULL(jt_ctx_.table_func_ = new (table_func_buf) XmlTableFunc())) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("failed to new json array node", K(ret)); + } } } - jt_ctx_.is_cover_error_ = false; - jt_ctx_.is_const_input_ = !MY_SPEC.has_correlated_expr_; jt_ctx_.error_code_ = 0; jt_ctx_.is_need_end_ = 0; + if (OB_SUCC(ret) && OB_FAIL(ObXmlUtil::create_mulmode_tree_context(&jt_ctx_.row_alloc_, jt_ctx_.mem_ctx_))) { + LOG_WARN("fail to create tree memory context", K(ret)); + } return ret; } int ObJsonTableOp::inner_close() { INIT_SUCC(ret); - if (OB_NOT_NULL(jt_root_)) { - jt_root_->destroy(); + if (OB_NOT_NULL(root_)) { + root_->destroy(); } - if (OB_NOT_NULL(def_root_)) { def_root_->destroy(); } - jt_ctx_.row_alloc_.clear(); + if (OB_NOT_NULL(jt_ctx_.table_func_) && OB_NOT_NULL(jt_ctx_.op_exec_alloc_)) { + jt_ctx_.op_exec_alloc_->free(jt_ctx_.table_func_); + } + if (OB_NOT_NULL(jt_ctx_.xpath_ctx_) && OB_NOT_NULL(jt_ctx_.op_exec_alloc_)) { + jt_ctx_.op_exec_alloc_->free(jt_ctx_.xpath_ctx_); + } return ret; } @@ -3211,7 +744,7 @@ void ObJsonTableOp::reset_columns() } } -void JtScanNode::reset_reg_columns(JtScanCtx* ctx) +void ScanNode::reset_reg_columns(JtScanCtx* ctx) { for (size_t i = 0; i < reg_column_count(); ++i) { ObExpr* col_expr = ctx->spec_ptr_->column_exprs_.at(reg_col_node(i)->col_info_.output_column_idx_); @@ -3226,103 +759,2067 @@ void ObJsonTableOp::destroy() ObOperator::destroy(); } +int RegularCol::check_item_method_json(ObRegCol &col_node, JtScanCtx* ctx) +{ + INIT_SUCC(ret); + ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_node.col_info_.output_column_idx_); + if (col_node.type() == COL_TYPE_QUERY) { + if (col_node.expr_param_.dst_type_ != ObVarcharType + && col_node.expr_param_.dst_type_ != ObLongTextType + && col_node.expr_param_.dst_type_ != ObJsonType) { + ret = OB_ERR_INVALID_DATA_TYPE_RETURNING; + LOG_USER_ERROR(OB_ERR_INVALID_DATA_TYPE_RETURNING); + } else if (col_node.expr_param_.json_path_->is_last_func() + && OB_FAIL( 
ObJsonExprHelper::check_item_func_with_return(col_node.expr_param_.json_path_->get_last_node_type(), + col_node.expr_param_.dst_type_, expr->datum_meta_.cs_type_, 1))) { + if (ret == OB_ERR_INVALID_DATA_TYPE_RETURNING) { + ret = OB_ERR_INVALID_DATA_TYPE; + LOG_USER_ERROR(OB_ERR_INVALID_DATA_TYPE, "JSON_TABLE"); + } + LOG_WARN("check item func with return type fail", K(ret)); + } else if (OB_FAIL(ObExprJsonQuery::check_item_method_valid_with_wrapper(col_node.expr_param_.json_path_, + col_node.expr_param_.wrapper_))) { + LOG_WARN("fail to check item method with wrapper", K(ret)); + } + } else if (col_node.type() == COL_TYPE_VALUE) { + if (col_node.expr_param_.json_path_->is_last_func() + && OB_FAIL( ObJsonExprHelper::check_item_func_with_return(col_node.expr_param_.json_path_->get_last_node_type(), + col_node.expr_param_.dst_type_, expr->datum_meta_.cs_type_, 0))) { + if (ret == OB_ERR_INVALID_DATA_TYPE_RETURNING) { + ret = OB_ERR_INVALID_DATA_TYPE; + LOG_USER_ERROR(OB_ERR_INVALID_DATA_TYPE, "JSON_TABLE"); + } + LOG_WARN("check item func with return type fail", K(ret)); + } + } + return ret; +} + +int RegularCol::eval_query_col(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* col_expr, bool& is_null) +{ + INIT_SUCC(ret); + ObJsonArray* in = static_cast(col_node.curr_); + is_null = false; + int8_t use_wrapper = 0; + if (in->element_count() == 0) { + is_null = true; + } else if (in->element_count() == 1 + && OB_FAIL(ObExprJsonQuery::get_single_obj_wrapper(col_node.col_info_.wrapper_, + use_wrapper, in[0][0]->json_type(), col_node.col_info_.allow_scalar_))) { + SET_COVER_ERROR(ctx, ret); + LOG_WARN("result can't be returned without array wrapper", K(ret)); + } else if (in->element_count() > 1 + && OB_FAIL(ObExprJsonQuery::get_multi_scalars_wrapper_type(col_node.col_info_.wrapper_, use_wrapper))) { + SET_COVER_ERROR(ctx, ret); + LOG_WARN("result can't be returned without array wrapper", K(ret)); + } else if (!use_wrapper) { + col_node.curr_ = in[0][0]; + } + return ret; +} + 
+int RegularCol::eval_value_col(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* col_expr, bool& is_null) +{ + INIT_SUCC(ret); + is_null = false; + uint8_t is_type_mismatch = 0; + ObIJsonBase* in = static_cast(col_node.curr_); + + if (ob_is_json(col_expr->datum_meta_.type_)) { + } else if (in->json_type() == ObJsonNodeType::J_OBJECT + || in->json_type() == ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_JSON_VALUE_NO_SCALAR; + LOG_WARN("result can not be object", K(ret)); + SET_COVER_ERROR(ctx, ret); + } else if (lib::is_oracle_mode() + && OB_FAIL(ObExprJsonValue::deal_item_method_in_seek(in, is_null, col_node.expr_param_.json_path_, + &ctx->row_alloc_, is_type_mismatch))) { + SET_COVER_ERROR(ctx, ret); + LOG_WARN("fail to check res valid" , K(ret)); + } else if (lib::is_oracle_mode() + && in->json_type() == ObJsonNodeType::J_BOOLEAN + && ob_is_number_tc(col_node.col_info_.data_type_.get_obj_type())) { + col_node.curr_ = nullptr; + is_null = true; + ret = OB_ERR_BOOL_CAST_NUMBER; + LOG_WARN("boolean cast number cast not support"); + SET_COVER_ERROR(ctx, ret); + } else if ((in->json_type() == ObJsonNodeType::J_INT + || in->json_type() == ObJsonNodeType::J_INT) + && (ob_is_datetime_tc(col_node.col_info_.data_type_.get_obj_type()))) { + char* res_ptr = ctx->buf; + int len = snprintf(ctx->buf, sizeof(ctx->buf), "%ld", in->get_int()); + if (len > 0) { + ObJsonString* j_string = nullptr; + if (OB_ISNULL(j_string = static_cast(ctx->row_alloc_.alloc(sizeof(ObJsonString))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + RESET_COVER_CODE(ctx); + LOG_WARN("fail to allocate json string node", K(ret)); + } else { + col_node.curr_ = new(j_string) ObJsonString(ctx->buf, len); + } + } else { + ret = OB_ERR_UNEXPECTED; + RESET_COVER_CODE(ctx); + LOG_WARN("fail to print int value", K(ret)); + } + } + return ret; +} +int RegularCol::eval_exist_col(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* col_expr, bool& is_null) +{ + INIT_SUCC(ret); + is_null = true; + if 
(ob_is_string_type(col_node.col_info_.data_type_.get_obj_type())) { + ObString value("true"); + if (lib::is_mysql_mode()) { + value.assign_ptr("1", 1); + } + void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonString)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + col_node.curr_ = static_cast(new(buf)ObJsonString(value.ptr(), value.length())); + is_null = false; + } + } else { + void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonInt)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("buf allocate failed", K(ret)); + } else { + col_node.curr_ = static_cast(new(buf)ObJsonInt(1)); + is_null = false; + } + } + return ret; +} + +int RegularCol::eval_xml_scalar_col(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* col_expr) +{ + INIT_SUCC(ret); + ObXmlBin *hit = static_cast(col_node.curr_); + if (hit->size() > 1 && hit->type() != M_DOCUMENT) { + ret = OB_ERR_XQUERY_MULTI_VALUE; + SET_COVER_ERROR(ctx, ret); + } + return ret; +} + +int RegularCol::eval_xml_type_col(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* col_expr) +{ + INIT_SUCC(ret); + return ret; +} + +// xmltable expr function +int XmlTableFunc::container_at(void* in, void *&out, int32_t pos) +{ + INIT_SUCC(ret); + ObXmlBin *t_in = static_cast(in); + if (pos < 0 || pos >= t_in->size()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("pos out of range", K(ret), K(pos)); + } else { + out = t_in->at(pos); + } + return ret; +} + +int XmlTableFunc::eval_input(ObJsonTableOp &jt, JtScanCtx &ctx, ObEvalCtx &eval_ctx) +{ + INIT_SUCC(ret); + common::ObObjMeta& doc_obj_datum = ctx.spec_ptr_->value_expr_->obj_meta_; + ObDatumMeta& doc_datum = ctx.spec_ptr_->value_expr_->datum_meta_; + ObObjType doc_type = doc_datum.type_; + ObCollationType doc_cs_type = doc_datum.cs_type_; + ObString j_str; + bool is_null = false; + ObIMulModeBase *input_node = NULL; + + if (doc_type == ObNullType) { + ret = OB_ITER_END; + } else if (ctx.is_xml_table_func()) { + if (!doc_obj_datum.is_xml_sql_type() && 
!ob_is_string_type(doc_type)) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("inconsistent datatypes", K(ret), K(ob_obj_type_str(doc_type))); + } else { + jt.reset_columns(); + // get input_node + if (OB_FAIL(ObXMLExprHelper::get_xml_base_from_expr(ctx.spec_ptr_->value_expr_, ctx.mem_ctx_, eval_ctx, input_node))) { + LOG_WARN("get real data failed", K(ret)); + } else { + jt.input_ = input_node; + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid table func", K(ret)); + } + return ret; +} + +int XmlTableFunc::reset_ctx(ObRegCol &scan_node, JtScanCtx*& ctx) +{ + INIT_SUCC(ret); + bool need_eval = false; + if (scan_node.node_type() == REG_TYPE) { + bool is_datum_data = false; + if (scan_node.type() == COL_TYPE_XMLTYPE_XML) { + if (scan_node.col_info_.on_empty_ == JSN_VALUE_DEFAULT) { + need_eval = true; + } + } else if (scan_node.type() == COL_TYPE_VAL_EXTRACT_XML) { + if (scan_node.col_info_.on_empty_ == JSN_VALUE_DEFAULT) { + need_eval = true; + is_datum_data = true; + } + } + if (need_eval) { + ObExpr* default_expr = ctx->spec_ptr_->emp_default_exprs_.at(scan_node.col_info_.empty_expr_id_); + if (OB_FAIL(eval_default_value(ctx, default_expr, scan_node.emp_val_, is_datum_data))) { + LOG_WARN("fail to eval default value", K(ret)); + } else { + scan_node.is_emp_evaled_ = true; + scan_node.res_flag_ = ResultType::NOT_DATUM; + ObExpr* col_expr = ctx->spec_ptr_->column_exprs_.at(scan_node.col_info_.output_column_idx_); + } + } + } + return ret; +} + +int XmlTableFunc::init_ctx(ObRegCol &scan_node, JtScanCtx*& ctx) +{ + INIT_SUCC(ret); + bool need_eval = false; // flag of eval default value + // init path + scan_node.tab_type_ = MulModeTableType::OB_ORA_XML_TABLE_TYPE; + if (!scan_node.is_path_evaled_ && OB_ISNULL(scan_node.path_) + && (scan_node.node_type() == REG_TYPE || scan_node.node_type() == SCAN_TYPE) + && !scan_node.col_info_.path_.empty()) { + ObPathExprIter *t_iter = NULL; + ObIMulModeBase *doc = nullptr; + scan_node.path_ = NULL; + void* 
path_buf = ctx->op_exec_alloc_->alloc(sizeof(ObPathExprIter)); + if (OB_ISNULL(doc)) { + if (OB_ISNULL(doc = OB_NEWx(ObXmlDocument, ctx->mem_ctx_->allocator_, ObMulModeNodeType::M_CONTENT, ctx->mem_ctx_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to create document", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObXMLExprHelper::construct_namespace_params(ctx->spec_ptr_->namespace_def_, + ctx->default_ns, + ctx->context, *ctx->op_exec_alloc_))) { + LOG_WARN("fail to get namespace", K(ret)); + } else if (OB_ISNULL(path_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate json path buffer", K(ret)); + } else { + ObPathVarObject *t_ns = static_cast(ctx->context); + t_iter = new (path_buf) ObPathExprIter(ctx->op_exec_alloc_, ctx->mem_ctx_->allocator_); + if (OB_FAIL(t_iter->init(ctx->xpath_ctx_, scan_node.col_info_.path_, ctx->default_ns, doc, t_ns))) { + LOG_WARN("fail to init xpath iterator", K(scan_node.col_info_.path_), K(ctx->default_ns), K(ret)); + } else if (OB_FAIL(t_iter->open())) { + ret = OB_ERR_PARSE_XQUERY_EXPR; + LOG_USER_ERROR(OB_ERR_PARSE_XQUERY_EXPR, t_iter->get_path_str().length(), t_iter->get_path_str().ptr()); + LOG_WARN("fail to open xpath iterator", K(ret)); + // ObXMLExprHelper::replace_xpath_ret_code(ret); + } else if (OB_FAIL(ObXMLExprHelper::check_xpath_valid(*t_iter, scan_node.node_type() == SCAN_TYPE))) { + LOG_WARN("check xpath valid failed", K(ret)); + } else { + scan_node.path_ = t_iter; + scan_node.is_path_evaled_ = true; + } + } + } + // default value init + if (OB_FAIL(ret)) { + } else if (scan_node.node_type() == REG_TYPE) { + if (!scan_node.is_emp_evaled_) { + need_eval = false; + bool is_datum_data = false; + if (scan_node.type() == COL_TYPE_XMLTYPE_XML) { + if (scan_node.col_info_.on_empty_ == JSN_VALUE_DEFAULT) { + need_eval = true; + } + } else if (scan_node.type() == COL_TYPE_VAL_EXTRACT_XML) { + if (scan_node.col_info_.on_empty_ == JSN_VALUE_DEFAULT) { + need_eval = true; + 
is_datum_data = true; + } + } + if (need_eval) { + ObExpr* default_expr = ctx->spec_ptr_->emp_default_exprs_.at(scan_node.col_info_.empty_expr_id_); + if (OB_FAIL(eval_default_value(ctx, default_expr, scan_node.emp_val_, is_datum_data))) { + LOG_WARN("fail to eval default value", K(ret)); + } else { + scan_node.is_emp_evaled_ = true; + scan_node.res_flag_ = ResultType::NOT_DATUM; + ObExpr* col_expr = ctx->spec_ptr_->column_exprs_.at(scan_node.col_info_.output_column_idx_); + if (OB_FAIL(check_default_value(ctx, scan_node, col_expr))) { + LOG_WARN("check default value failed", K(ret)); + } + } + } + } + } + return ret; +} + +int XmlTableFunc::eval_default_value(JtScanCtx*& ctx, ObExpr*& default_expr, void*& res, bool need_datum) +{ + INIT_SUCC(ret); + ObDatum* emp_datum = nullptr; + if (OB_FAIL(default_expr->eval(*ctx->eval_ctx_, emp_datum))) { + LOG_WARN("failed do cast to returning type.", K(ret)); + } else if (!ob_is_xml_sql_type(default_expr->datum_meta_.type_, default_expr->obj_meta_.get_subschema_id())) { + res = emp_datum; + } else { + ObIMulModeBase* xml_base = NULL; + if (OB_FAIL(ObXMLExprHelper::get_xml_base(ctx->xpath_ctx_, emp_datum, *ctx->eval_ctx_, xml_base, ObGetXmlBaseType::OB_SHOULD_CHECK))) { + LOG_WARN("failed do cast to returning type.", K(ret)); + } else if (OB_NOT_NULL(xml_base)) { + if (need_datum) { + res = emp_datum; + } else { + res = xml_base; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null value", K(ret)); + } + } + return ret; +} + +// init_flag +int XmlTableFunc::reset_path_iter(ObRegCol &scan_node, void* in, JtScanCtx*& ctx, ScanType init_flag, bool &is_null_value) +{ + INIT_SUCC(ret); + ObPathExprIter *t_iter = NULL; + ObIMulModeBase *doc = static_cast(in); + if (init_flag == COL_NODE_TYPE) { + if (OB_ISNULL(doc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("doc can not be null", K(ret)); + } else if (scan_node.col_info_.path_.length() == 0) { + ret = OB_ERR_PARSE_XQUERY_EXPR; + LOG_WARN("path can not be null", K(ret)); + } 
else if (scan_node.col_info_.path_[0] != '/' && doc->size() == 1) { + char* extend_start = nullptr; + int64_t extend_len = 0; + ObXmlBin* bin_doc = nullptr; + if (doc->check_extend()) { + bin_doc = static_cast(doc); + if (OB_FAIL(bin_doc->get_extend(extend_start, extend_len))) { + LOG_WARN("fail to get extend", K(ret)); + } + } + if (OB_SUCC(ret)) { + doc = doc->at(0); + if (OB_NOT_NULL(doc) && OB_NOT_NULL(bin_doc = static_cast(doc)) + && OB_NOT_NULL(extend_start) && extend_len > 0 + && OB_FAIL(bin_doc->append_extend(extend_start, extend_len))) { + LOG_WARN("fail to append extend", K(ret)); + } + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(doc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("doc can not be null", K(ret)); + } else if (scan_node.is_path_evaled_ && OB_NOT_NULL(scan_node.path_)) { + t_iter = static_cast(scan_node.path_); + if (OB_FAIL(t_iter->reset(doc, ctx->mem_ctx_->allocator_))) { + LOG_WARN("fail to reset t_iter", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("path_ should not be null", K(ret)); + } + // scan node get first node + if (OB_SUCC(ret) && scan_node.node_type() == SCAN_TYPE) { + bool is_null_res = false; + if (OB_FAIL(get_iter_value(scan_node, ctx, is_null_res))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get first node", K(ret)); + } + } else { + is_null_value = is_null_res; + } + } + return ret; +} + +int XmlTableFunc::get_iter_value(ObRegCol &col_node, JtScanCtx* ctx, bool &is_null_value) +{ + INIT_SUCC(ret); + is_null_value = false; + ObPathExprIter *xpath_iter = static_cast(col_node.path_); + ObIMulModeBase *node = NULL; + if (OB_ISNULL(xpath_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xpath iter can not be null", K(ret)); + } else if (OB_FAIL(xpath_iter->get_next_node(node))) { + if (ret != OB_ITER_END) { + ret = OB_ERR_PARSE_XQUERY_EXPR; + LOG_USER_ERROR(OB_ERR_PARSE_XQUERY_EXPR, xpath_iter->get_path_str().length(), xpath_iter->get_path_str().ptr()); + LOG_WARN("fail to get next xml node", 
K(ret)); + } + } else if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xpath result node is null", K(ret)); + } else if (node->type() == ObMulModeNodeType::M_DOCUMENT + || node->type() == ObMulModeNodeType::M_CONTENT) { + col_node.iter_ = node; + col_node.curr_ = node; + col_node.cur_pos_ ++; + } else if (node->is_tree() && OB_FAIL(ObMulModeFactory::transform(ctx->mem_ctx_, node, BINARY_TYPE, node))) { + LOG_WARN("fail to transform to tree", K(ret)); + } else { + ObBinAggSerializer bin_agg(ctx->mem_ctx_->allocator_, ObBinAggType::AGG_XML, static_cast(M_CONTENT)); + + ObXmlBin *bin = nullptr; + char* extend_start = nullptr; + int64_t extend_len = 0; + if (OB_ISNULL(bin = static_cast(node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get bin failed", K(ret)); + } else if (bin->check_extend()) { + // must be one ans, append extend after final result + if (OB_FAIL(bin->get_extend(extend_start, extend_len))) { + LOG_WARN("fail to get extend", K(ret)); + } else if (OB_FAIL(bin->remove_extend())) { + LOG_WARN("fail to remove extend", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(bin_agg.append_key_and_value(bin))) { + LOG_WARN("fail to add node in doc", K(ret)); + } else { + /* seek result can not be next input xml,should add doc node in front */ + ObMulModeNodeType type = bin->type(); + ObMulModeNodeType node_type = M_CONTENT; + if (type == ObMulModeNodeType::M_ELEMENT) { + node_type = M_DOCUMENT; + } + bin_agg.set_header_type(node_type); + if (OB_FAIL(bin_agg.serialize())) { + LOG_WARN("failed to serialize binary.", K(ret)); + } else if (OB_FAIL(ObMulModeFactory::get_xml_base(ctx->mem_ctx_, bin_agg.get_buffer()->string(), + ObNodeMemType::BINARY_TYPE, + ObNodeMemType::BINARY_TYPE, + node))) { + LOG_WARN("fail to transform to tree", K(ret)); + } else if (OB_NOT_NULL(node) && OB_NOT_NULL(bin = static_cast(node)) && OB_NOT_NULL(extend_start) + && OB_FAIL(bin->append_extend(extend_start, extend_len))) { + LOG_WARN("fail to append extend", K(ret), 
K(node)); + } else { + col_node.iter_ = node; + col_node.curr_ = node; + col_node.cur_pos_ ++; + } + } + } + return ret; +} + +int XmlTableFunc::eval_seek_col(ObRegCol &col_node, void* in, JtScanCtx* ctx, bool &is_null_value, bool &need_cast_res) +{ + INIT_SUCC(ret); + ObIMulModeBase *xml_res = NULL; + ObPathExprIter *xml_iter = static_cast(col_node.path_); + if (OB_ISNULL(xml_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("input containter or xpath can not be null", K(ret)); + } else if (OB_ISNULL(in)) { + is_null_value = true; + col_node.curr_ = nullptr; + } else if (OB_FAIL(ObXMLExprHelper::get_xpath_result(*xml_iter, xml_res, ctx->mem_ctx_, col_node.type() == COL_TYPE_XMLTYPE_XML))) { + LOG_WARN("xml seek failed", K(col_node.col_info_.path_), K(ret)); + SET_COVER_ERROR(ctx, ret); + } else if (OB_ISNULL(xml_res) || xml_res->size() == 0) { + is_null_value = true; + col_node.curr_ = nullptr; + } else { + is_null_value = false; + col_node.curr_ = xml_res; + } + return ret; +} + +int XmlTableFunc::col_res_type_check(ObRegCol &col_node, JtScanCtx* ctx) +{ + INIT_SUCC(ret); + ObObjType obj_type = col_node.col_info_.data_type_.get_obj_type(); + JtColType col_type = col_node.type(); + if (col_type == COL_TYPE_XMLTYPE_XML) { + } + return ret; +} + +// default value cast type check +bool RegularCol::check_cast_allowed(const ObObjType orig_type, + const ObCollationType orig_cs_type, + const ObObjType expect_type, + const ObCollationType expect_cs_type, + const bool is_explicit_cast) +{ + UNUSED(expect_cs_type); + bool res = true; + ObObjTypeClass ori_tc = ob_obj_type_class(orig_type); + ObObjTypeClass expect_tc = ob_obj_type_class(expect_type); + bool is_expect_lob_tc = (ObLobTC == expect_tc || ObTextTC == expect_tc); + bool is_ori_lob_tc = (ObLobTC == ori_tc || ObTextTC == ori_tc); + if (is_oracle_mode()) { + if (is_explicit_cast) { + // can't cast lob to other type except char/varchar/nchar/nvarchar2/raw. clob to raw not allowed too. 
+ if (is_ori_lob_tc) { + if (expect_tc == ObJsonTC) { + /* oracle mode, json text use lob store */ + } else if (ObStringTC == expect_tc) { + // do nothing + } else if (ObRawTC == expect_tc) { + res = CS_TYPE_BINARY == orig_cs_type; + } else { + res = false; + } + } + // any type to lob type not allowed. + if (is_expect_lob_tc) { + res = false; + } + } else { + // BINARY FLOAT/DOUBLE not allow cast lob whether explicit + if (is_ori_lob_tc) { + if (expect_tc == ObFloatTC || expect_tc == ObDoubleTC) { + res = false; + } + } + } + } + return res; +} + +int XmlTableFunc::check_default_value(JtScanCtx* ctx, ObRegCol &col_node, ObExpr* expr) +{ + INIT_SUCC(ret); + ObString in_str; + + if (col_node.col_info_.on_empty_ == JSN_VALUE_DEFAULT) { + ObExpr* default_expr = ctx->spec_ptr_->emp_default_exprs_.at(col_node.col_info_.empty_expr_id_); + + if (static_cast(col_node.col_info_.col_type_) == COL_TYPE_XMLTYPE_XML) { + // 检查默认值类型 + if (!default_expr->obj_meta_.is_xml_sql_type()) { + ret = OB_ERR_INVALID_XML_DATATYPE; + LOG_USER_ERROR(OB_ERR_INVALID_XML_DATATYPE, "XMLTYPE", ob_obj_type_str(default_expr->datum_meta_.type_)); + } + } else if (static_cast(col_node.col_info_.col_type_) == COL_TYPE_VAL_EXTRACT_XML) { + if (OB_ISNULL(default_expr) || OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("dat can not be null", K(ret), KP(default_expr), KP(expr)); + } + if (OB_FAIL(ret)) { + } else if (!RegularCol::check_cast_allowed(default_expr->datum_meta_.type_, + default_expr->datum_meta_.cs_type_, + expr->datum_meta_.type_, + expr->datum_meta_.cs_type_, + true)) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("explicit cast to lob type not allowed", K(ret), K(expr->datum_meta_.type_)); + } + } + } + return ret; +} + +int XmlTableFunc::set_on_empty(ObRegCol& col_node, JtScanCtx* ctx, bool &need_cast, bool& is_null) +{ + INIT_SUCC(ret); + JtColType col_type = col_node.type(); + if (col_type == COL_TYPE_XMLTYPE_XML) { + switch (col_node.col_info_.on_empty_) { + case 
JSN_VALUE_IMPLICIT: + case JSN_VALUE_NULL: { + col_node.curr_ = nullptr; + is_null = true; + ret = OB_SUCCESS; + break; + } + case JSN_VALUE_DEFAULT: { + if (col_node.is_emp_evaled_ && OB_NOT_NULL(col_node.emp_val_)) { + col_node.curr_ = col_node.emp_val_; + col_node.res_flag_ = ResultType::EMPTY_DATUM; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null value", K(ret)); + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(ctx->table_func_->cast_to_result(col_node, ctx, false))) { + LOG_WARN("fail to cast to res", K(ret)); + } else { + need_cast = false; + is_null = false; + } + } + break; + } + default: // error_type from get_on_empty_or_error has done range check, do nothing for default + break; + } + } else if (col_type == COL_TYPE_VAL_EXTRACT_XML) { + switch (col_node.col_info_.on_empty_) { + case JSN_VALUE_IMPLICIT: + case JSN_VALUE_NULL: { + col_node.curr_ = nullptr; + is_null = true; + ret = OB_SUCCESS; + break; + } + case JSN_VALUE_DEFAULT: { + is_null = false; + if (col_node.is_emp_evaled_ && OB_NOT_NULL(col_node.emp_val_)) { + col_node.curr_ = col_node.emp_val_; + col_node.res_flag_ = ResultType::EMPTY_DATUM; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null value", K(ret)); + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(ctx->table_func_->cast_to_result(col_node, ctx, true))) { + LOG_WARN("fail to cast to res", K(ret)); + } else { + need_cast = false; + } + } + break; + } + default: // error_type from get_on_empty_or_error has done range check, do nothing for default + break; + } + } + return ret; +} + +int XmlTableFunc::set_on_error(ObRegCol& col_node, JtScanCtx* ctx, int& ret) +{ + INIT_SUCC(tmp_ret); + if (ret == OB_SUCCESS) { + } else { + const ObJtColInfo& info = col_node.col_info_; + if (info.on_error_ == JSN_VALUE_ERROR || info.on_error_ == JSN_VALUE_IMPLICIT) { + EVAL_COVER_CODE(ctx, ret) ; + if (OB_SUCC(ret) && ctx->is_need_end_) { + ret = OB_ITER_END; + } + } + } + return tmp_ret; +} + +int XmlTableFunc::cast_to_result(ObRegCol& col_node, 
JtScanCtx* ctx, bool enable_error, bool is_pack_result) +{ + INIT_SUCC(ret); + UNUSED(enable_error); + UNUSED(is_pack_result); + JtColType col_type = col_node.type(); + + ObJtColInfo& col_info = col_node.get_column_def(); + bool is_truncate = static_cast(col_info.truncate_); + + ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_info.output_column_idx_); + ObDatum& res = expr->locate_datum_for_write(*ctx->eval_ctx_); + ctx->res_obj_ = &res; + ObXmlBin *doc = static_cast(col_node.curr_); + + ObObjType dst_type = expr->datum_meta_.type_; + ObCollationType coll_type = expr->datum_meta_.cs_type_; + ObAccuracy accuracy = col_info.data_type_.get_accuracy(); + ObCollationType dst_coll_type = col_info.data_type_.get_collation_type(); + ObCollationType in_coll_type = ctx->is_charset_converted_ + ? CS_TYPE_UTF8MB4_BIN + : ctx->spec_ptr_->value_expr_->datum_meta_.cs_type_; + ObCollationLevel dst_coll_level = col_info.data_type_.get_collation_level(); + ObString xml_str; + + // 这里是不是不应该根据列定义划分,而是根据类型划分,,xml列就使用extract的逻辑,其他列就调用cast逻辑。 + switch(col_type) { + case COL_TYPE_XMLTYPE_XML : { + ObString blob_locator; + if (OB_FAIL(doc->get_raw_binary(xml_str, &ctx->row_alloc_))) { + LOG_WARN("failed to get bin", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::pack_binary_res(*expr, *ctx->eval_ctx_, xml_str, blob_locator))) { + LOG_WARN("pack binary res failed", K(ret)); + } else { + res.set_string(blob_locator.ptr(), blob_locator.length()); + } + break; + } + case COL_TYPE_VAL_EXTRACT_XML : { + if (col_node.res_flag_ == ResultType::NOT_DATUM) { // xmltype to unxmltype + if (OB_FAIL(ObXMLExprHelper::extract_xml_text_node(ctx->mem_ctx_, doc, xml_str))) { + LOG_WARN("fail to extract xml text node", K(ret), K(xml_str)); + } else if (OB_FAIL(ObXMLExprHelper::cast_to_res(ctx->row_alloc_, xml_str, *expr, *ctx->eval_ctx_, res))) { + LOG_WARN("fail to cast to res", K(ret), K(xml_str)); + } + } else { // use datum cast non xmltype, current only use for default value + ObExpr* default_expr = 
ctx->spec_ptr_->emp_default_exprs_.at(col_node.col_info_.empty_expr_id_); + ObDatum *src = static_cast(col_node.curr_); + col_node.res_flag_ = NOT_DATUM; // reset flag; + ObObj src_obj; + bool need_check_acc = false; + if (ob_is_xml_sql_type(default_expr->datum_meta_.type_, default_expr->obj_meta_.get_subschema_id())) { + need_check_acc = true; + } + if (src->is_null()) { + res.set_null(); + } else if (OB_FAIL(src->to_obj(src_obj, default_expr->obj_meta_, default_expr->obj_datum_map_))) { + LOG_WARN("fail cast datum to obj", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::cast_to_res(ctx->row_alloc_, src_obj, *expr, *ctx->eval_ctx_, res, need_check_acc))) { + LOG_WARN("fail to cast to res", K(ret), K(xml_str)); + } + } + + break; + } + default : { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid column type", K(ret), K(col_type)); + break; + } + } + return ret; +} + +// scan node function implement +int ScanNode::assign(const ScanNode& other) +{ + INIT_SUCC(ret); + + if (OB_FAIL(reg_col_defs_.assign(other.reg_col_defs_))) { + LOG_WARN("fail to assign col defs.", K(ret), K(other.reg_col_defs_.count())); + } else if (OB_FAIL(child_idx_.assign(other.child_idx_))) { + LOG_WARN("fail to assign child idx defs.", K(ret), K(other.child_idx_.count())); + } else { + seek_node_ = other.seek_node_; + } + return ret; +} + +int JoinNode::assign(const JoinNode& other) +{ + INIT_SUCC(ret); + right_ = other.right_; + left_ = other.left_; + return ret; +} + +int ScanNode::add_reg_column_node(ObRegCol* node, bool add_idx) +{ + INIT_SUCC(ret); + if (OB_FAIL(reg_col_defs_.push_back(node))) { + LOG_WARN("fail to store node ptr", K(ret), K(reg_col_defs_.count())); + } + return ret; +} + +// basic function +int ObRegCol::open(JtScanCtx* ctx) +{ + INIT_SUCC(ret); + if (OB_FAIL(ctx->table_func_->init_ctx(*this, ctx))) { + LOG_WARN("fail to init variable" , K(ret)); + } else { + ord_val_ = -1; + cur_pos_ = -1; + } + + return ret; +} + +int ObRegCol::reset(JtScanCtx* ctx) +{ + INIT_SUCC(ret); + 
if (OB_FAIL(ctx->table_func_->reset_ctx(*this, ctx))) { + LOG_WARN("fail to init variable" , K(ret)); + } else { + ord_val_ = -1; + cur_pos_ = -1; + } + return ret; +} + +int UnionNode::open(JtScanCtx* ctx) +{ + INIT_SUCC(ret); + if (OB_FAIL(ObMultiModeTableNode::open(ctx))) { + LOG_WARN("fail to open column node.", K(ret)); + } else if (left_ && OB_FAIL(left_->open(ctx))) { + LOG_WARN("fail to open left node.", K(ret)); + } else if (right_ && OB_FAIL(right_->open(ctx))) { + LOG_WARN("fail to open right node.", K(ret)); + } + return ret; +} + +int UnionNode::reset(JtScanCtx* ctx) +{ + INIT_SUCC(ret); + if (OB_FAIL(ObMultiModeTableNode::reset(ctx))) { + LOG_WARN("fail to reset base node", K(ret)); + } else if (left_ && OB_FAIL(left_->reset(ctx))) { + LOG_WARN("fail to reset left child", K(ret)); + } else if (right_ && OB_FAIL(right_->reset(ctx))) { + LOG_WARN("fail to reset right child", K(ret)); + } else { + is_left_iter_end_ = false; + is_right_iter_end_ = true; + } + return ret; +} + +int ScanNode::open(JtScanCtx* ctx) +{ + INIT_SUCC(ret); + if (OB_FAIL(ObMultiModeTableNode::open(ctx))) { + LOG_WARN("fail to open column node.", K(ret)); + } else if (OB_FAIL(seek_node_.open(ctx))) { + LOG_WARN("fail to open seek node.", K(ret)); + } else { + for (int i = 0; OB_SUCC(ret) && i < reg_col_defs_.count(); ++i) { + ObRegCol* node = reg_col_defs_.at(i); + if (OB_FAIL(node->open(ctx))) { + LOG_WARN("fail to open reg node.", K(ret)); + } + } + } + return ret; +} + +int ScanNode::reset(JtScanCtx* ctx) +{ + INIT_SUCC(ret); + if (OB_FAIL(ObMultiModeTableNode::reset(ctx))) { + LOG_WARN("fail to reset base node", K(ret)); + } else if (OB_FAIL(seek_node_.reset(ctx))) { + LOG_WARN("fail to reset seek node", K(ret)); + } + for (int i = 0; OB_SUCC(ret) && i < reg_col_defs_.count(); ++i) { + ObRegCol* node = reg_col_defs_.at(i); + ret = node->reset(ctx); + } + return ret; +} + +void ObMultiModeTableNode::destroy() +{ + // do nothing +} + +void ObRegCol::destroy() +{ + // path destory 
+ if (is_path_evaled_ && OB_NOT_NULL(path_)) { + is_path_evaled_ = false; + if (tab_type_ == OB_ORA_XML_TABLE_TYPE) { + ObPathExprIter* tmp_iter = static_cast(path_); + tmp_iter->~ObPathExprIter(); + } else if (tab_type_ == OB_ORA_JSON_TABLE_TYPE) { // do nothing current + ObJsonPath* json_path = static_cast(path_); + json_path->~ObJsonPath(); + } + } +} + +void UnionNode::destroy() +{ + if (OB_NOT_NULL(left_)) { + left_->destroy(); + } + + if (OB_NOT_NULL(right_)) { + right_->destroy(); + } +} + +void ScanNode::destroy() +{ + seek_node_.destroy(); + for (size_t i = 0; i < reg_col_defs_.count(); ++i) { + reg_col_defs_.at(i)->destroy(); + } + + reg_col_defs_.reset(); + child_idx_.reset(); +} + +// common logical : iter +int ObRegCol::eval_regular_col(void *in, JtScanCtx* ctx, bool& is_null_value) +{ + INIT_SUCC(ret); + JtColType col_type = type(); + is_null_value = false; + bool is_null_res = false; + ObExpr* col_expr = ctx->spec_ptr_->column_exprs_.at(col_info_.output_column_idx_); + ctx->res_obj_ = &col_expr->locate_datum_for_write(*ctx->eval_ctx_); + bool need_cast_res = true; + bool enable_error = true; + + if (lib::is_mysql_mode() && OB_ISNULL(in)) { + is_null_value = true; + need_cast_res = false; + curr_ = nullptr; + col_expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); + } else if (col_type == COL_TYPE_ORDINALITY + || col_type == COL_TYPE_ORDINALITY_XML) { + if (OB_ISNULL(in)) { + col_expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); + } else { + col_expr->locate_datum_for_write(*ctx->eval_ctx_).set_int(ctx->ord_val_); + } + col_expr->get_eval_info(*ctx->eval_ctx_).evaluated_ = true; + } else { + if (OB_FAIL(ctx->table_func_->col_res_type_check(*this, ctx))) { + LOG_WARN("check column res type failed", K(ret), K(col_info_.data_type_), K(col_info_.col_type_)); + } else if (OB_ISNULL(in)) { + is_null_value = true; + curr_ = nullptr; + EVAL_COVER_CODE(ctx, ret); + } else if (OB_FAIL(ctx->table_func_->reset_path_iter(*this, in, ctx, 
ScanType::COL_NODE_TYPE, is_null_res))) { + RESET_COVER_CODE(ctx); + LOG_WARN("fail to init func path", K(ret)); + } else if (is_null_res) { + is_null_value = true; + } else if (OB_FAIL(ctx->table_func_->eval_seek_col(*this, in, ctx, is_null_res, need_cast_res))) { + SET_COVER_ERROR(ctx, ret); + LOG_WARN("json seek failed", K(col_info_.path_), K(ret)); + } else if (curr_ == nullptr || is_null_res) { + is_null_value = true; + } else { + is_null_value = false; + if (col_type == COL_TYPE_QUERY) { + if (OB_FAIL(RegularCol::eval_query_col(*this, ctx, col_expr, is_null_value))) { + LOG_WARN("fail to eval json query value", K(ret), K(col_type)); + } + } else if (col_type == COL_TYPE_VALUE) { + if (OB_FAIL(RegularCol::eval_value_col(*this, ctx, col_expr, is_null_value))) { + LOG_WARN("fail to eval json value value", K(ret), K(col_type)); + } + } else if (col_type == COL_TYPE_EXISTS) { + if (OB_FAIL(RegularCol::eval_exist_col(*this, ctx, col_expr, is_null_value))) { + LOG_WARN("fail to eval json exist value", K(ret), K(col_type)); + } + } else if (col_type == COL_TYPE_XMLTYPE_XML) { + if (OB_FAIL(RegularCol::eval_xml_type_col(*this, ctx, col_expr))) { + LOG_WARN("fail to eval xml type value", K(ret), K(col_type)); + } + } else if (col_type == COL_TYPE_VAL_EXTRACT_XML) { + if (OB_FAIL(RegularCol::eval_xml_scalar_col(*this, ctx, col_expr))) { + LOG_WARN("fail to eval xml scalar value", K(ret), K(col_type)); + } + } + } + } + + if (OB_FAIL(ret)) { // deal empty value + } else if (col_type == COL_TYPE_EXISTS + || col_type == COL_TYPE_QUERY + || col_type == COL_TYPE_VALUE + || col_type == COL_TYPE_XMLTYPE_XML + || col_type == COL_TYPE_VAL_EXTRACT_XML) { + if (is_null_value) { + if (OB_FAIL(ctx->table_func_->set_on_empty(*this, ctx, need_cast_res, is_null_value))) { + LOG_WARN("fail to process on empty", K(ret)); + } + } else if (ctx->is_json_table_func() + && curr_ && NOT_DATUM == res_flag_ + && static_cast(curr_)->json_type() == ObJsonNodeType::J_NULL + && 
(!static_cast(curr_)->is_real_json_null(static_cast(curr_)) + || lib::is_mysql_mode())) { + curr_ = nullptr; + } + } + // deal error value + if (OB_FAIL(ret)) { + if (ctx->is_cover_error_) { + enable_error = false; + int tmp_ret = ctx->table_func_->set_on_error(*this, ctx, ret); + if (tmp_ret != OB_SUCCESS) { + LOG_WARN("failed to set error val.", K(tmp_ret)); + } + } + } + // cast_to_res + if (OB_FAIL(ret)) { + } else if (col_type == COL_TYPE_EXISTS + || col_type == COL_TYPE_QUERY + || col_type == COL_TYPE_VALUE + || col_type == COL_TYPE_XMLTYPE_XML + || col_type == COL_TYPE_VAL_EXTRACT_XML) { + if (OB_ISNULL(curr_)) { + is_null_value = true; + col_expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); + } else if (need_cast_res && OB_FAIL(ctx->table_func_->cast_to_result(*this, ctx, enable_error))) { + LOG_WARN("failed to do cast to res type", K(ret)); + } else if (OB_ISNULL(curr_)) { + is_null_value = true; + } + } + if (OB_SUCC(ret)) { + res_flag_ = NOT_DATUM; + col_expr->get_eval_info(*ctx->eval_ctx_).evaluated_ = true; + } + + return ret; +} + +int RegularCol::check_default_value_inner_oracle(JtScanCtx* ctx, + ObJtColInfo &col_info, + ObExpr* col_expr, + ObExpr* default_expr) +{ + INIT_SUCC(ret); + ObString in_str; + ObDatum *emp_datum = nullptr; + + if (OB_FAIL(default_expr->eval(*ctx->eval_ctx_, emp_datum))) { + LOG_WARN("failed do cast to returning type.", K(ret)); + } else { + in_str.assign_ptr(emp_datum->ptr_, emp_datum->len_); + } + if (OB_FAIL(ret)) { + } else if (default_expr->datum_meta_.type_ == ObNullType && ob_is_string_type(col_info.data_type_.get_obj_type())) { + ret = OB_ERR_DEFAULT_VALUE_NOT_LITERAL; + LOG_WARN("default value not match returing type", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::pre_default_value_check(col_expr->datum_meta_.type_, in_str, default_expr->datum_meta_.type_, col_info.data_type_.get_accuracy().get_length()))) { + LOG_WARN("default value pre check fail", K(ret), K(in_str)); + } else { + if 
(ob_obj_type_class(col_expr->datum_meta_.type_) == ob_obj_type_class(default_expr->datum_meta_.type_) + && OB_FAIL(ObExprJsonValue::check_default_val_accuracy(col_info.data_type_.get_accuracy(), default_expr->datum_meta_.type_, emp_datum))) { + LOG_WARN("fail to check accuracy", K(ret)); + } + } + return ret; +} + +int ScanNode::get_next_iter(void* in, JtScanCtx* ctx, bool& is_null_value) +{ + INIT_SUCC(ret); + is_null_value = false; + bool is_null_iter = false; + if (!is_evaled_ || in_ != in) { + in_ = in; + is_null_result_ = false; + if (OB_ISNULL(in_)) { + is_null_value = is_null_result_ = true; + seek_node_.curr_ = seek_node_.iter_ = nullptr; + seek_node_.cur_pos_ = 0; + seek_node_.total_ = 0; + } else if (OB_FAIL(ctx->table_func_->reset_path_iter(seek_node_, in_, ctx, ScanType::SCAN_NODE_TYPE, is_null_iter))) { // reset path & get first result + RESET_COVER_CODE(ctx); + LOG_WARN("fail to init path", K(ret), K(ctx->spec_ptr_->table_type_), K(ctx->table_func_)); + } else if (is_null_iter) { + is_null_value = is_null_result_ = true; + seek_node_.curr_ = seek_node_.iter_ = nullptr; + seek_node_.total_ = 1; + seek_node_.cur_pos_ = 0; + // 1. if root node seek result is NULL, but input(in) not null,then return end. + if (seek_node_.col_info_.parent_id_ == common::OB_INVALID_ID + || (ctx->jt_op_->get_root_param() == in // 2. 
if path == '$' && root scan node not have regular column, + && ctx->jt_op_->get_root_entry()->get_scan_node()->reg_column_count() == 0 + && ctx->jt_op_->get_root_entry()->get_scan_node() == this)) { + ret = OB_ITER_END; + } + } + if (OB_SUCC(ret)) { + is_evaled_= true; + seek_node_.cur_pos_ = 0; + } + } else { + is_null_iter = false; + if (OB_FAIL(ctx->table_func_->get_iter_value(seek_node_, ctx, is_null_iter))) { + if (ret == OB_ITER_END) { + seek_node_.curr_ = seek_node_.iter_ = nullptr; + } else { + LOG_WARN("fail to get seek value", K(ret)); + } + } else if (is_null_iter) { + is_null_value = is_null_result_ = true; + seek_node_.curr_ = seek_node_.iter_ = nullptr; + } + } + return ret; +} + +int ScanNode::get_next_row(void* in, JtScanCtx* ctx, bool& is_null_value) +{ + INIT_SUCC(ret); + bool is_empty_node = false; + is_null_value = false; + if (OB_FAIL(get_next_iter(in, ctx, is_empty_node))) { + LOG_WARN("fail to get current node", K(ret)); + } + is_null_value = is_empty_node; + // need reset column + reset_reg_columns(ctx); + + // eval regular column + if (OB_SUCC(ret)) { + uint32_t reg_count = reg_col_defs_.count(); + bool tmp_is_null = false; + for (uint32_t i = 0; OB_SUCC(ret) && i < reg_count && is_evaled_; ++i) { + ObRegCol* cur_node = reg_col_defs_.at(i); + if (cur_node->type() == COL_TYPE_ORDINALITY || cur_node->type() == COL_TYPE_ORDINALITY_XML) { + ctx->ord_val_ = seek_node_.cur_pos_ + 1; + } + if (OB_FAIL(cur_node->eval_regular_col(seek_node_.iter_, ctx, tmp_is_null))) { + LOG_WARN("fail to get regular column value", K(ret)); + } else { + is_null_value &= tmp_is_null; + } + } // eval scan node in join node + } + + return ret; +} + +int JoinNode::get_next_row(void* in, JtScanCtx* ctx, bool& is_null_value) +{ + INIT_SUCC(ret); + is_null_value = false; + + ObMultiModeTableNode* left_node = left(); + ObMultiModeTableNode* right_node = right(); + + bool is_left_null = false; + bool is_right_null = false; + if (!is_right_iter_end_) { // right node can 
expand more value, + } else if (OB_NOT_NULL(left_)) { + is_right_iter_end_ = false; + ret = left_node->get_next_row(in, ctx, is_left_null); + if (OB_FAIL(ret) && ret != OB_ITER_END) { + LOG_WARN("fail to get next row", K(ret)); + } else if (OB_SUCC(ret)) { + is_null_value = is_left_null; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get iter node", K(ret)); + } + + bool is_sub_result_null = false; // nested child result is null then + if (OB_SUCC(ret)) { + // child : join ->right child_ = union node + if (right_node) { + if (OB_FAIL(right_node->get_next_row(get_curr_iter_value(), ctx, is_sub_result_null))) { + if (OB_FAIL(ret) && ret != OB_ITER_END) { + LOG_WARN("fail to get column value", K(ret)); + } else if (ret == OB_ITER_END) { + is_right_iter_end_ = true; + } + } else if (OB_SUCC(ret)) { + is_null_value &= is_sub_result_null; + if (is_sub_result_null) { + is_right_iter_end_ = true; + } + } + } else { + is_right_iter_end_ = true; + } + } + if (OB_SUCC(ret) && OB_ISNULL(get_curr_iter_value()) + && ctx->jt_op_->get_root_entry() == this) { + ret = OB_ITER_END; + } else if (OB_FAIL(ret) && is_right_iter_end_) { // nested column evaled finis should get next iter + if (!is_evaled_) { + ret = OB_SUCCESS; // ignore only one null result + } else if (OB_FAIL(get_next_row(in, ctx, is_null_value))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get value", K(ret)); + } + } + } else if (OB_FAIL(ret)) { // if return fail, need reset flag + is_right_iter_end_ = true; + } + is_evaled_ = true; + + return ret; +} + +int UnionNode::get_next_row(void* in, JtScanCtx* ctx, bool& is_null_value) +{ + INIT_SUCC(ret); + + ObMultiModeTableNode* left_node = left(); + ObMultiModeTableNode* right_node = right(); + is_null_value = false; + bool is_left_null = false; + bool is_right_null = false; + if (in != in_) { + is_left_iter_end_ = false; + } + if (OB_NOT_NULL(left_node) && !is_left_iter_end_) { + ret = left_node->get_next_row(in, ctx, is_left_null); + if 
(OB_FAIL(ret)) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next row", K(ret)); + } else { + is_left_iter_end_ = true; + } + } else { + is_null_value = is_left_null; + if (is_left_null) { + is_left_iter_end_ = true; + } + } + } else { + ret = OB_ITER_END; + } + + if (OB_SUCC(ret)) { + if (is_left_null && OB_NOT_NULL(right_node)) { + ret = right_node->get_next_row(in, ctx, is_right_null); + if (OB_FAIL(ret) && ret != OB_ITER_END) { + LOG_WARN("fail to get next row", K(ret)); + } else if (OB_SUCC(ret)) { + if (!is_right_null) { + is_null_value = false; + } + } + } + } else if (OB_NOT_NULL(right_node) && (ret == OB_ITER_END)) { + ret = right_node->get_next_row(in, ctx, is_right_null); + if (OB_FAIL(ret) && ret != OB_ITER_END) { + LOG_WARN("fail to get next row", K(ret)); + } else if (OB_SUCC(ret) && is_right_null) { + if (in_ == in) { + ret = OB_ITER_END; + } + } else if (OB_SUCC(ret) && !is_right_null) { + is_null_value = false; + } + } + + in_ = in; + return ret; +} + +// json/xml table function inner int ObJsonTableOp::inner_get_next_row() { INIT_SUCC(ret); bool is_root_null = false; - if (OB_FAIL(init())) { - LOG_WARN("failed to init.", K(ret)); + if (!(jt_ctx_.is_xml_table_func() + || jt_ctx_.is_json_table_func())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unsupport table function", K(ret)); } else if (is_evaled_) { clear_evaluated_flag(); - reset_columns(); - if (OB_FAIL(jt_root_->get_next_row(in_, &jt_ctx_, is_root_null))) { + if (OB_FAIL(root_->get_next_row(input_, &jt_ctx_, is_root_null))) { if (ret != OB_ITER_END) { LOG_WARN("failed to open get next row.", K(ret)); } } } else { clear_evaluated_flag(); - common::ObObjMeta& doc_obj_datum = MY_SPEC.value_expr_->obj_meta_; - ObDatumMeta& doc_datum = MY_SPEC.value_expr_->datum_meta_; - ObObjType doc_type = doc_datum.type_; - ObCollationType doc_cs_type = doc_datum.cs_type_; - ObString j_str; - bool is_null = false; - - if (doc_type == ObNullType) { - ret = OB_ITER_END; - } else if (doc_type == ObNCharType 
|| - !(doc_type == ObJsonType - || doc_type == ObRawType - || ob_is_string_type(doc_type))) { - ret = OB_ERR_INPUT_JSON_TABLE; - LOG_WARN("fail to get json base", K(ret), K(doc_type)); + if (OB_FAIL(jt_ctx_.table_func_->eval_input(*this, jt_ctx_, *jt_ctx_.eval_ctx_))) { // get input value + if (ret != OB_ITER_END) { + LOG_WARN("fail to get input val", K(ret)); + } + } else if (OB_FAIL(root_->get_next_row(input_, &jt_ctx_, is_root_null))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next row", K(ret)); + } } else { - reset_columns(); - if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(MY_SPEC.value_expr_,eval_ctx_, - jt_ctx_.row_alloc_, j_str, is_null))) { - LOG_WARN("get real data failed", K(ret)); - } else if (is_null) { - ret = OB_ITER_END; - } else if ((ob_is_string_type(doc_type) || doc_type == ObLobType) - && (doc_cs_type != CS_TYPE_BINARY) - && (ObCharset::charset_type_by_coll(doc_cs_type) != CHARSET_UTF8MB4)) { - // need convert to utf8 first, we are using GenericInsituStringStream > - char *buf = nullptr; - const int64_t factor = 2; - int64_t buf_len = j_str.length() * factor; - uint32_t result_len = 0; - - if (OB_ISNULL(buf = static_cast(jt_ctx_.row_alloc_.alloc(buf_len)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc memory failed", K(ret)); - } else if (OB_FAIL(ObCharset::charset_convert(doc_cs_type, j_str.ptr(), - j_str.length(), CS_TYPE_UTF8MB4_BIN, buf, - buf_len, result_len))) { - LOG_WARN("charset convert failed", K(ret)); - } else { - jt_ctx_.is_charset_converted_ = true; - j_str.assign_ptr(buf, result_len); - } - } - - ObJsonInType j_in_type = ObJsonExprHelper::get_json_internal_type(doc_type); - ObJsonInType expect_type = ObJsonInType::JSON_TREE; - uint32_t parse_flag = lib::is_oracle_mode() ? 
ObJsonParser::JSN_RELAXED_FLAG : ObJsonParser::JSN_DEFAULT_FLAG; - - // json type input, or has is json check - bool is_ensure_json = lib::is_oracle_mode() && (doc_type != ObJsonType); - - if (OB_FAIL(ret)) { - } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&jt_ctx_.row_alloc_, j_str, j_in_type, expect_type, in_, parse_flag)) - || (in_->json_type() != ObJsonNodeType::J_ARRAY && in_->json_type() != ObJsonNodeType::J_OBJECT)) { - if (OB_FAIL(ret) || (is_ensure_json)) { - in_= nullptr; - ret = OB_ERR_JSON_SYNTAX_ERROR; - SET_COVER_ERROR(&jt_ctx_, ret); - jt_ctx_.is_need_end_ = 1; - if (lib::is_oracle_mode() && jt_root_->col_info_.on_error_ != JSN_VALUE_ERROR) { - ret = OB_SUCCESS; - } - } else { - ret = OB_SUCCESS; - } - } - - if (OB_FAIL(ret)) { - } else if (OB_FAIL(jt_root_->get_next_row(in_, &jt_ctx_, is_root_null))) { - if (ret != OB_ITER_END) { - LOG_WARN("fail to get next row", K(ret), KP(in_)); - } - } else { - is_evaled_ = true; - } + is_evaled_ = true; } } return ret; } +int JsonTableFunc::col_res_type_check(ObRegCol &col_node, JtScanCtx* ctx) +{ + INIT_SUCC(ret); + ObObjType obj_type = col_node.col_info_.data_type_.get_obj_type(); + JtColType col_type = col_node.type(); + if (lib::is_mysql_mode()) { + } else if (col_type == COL_TYPE_EXISTS) { + if (ob_is_string_type(obj_type) + || ob_is_numeric_type(obj_type) + || ob_is_integer_type(obj_type)) { + // do nothing + } else if (ob_is_json_tc(obj_type)) { + ret = OB_ERR_USAGE_KEYWORD; + LOG_WARN("invalid usage of keyword EXISTS", K(ret)); + } else { + ret = OB_ERR_NON_NUMERIC_CHARACTER_VALUE; + SET_COVER_ERROR(ctx, ret); + } + } else if (col_type == COL_TYPE_QUERY ) { + // do nothing + } + return ret; +} + +int JsonTableFunc::check_default_value(JtScanCtx* ctx, ObRegCol &col_node, ObExpr* expr) +{ + INIT_SUCC(ret); + if (lib::is_mysql_mode()) { + // in mysql mode, should check default value with parse json + if (OB_FAIL(RegularCol::check_default_value_mysql(col_node, ctx, expr))) { + LOG_WARN("fail to check 
default value in mysql", K(ret)); + } + } else { // oracle mode can use datum as result + if (OB_FAIL(RegularCol::check_default_value_oracle(ctx, col_node.col_info_, expr))) { + LOG_WARN("fail to check default value in oracle", K(ret)); + } + } + return ret; +} + +int RegularCol::parse_default_value_2json(ObExpr* default_expr, + JtScanCtx* ctx, + ObDatum*& tmp_datum, + ObIJsonBase *&res) +{ + INIT_SUCC(ret); + ObObjType val_type = default_expr->datum_meta_.type_; + ObCollationType cs_type = default_expr->datum_meta_.cs_type_; + ObDatum converted_datum; + converted_datum.set_datum(*tmp_datum); + ObString origin_str = converted_datum.get_string(); + // convert string charset if needed + if (ob_is_string_type(val_type) && ObCharset::charset_type_by_coll(cs_type) != CHARSET_UTF8MB4) { + ObString converted_str; + if (OB_FAIL(ObExprUtil::convert_string_collation(origin_str, cs_type, converted_str, + CS_TYPE_UTF8MB4_BIN, ctx->row_alloc_))) { + LOG_WARN("convert string collation failed", K(ret), K(cs_type), K(origin_str.length())); + } else { + converted_datum.set_string(converted_str); + cs_type = CS_TYPE_UTF8MB4_BIN; + } + } + origin_str = converted_datum.get_string(); + if (OB_SUCC(ret) + && OB_FAIL(ObJsonExprHelper::get_json_val(converted_datum, *ctx->exec_ctx_, default_expr, + ctx->op_exec_alloc_, val_type, cs_type, res))) { + LOG_WARN("fail to parse default value", K(ret)); + } + return ret; +} + +int RegularCol::check_default_value_mysql(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* expr) +{ + INIT_SUCC(ret); + ctx->is_cover_error_ = false; + ObIJsonBase* j_res = NULL; + + if (col_node.col_info_.on_empty_ == JSN_VALUE_DEFAULT) { + j_res = static_cast(col_node.emp_val_); + ObExpr* default_expr = ctx->spec_ptr_->emp_default_exprs_.at(col_node.col_info_.empty_expr_id_); + if (OB_FAIL(RegularCol::check_default_value_inner_mysql(ctx, col_node, default_expr, expr, j_res))) { + LOG_WARN("fail to check empty default value", K(ret)); + } + } + + if (OB_SUCC(ret) && 
col_node.col_info_.on_error_ == JSN_VALUE_DEFAULT) { + j_res = static_cast(col_node.err_val_); + ObExpr* default_expr = ctx->spec_ptr_->err_default_exprs_.at(col_node.col_info_.error_expr_id_); + if (OB_FAIL(RegularCol::check_default_value_inner_mysql(ctx, col_node, default_expr, expr, j_res))) { + LOG_WARN("fail to check error default value", K(ret)); + } + } + return ret; +} + +int RegularCol::check_default_value_inner_mysql(JtScanCtx* ctx, + ObRegCol &col_node, + ObExpr* default_expr, + ObExpr* expr, + ObIJsonBase* j_base) +{ + INIT_SUCC(ret); + ObDatum res; + ObDatum* tmp_datum = nullptr; + uint8_t is_type_mismatch = 0; + ObAccuracy accuracy = col_node.col_info_.data_type_.get_accuracy(); + char col_str[col_node.col_info_.col_name_.length()]; + ObObjType dst_type = expr->datum_meta_.type_; + ObJsonCastParam cast_param(dst_type, default_expr->datum_meta_.cs_type_, expr->datum_meta_.cs_type_, false); + cast_param.is_only_check_ = true; + cast_param.is_json_table_ = true; + cast_param.rt_expr_ = expr; + + if (OB_ISNULL(j_base)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("json data can not be null", K(ret)); + } else if ((dst_type != ObJsonType && !j_base->is_json_scalar(j_base->json_type()))) { + ret = OB_INVALID_DEFAULT; + LOG_USER_ERROR(OB_INVALID_DEFAULT, col_node.col_info_.col_name_.length(), col_node.col_info_.col_name_.ptr()); + } else if (OB_FAIL(ObJsonUtil::cast_to_res(&ctx->row_alloc_, *ctx->eval_ctx_, + j_base, accuracy, cast_param, res, is_type_mismatch))) { + ret = OB_OPERATE_OVERFLOW; + databuff_printf(col_str, col_node.col_info_.col_name_.length(), 0, "%s", col_node.col_info_.col_name_.length(), col_node.col_info_.col_name_.ptr()); + LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "JSON_TABLE", col_str); + } + return ret; +} + +int RegularCol::check_default_value_oracle(JtScanCtx* ctx, ObJtColInfo &col_info, ObExpr* expr) +{ + INIT_SUCC(ret); + if (static_cast(col_info.col_type_) == COL_TYPE_VALUE) { + if (col_info.on_empty_ == JSN_VALUE_DEFAULT) { + ObExpr* 
default_expr = ctx->spec_ptr_->emp_default_exprs_.at(col_info.empty_expr_id_); + if (OB_FAIL(RegularCol::check_default_value_inner_oracle(ctx, col_info, expr, default_expr))) { + LOG_WARN("fail to check empty default value in oracle", K(ret)); + } + } + if (OB_SUCC(ret) && col_info.on_error_ == JSN_VALUE_DEFAULT) { + ObExpr* default_expr = ctx->spec_ptr_->err_default_exprs_.at(col_info.error_expr_id_); + if (OB_FAIL(RegularCol::check_default_value_inner_oracle(ctx, col_info, expr, default_expr))) { + LOG_WARN("fail to check error default value in oracle", K(ret)); + } + } + } + return ret; +} + +int JsonTableFunc::set_expr_exec_param(ObRegCol& col_node, JtScanCtx* ctx) +{ + INIT_SUCC(ret); + ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_node.col_info_.output_column_idx_); + + ObObjType dst_type = expr->datum_meta_.type_; + ObAccuracy accuracy = col_node.col_info_.data_type_.get_accuracy(); + col_node.expr_param_.truncate_ = col_node.col_info_.truncate_; + col_node.expr_param_.format_json_ = col_node.col_info_.format_json_; + col_node.expr_param_.wrapper_ = col_node.col_info_.wrapper_; + col_node.expr_param_.empty_type_ = col_node.col_info_.on_empty_; + col_node.expr_param_.error_type_ = col_node.col_info_.on_error_; + col_node.expr_param_.accuracy_ = accuracy; + col_node.expr_param_.dst_type_ = dst_type; + col_node.expr_param_.pretty_type_ = 0; + col_node.expr_param_.ascii_type_ = 0; + col_node.expr_param_.scalars_type_ = col_node.col_info_.allow_scalar_; + if (OB_FAIL(col_node.expr_param_.on_mismatch_.push_back(col_node.col_info_.on_mismatch_))) { + LOG_WARN("fail to push mismatch value into array", K(ret)); + } else if (OB_FAIL(col_node.expr_param_.on_mismatch_type_.push_back(col_node.col_info_.on_mismatch_type_))) { + LOG_WARN("fail to push mismatch type into array", K(ret)); + } + return ret; +} + +int JsonTableFunc::set_on_empty(ObRegCol& col_node, JtScanCtx* ctx, bool &need_cast, bool& is_null) +{ + INIT_SUCC(ret); + JtColType col_type = 
col_node.type(); + bool is_cover_by_error = true; + if (col_type == COL_TYPE_QUERY) { + is_null = false; + bool is_json_arr = false; + bool is_json_obj = false; + if (OB_FAIL(ObExprJsonQuery::get_empty_option(is_cover_by_error, + col_node.col_info_.on_empty_, is_null, is_json_arr, + is_json_obj))) { + if (is_cover_by_error) { + SET_COVER_ERROR(ctx, ret); + } + LOG_WARN("empty cluase report error res", K(ret)); + } else if (is_null) { + col_node.curr_ = nullptr; + ret = OB_SUCCESS; + } else if (is_json_arr) { + col_node.curr_ = ctx->jt_op_->get_js_array(); + } else if (is_json_obj) { + col_node.curr_ = ctx->jt_op_->get_js_object(); + } + } else if (col_type == COL_TYPE_VALUE) { + is_null = false; + ObDatum *t_res = NULL; + if (OB_FAIL(ObExprJsonValue::get_empty_option(t_res, is_cover_by_error, col_node.col_info_.on_empty_, + static_cast(col_node.err_val_), is_null))) { + if (is_cover_by_error) { + SET_COVER_ERROR(ctx, ret); + } + LOG_WARN("empty clause report error opt", K(ret)); + } else if (is_null) { + col_node.curr_ = nullptr; + } else { + if (OB_ISNULL(col_node.emp_val_) || !col_node.is_emp_evaled_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get err_val", K(ret), K(col_node.is_emp_evaled_)); + } else if (lib::is_oracle_mode()) { + col_node.curr_ = col_node.emp_val_; + col_node.res_flag_ = ResultType::EMPTY_DATUM; + } else { // mysql mode + col_node.curr_ = col_node.emp_val_; + col_node.res_flag_ = ResultType::NOT_DATUM; + } + } + } else if (col_type == COL_TYPE_EXISTS) { + bool res_val = false; + if (OB_FAIL(ObExprJsonExists::get_empty_option(col_node.col_info_.on_empty_, res_val))) { + LOG_WARN("empty clause report error opt", K(ret)); + } else if (!res_val) { // result will return false currently, not return true + if (ob_is_string_type(col_node.col_info_.data_type_.get_obj_type())) { + ObString value = lib::is_oracle_mode() ? 
"false" : "0"; + void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonString)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + col_node.curr_ = static_cast(new(buf)ObJsonString(value.ptr(), value.length())); + is_null = false; + } + } else { + void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonInt)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("buf allocate failed", K(ret)); + } else { + col_node.curr_ = static_cast(new(buf)ObJsonInt(0)); + is_null = false; + } + } + } + } + return ret; +} + +int JsonTableFunc::set_on_error(ObRegCol& col_node, JtScanCtx* ctx, int& ret) +{ + INIT_SUCC(tmp_ret); + if (ret == OB_SUCCESS) { + } else { + bool is_null = false; + const ObJtColInfo& info = col_node.col_info_; + JtColType col_type = col_node.type(); + ObIJsonBase* t_val = nullptr; + bool has_default_val = false; + ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_node.col_info_.output_column_idx_); + if (col_type == COL_TYPE_VALUE) { + ObExprJsonValue::get_error_option(info.on_error_, is_null, has_default_val); + if (is_null) { + col_node.curr_ = nullptr; + ret = ctx->is_need_end_ ? 
OB_ITER_END : OB_SUCCESS; + } else if (has_default_val) { + if (OB_ISNULL(col_node.err_val_) || !col_node.is_err_evaled_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get err_val", K(ret)); + } else if (lib::is_oracle_mode()) { + col_node.curr_ = col_node.err_val_; + col_node.res_flag_ = ResultType::ERROR_DATUM; + ret = OB_SUCCESS; + } else { // mysql mode + col_node.curr_ = col_node.err_val_; + col_node.res_flag_ = ResultType::NOT_DATUM; + ret = OB_SUCCESS; + } + } else if (ret != OB_SUCCESS) { + EVAL_COVER_CODE(ctx, ret) ; + if (ctx->is_need_end_ && OB_SUCC(ret)) { + ret = OB_ITER_END; + } + } + } else if (col_type == COL_TYPE_QUERY) { + t_val = NULL; + if (col_node.expr_param_.error_type_ == JSN_QUERY_ERROR) { + } else if (OB_FAIL(ObExprJsonQuery::get_error_option(col_node.expr_param_.error_type_, t_val, + ctx->jt_op_->get_js_array(), ctx->jt_op_->get_js_object(), is_null))) { + LOG_WARN("error option report error", K(ret)); + } else if (is_null) { + if (OB_FAIL(ObExprJsonQuery::get_mismatch_option(col_node.expr_param_.on_mismatch_[0], ctx->error_code_))) { + LOG_WARN("mismatch clause will report error", K(ret)); + } else { + is_null = true; + col_node.curr_ = nullptr; + ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; + } + } else { + col_node.curr_ = t_val; + ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; + } + } else if (col_type == COL_TYPE_EXISTS) { + int is_true = 0; + if (info.on_error_ == JSN_EXIST_ERROR) { + ret = ctx->error_code_; + if (OB_SUCC(ret) && ctx->is_need_end_) { + ret = OB_ITER_END; + } + } else if (info.on_error_ == JSN_EXIST_DEFAULT || info.on_error_ == JSN_EXIST_FALSE) { + is_null = false; + ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; + } else if (info.on_error_ == JSN_EXIST_TRUE) { + is_true = 0; + is_null = false; + ret = ctx->is_need_end_ ? OB_ITER_END : OB_SUCCESS; + } + + if (OB_FAIL(ret)) { + } else if (ob_is_string_type(info.data_type_.get_obj_type())) { + ObString value = is_true ? 
ObString("true") : ObString("false"); + if (lib::is_mysql_mode()) { + value = is_true ? ObString("1") : ObString("0"); + } + void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonString)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("buf allocate failed", K(ret)); + } else { + col_node.curr_ = static_cast(new(buf)ObJsonString(value.ptr(), value.length())); + is_null = false; + } + } else if (ob_is_number_tc(info.data_type_.get_obj_type()) || lib::is_mysql_mode()) { + void* buf = ctx->row_alloc_.alloc(sizeof(ObJsonInt)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("buf allocate failed", K(ret)); + } else { + col_node.curr_ = static_cast(new(buf)ObJsonInt(is_true)); + is_null = false; + } + } else { + if (col_node.col_info_.on_error_ != JSN_EXIST_ERROR) { + col_node.curr_ = nullptr; + is_null = true; + } else { + ret = OB_ERR_NON_NUMERIC_CHARACTER_VALUE; + } + } + } + + if (OB_SUCC(ret) && is_null) { + expr->locate_datum_for_write(*ctx->eval_ctx_).set_null(); + } + } + return ret; +} + +int JsonTableFunc::cast_to_result(ObRegCol& col_node, JtScanCtx* ctx, bool enable_error, bool is_pack_result) +{ + INIT_SUCC(ret); + ObIJsonBase *js_val = static_cast(col_node.curr_); + ObJtColInfo& col_info = col_node.get_column_def(); + bool is_truncate = static_cast(col_info.truncate_); + JtColType col_type = col_node.type(); + + ObExpr* expr = ctx->spec_ptr_->column_exprs_.at(col_info.output_column_idx_); + ObDatum& res = expr->locate_datum_for_write(*ctx->eval_ctx_); + ctx->res_obj_ = &res; + uint8_t is_type_mismatch = false; + uint8_t ascii_type = false; + + ObObjType dst_type = expr->datum_meta_.type_; + ObCollationType coll_type = expr->datum_meta_.cs_type_; + ObAccuracy accuracy = col_info.data_type_.get_accuracy(); + ObCollationType dst_coll_type = col_info.data_type_.get_collation_type(); + ObCollationType in_coll_type = ctx->is_charset_converted_ + ? 
CS_TYPE_UTF8MB4_BIN + : ctx->spec_ptr_->value_expr_->datum_meta_.cs_type_; + ObCollationLevel dst_coll_level = col_info.data_type_.get_collation_level(); + bool is_quote = (col_info.col_type_ == COL_TYPE_QUERY && js_val->json_type() == ObJsonNodeType::J_STRING); + ObJsonCastParam cast_param(dst_type, in_coll_type, dst_coll_type, ascii_type); + cast_param.is_const_ = ctx->is_const_input_; + cast_param.is_trunc_ = is_truncate; + cast_param.is_json_table_ = true; + cast_param.rt_expr_ = expr; + switch (col_type) { + case JtColType::COL_TYPE_VALUE: { + if (col_node.res_flag_ != ResultType::NOT_DATUM) { + ObDatum *js_val = static_cast(col_node.curr_); + ObExpr* default_expr; + if (col_node.res_flag_ == ResultType::EMPTY_DATUM) { + default_expr = ctx->spec_ptr_->emp_default_exprs_.at(col_node.col_info_.empty_expr_id_); + } else if (col_node.res_flag_ == ResultType::ERROR_DATUM) { + default_expr = ctx->spec_ptr_->err_default_exprs_.at(col_node.col_info_.error_expr_id_); + } + + if (OB_ISNULL(js_val)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("empty/error value can not be null", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::cast_to_res(ctx->row_alloc_, *js_val, *expr, *default_expr, *ctx->eval_ctx_, res, true))) { + enable_error = false; + LOG_WARN("fail to cast to res", K(ret)); + } + } else { + ret = ObJsonUtil::cast_to_res(&ctx->row_alloc_, *ctx->eval_ctx_, + js_val, accuracy, cast_param, res, is_type_mismatch); + } + break; + } + case JtColType::COL_TYPE_QUERY: { + cast_param.is_quote_ = true; + ret = ObJsonUtil::cast_to_res(&ctx->row_alloc_, *ctx->eval_ctx_, + js_val, accuracy, cast_param, res, is_type_mismatch); + break; + } + case JtColType::COL_TYPE_EXISTS: { + ret = ObJsonUtil::cast_to_res(&ctx->row_alloc_, *ctx->eval_ctx_, + js_val, accuracy, cast_param, res, is_type_mismatch); + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect type input", K(ret)); + break; + } + } + if (OB_FAIL(ret) && enable_error) { + int tmp_ret = 
set_on_error(col_node, ctx, ret); + if (tmp_ret != OB_SUCCESS) { + LOG_WARN("failed to set error val.", K(tmp_ret)); + } else if (OB_SUCC(tmp_ret) && OB_NOT_NULL(col_node.curr_) + && OB_FAIL(cast_to_result(col_node, ctx, false, false))) { // due of without type calc, so use cast transform default value to res. + LOG_WARN("fail to cast default value to res", K(ret)); + } + } + + if (OB_SUCC(ret) && is_pack_result && is_lob_storage(dst_type) && (col_node.res_flag_ == ResultType::NOT_DATUM) && !res.is_null()) { + ObString val = res.get_string(); + if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(*expr, *ctx->eval_ctx_, res, val, &ctx->row_alloc_))) { + LOG_WARN("fail to pack res result.", K(ret)); + } + } + return ret; +} + +int JsonTableFunc::eval_seek_col(ObRegCol &col_node, void* in, JtScanCtx* ctx, bool &is_null_value, bool &need_cast_res) +{ + INIT_SUCC(ret); + ObJsonSeekResult hit; + ObIJsonBase* in_val = static_cast(in); + ObJsonPath* json_path = static_cast(col_node.path_); + ObExpr* col_expr = ctx->spec_ptr_->column_exprs_.at(col_node.col_info_.output_column_idx_); + in_val->set_allocator(&ctx->row_alloc_); + if (OB_FAIL(in_val->seek(*json_path, json_path->path_node_cnt(), true, false, hit))) { + SET_COVER_ERROR(ctx, ret); + LOG_WARN("json seek failed", K(col_node.col_info_.path_), K(ret)); + } else if (hit.size() == 0) { + col_node.curr_ = nullptr; + is_null_value = true; + } else if (col_node.type() == COL_TYPE_EXISTS && hit.size() > 0) { + is_null_value = false; + col_node.curr_ = hit[0]; + } else if (col_node.type() != COL_TYPE_QUERY && hit.size() == 1) { + is_null_value = false; + col_node.curr_ = hit[0]; + } else if (col_node.type() == COL_TYPE_VALUE + && !(lib::is_mysql_mode() + && ob_is_json(col_expr->datum_meta_.type_))) { + ret = OB_ERR_JSON_VALUE_NO_SCALAR; + SET_COVER_ERROR(ctx, ret); + } else if (col_node.type() == COL_TYPE_QUERY || + (col_node.type() == COL_TYPE_VALUE + && lib::is_mysql_mode() + && ob_is_json(col_expr->datum_meta_.type_))) { + 
void* js_arr_buf = ctx->row_alloc_.alloc(sizeof(ObJsonArray)); + ObIJsonBase* js_arr_ptr = nullptr; + if (OB_ISNULL(js_arr_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate json array buf", K(ret)); + } else if (OB_ISNULL(js_arr_ptr = new (js_arr_buf) ObJsonArray(&ctx->row_alloc_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to new json array node", K(ret)); + } else if (OB_FAIL(ObExprJsonQuery::append_node_into_res(js_arr_ptr, json_path, hit, &ctx->row_alloc_))) { + LOG_WARN("fail to tree apeend node", K(ret)); + } + + if (OB_SUCC(ret)) { + is_null_value = false; + col_node.curr_ = js_arr_ptr; + } + } + return ret; +} + +int JsonTableFunc::init_ctx(ObRegCol &scan_node, JtScanCtx*& ctx) +{ + INIT_SUCC(ret); + ObJsonPath* js_path = NULL; + scan_node.tab_type_ = MulModeTableType::OB_ORA_JSON_TABLE_TYPE; + bool need_eval = false; // flag of eval default value + bool need_datum = lib::is_oracle_mode(); + if (!scan_node.is_path_evaled_ && OB_ISNULL(scan_node.path_) + && (scan_node.node_type() == REG_TYPE + || scan_node.node_type() == SCAN_TYPE) + && !scan_node.col_info_.path_.empty()) { + void* path_buf = ctx->op_exec_alloc_->alloc(sizeof(ObJsonPath)); + if (OB_ISNULL(path_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate json path buffer", K(ret)); + } else { + js_path = new (path_buf) ObJsonPath(scan_node.col_info_.path_, ctx->op_exec_alloc_); + if (OB_FAIL(js_path->parse_path())) { + ret = OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR; + LOG_USER_ERROR(OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR, scan_node.col_info_.path_.length(), scan_node.col_info_.path_.ptr()); + } else { + scan_node.expr_param_.json_path_ = js_path; + scan_node.path_ = js_path; + scan_node.is_path_evaled_ = true; + } + } + } + if (OB_FAIL(ret)) { + } else if (scan_node.node_type() == REG_TYPE) { + if (!scan_node.is_emp_evaled_) { + need_eval = false; + if (scan_node.type() == COL_TYPE_VALUE) { + if (scan_node.col_info_.on_empty_ == JSN_VALUE_IMPLICIT + 
&& scan_node.col_info_.on_error_ == JSN_VALUE_DEFAULT) { + ObExpr* default_expr = ctx->spec_ptr_->err_default_exprs_.at(scan_node.col_info_.error_expr_id_); + if (OB_FAIL(eval_default_value(ctx, default_expr, scan_node.err_val_, need_datum))) { + ret = OB_INVALID_DEFAULT; + LOG_USER_ERROR(OB_INVALID_DEFAULT, scan_node.col_info_.col_name_.length(), scan_node.col_info_.col_name_.ptr()); + } else { + scan_node.is_err_evaled_ = true; + } + } else if (scan_node.col_info_.on_empty_ == JSN_VALUE_DEFAULT) { + ObExpr* default_expr = ctx->spec_ptr_->emp_default_exprs_.at(scan_node.col_info_.empty_expr_id_); + if (OB_FAIL(eval_default_value(ctx, default_expr, scan_node.emp_val_, need_datum))) { + ret = OB_INVALID_DEFAULT; + LOG_USER_ERROR(OB_INVALID_DEFAULT, scan_node.col_info_.col_name_.length(), scan_node.col_info_.col_name_.ptr()); + } else { + scan_node.is_emp_evaled_ = true; + } + } + } + } + if (OB_SUCC(ret) && !scan_node.is_err_evaled_) { + need_eval = false; + if (scan_node.type() == COL_TYPE_VALUE) { + if (scan_node.col_info_.on_error_ == JSN_VALUE_DEFAULT) { + need_eval = true; + } + if (need_eval) { + ObExpr* default_expr = ctx->spec_ptr_->err_default_exprs_.at(scan_node.col_info_.error_expr_id_); + if (OB_FAIL(eval_default_value(ctx, default_expr, scan_node.err_val_, need_datum))) { + ret = OB_INVALID_DEFAULT; + LOG_USER_ERROR(OB_INVALID_DEFAULT, scan_node.col_info_.col_name_.length(), scan_node.col_info_.col_name_.ptr()); + } else { + scan_node.is_err_evaled_ = true; + } + } + } + } + scan_node.res_flag_ = ResultType::NOT_DATUM; + ObExpr* col_expr = ctx->spec_ptr_->column_exprs_.at(scan_node.col_info_.output_column_idx_); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(set_expr_exec_param(scan_node, ctx))) { + LOG_WARN("fail to init expr param", K(ret)); + } else if (OB_FAIL(RegularCol::check_item_method_json(scan_node, ctx))) { + LOG_WARN("fail to check expr param", K(ret)); + } else if (OB_FAIL(check_default_value(ctx, scan_node, col_expr))) { + // json value empty 
need check default value first + LOG_WARN("default value check fail", K(ret)); + } + } + return ret; +} + +int JsonTableFunc::eval_default_value(JtScanCtx*& ctx, ObExpr*& default_expr, void*& res, bool need_datum) +{ + INIT_SUCC(ret); + ObDatum* emp_datum = nullptr; + ObIJsonBase* emp_json = nullptr; + if (OB_FAIL(default_expr->eval(*ctx->eval_ctx_, emp_datum))) { + LOG_WARN("failed do cast to returning type.", K(ret)); + } else if (need_datum) { + res = emp_datum; + } else if (OB_FAIL(RegularCol::parse_default_value_2json(default_expr, ctx, emp_datum, emp_json))) { + LOG_WARN("fail to process empty default value", K(ret)); + } else { + res = emp_json; + } + return ret; +} + +int JsonTableFunc::reset_path_iter(ObRegCol &scan_node, void* in, JtScanCtx*& ctx, ScanType init_flag, bool &is_null_value) +{ + INIT_SUCC(ret); + if (init_flag == SCAN_NODE_TYPE) { + ObJsonSeekResult hit; + ObIJsonBase* in_val = static_cast(in); + ObJsonPath* js_path = static_cast(scan_node.path_); + in_val->set_allocator(&ctx->row_alloc_); + if (OB_ISNULL(in_val) || OB_ISNULL(js_path)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect input", K(ret)); + } else if (OB_FAIL(in_val->seek(*js_path, js_path->path_node_cnt(), true, false, hit))) { + LOG_WARN("json seek failed", K(js_path), K(ret)); + SET_COVER_ERROR(ctx, ret); + } else if (hit.size() == 0) { + scan_node.cur_pos_ = 0; + scan_node.total_ = 1; + is_null_value = true; + scan_node.curr_ = scan_node.iter_ = nullptr; + } else if (hit.size() == 1) { + scan_node.iter_ = scan_node.curr_ = hit[0]; + is_null_value = false; + scan_node.cur_pos_ = 0; + scan_node.total_ = 1; + } else { + is_null_value = false; + void* js_arr_buf = ctx->row_alloc_.alloc(sizeof(ObJsonArray)); + ObJsonArray* js_arr_ptr = nullptr; + if (OB_ISNULL(js_arr_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate json array buf", K(ret)); + } else if (OB_ISNULL(js_arr_ptr = new (js_arr_buf) ObJsonArray(&ctx->row_alloc_))) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("failed to new json array node", K(ret)); + } else { + ObJsonNode *j_node = NULL; + ObIJsonBase *jb_node = NULL; + for (int32_t i = 0; OB_SUCC(ret) && i < hit.size(); i++) { + if (ObJsonBaseFactory::transform(&ctx->row_alloc_, hit[i], ObJsonInType::JSON_TREE, jb_node)) { // to tree + LOG_WARN("fail to transform to tree", K(ret), K(i), K(*(hit[i]))); + } else { + j_node = static_cast(jb_node); + if (OB_FAIL(js_arr_ptr->array_append(j_node->clone(&ctx->row_alloc_)))) { + LOG_WARN("failed to array append", K(ret), K(i), K(*j_node)); + } + } + } + + if (OB_SUCC(ret)) { + scan_node.curr_ = js_arr_ptr; + scan_node.total_ = hit.size(); + ObIJsonBase* iter = NULL; + if (OB_FAIL(js_arr_ptr->get_array_element(0, iter))) { + LOG_WARN("failed to get array selement 0.", K(ret)); + } else { + scan_node.iter_ = iter; + } + } + } + } + } else { + // do nothing in col node + } + return ret; +} + +int JsonTableFunc::get_iter_value(ObRegCol &col_node, JtScanCtx* ctx, bool &is_null_value) +{ + INIT_SUCC(ret); + + if (col_node.cur_pos_ + 1 < col_node.total_) { + col_node.cur_pos_++; + if (OB_FAIL(container_at(col_node.curr_, col_node.iter_, col_node.cur_pos_))) { + LOG_WARN("fail to get container element.", K(ret), K(col_node.cur_pos_)); + } + } else { + ret = OB_ITER_END; + } + return ret; +} + +int JsonTableFunc::eval_input(ObJsonTableOp &jt, JtScanCtx& ctx, ObEvalCtx &eval_ctx) +{ + INIT_SUCC(ret); + common::ObObjMeta& doc_obj_datum = ctx.spec_ptr_->value_expr_->obj_meta_; + ObDatumMeta& doc_datum = ctx.spec_ptr_->value_expr_->datum_meta_; + ObObjType doc_type = doc_datum.type_; + ObCollationType doc_cs_type = doc_datum.cs_type_; + ObString j_str; + bool is_null = false; + ObIJsonBase* in = NULL; + + if (doc_type == ObNullType) { + ret = OB_ITER_END; + } else if (doc_type == ObNCharType || + !(doc_type == ObJsonType + || doc_type == ObRawType + || ob_is_string_type(doc_type))) { + ret = OB_ERR_INPUT_JSON_TABLE; + LOG_WARN("fail to get json base", K(ret), K(doc_type)); + } 
else { + jt.reset_columns(); + if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(ctx.spec_ptr_->value_expr_, eval_ctx, + ctx.row_alloc_, j_str, is_null))) { + ret = OB_ERR_INPUT_JSON_TABLE; + LOG_WARN("get real data failed", K(ret)); + } else if (is_null) { + ret = OB_ITER_END; + } else if ((ob_is_string_type(doc_type) || doc_type == ObLobType) + && (doc_cs_type != CS_TYPE_BINARY) + && (ObCharset::charset_type_by_coll(doc_cs_type) != CHARSET_UTF8MB4)) { + // need convert to utf8 first, we are using GenericInsituStringStream > + char *buf = nullptr; + const int64_t factor = 2; + int64_t buf_len = j_str.length() * factor; + uint32_t result_len = 0; + + if (OB_ISNULL(buf = static_cast(ctx.row_alloc_.alloc(buf_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret)); + } else if (OB_FAIL(ObCharset::charset_convert(doc_cs_type, j_str.ptr(), + j_str.length(), CS_TYPE_UTF8MB4_BIN, buf, + buf_len, result_len))) { + LOG_WARN("charset convert failed", K(ret)); + } else { + ctx.is_charset_converted_ = true; + j_str.assign_ptr(buf, result_len); + } + } + ObJsonInType j_in_type = ObJsonExprHelper::get_json_internal_type(doc_type); + ObJsonInType expect_type = ObJsonInType::JSON_TREE; + uint32_t parse_flag = lib::is_oracle_mode() ? 
ObJsonParser::JSN_RELAXED_FLAG : ObJsonParser::JSN_DEFAULT_FLAG; + + // json type input, or has is json check + bool is_ensure_json = lib::is_oracle_mode() && (doc_type != ObJsonType); + + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&ctx.row_alloc_, j_str, j_in_type, expect_type, in, parse_flag)) + || (in->json_type() != ObJsonNodeType::J_ARRAY && in->json_type() != ObJsonNodeType::J_OBJECT)) { + if (OB_FAIL(ret) || (is_ensure_json)) { + in= nullptr; + ret = OB_ERR_JSON_SYNTAX_ERROR; + SET_COVER_ERROR(&ctx, ret); + ctx.is_need_end_ = 1; + if (lib::is_oracle_mode() && jt.get_root_entry()->get_scan_node()->seek_node_.col_info_.on_error_ != JSN_QUERY_ERROR) { + ret = OB_SUCCESS; + } + } else { + ret = OB_SUCCESS; + } + } + if (OB_SUCC(ret)) { + jt.input_ = in; + } + } + return ret; +} + +int JsonTableFunc::container_at(void* in, void *&out, int32_t pos) { + INIT_SUCC(ret); + ObIJsonBase* in_ = static_cast(in); + ObIJsonBase* res = nullptr; + + if (in_->json_type() == ObJsonNodeType::J_ARRAY) { + if (OB_FAIL(in_->get_array_element(pos, res))) { + LOG_WARN("fail to get array element", K(ret), K(pos)); + } + } else if (in_->json_type() == ObJsonNodeType::J_OBJECT) { + if (OB_FAIL(in_->get_object_value(pos, res))) { + LOG_WARN("fail to get object element", K(ret), K(pos)); + } + } + if (OB_SUCC(ret)) { + out = res; + } + + return ret; +} } // end namespace sql } // end namespace oceanbase \ No newline at end of file diff --git a/src/sql/engine/basic/ob_json_table_op.h b/src/sql/engine/basic/ob_json_table_op.h index defcca2c94..4820db47e3 100644 --- a/src/sql/engine/basic/ob_json_table_op.h +++ b/src/sql/engine/basic/ob_json_table_op.h @@ -24,28 +24,48 @@ #include "sql/engine/ob_exec_context.h" #include "sql/engine/expr/ob_expr.h" #include "sql/engine/expr/ob_json_param_type.h" +#include "sql/engine/expr/ob_expr_json_utils.h" +#include "sql/engine/expr/ob_expr_xml_func_helper.h" +#include "deps/oblib/src/lib/xml/ob_xml_util.h" namespace 
oceanbase { namespace sql { struct JtScanCtx; -static const int64_t DEFAULT_STR_LENGTH = -1; class ObExpr; struct JtScanCtx; class ObJsonTableSpec; +class MulModeTableFunc; +class JoinNode; +class ObRegCol; +class ScanNode; +class UnionNode; +class ObMultiModeTableNode; -typedef enum JtJoinType { - LEFT_TYPE, - RIGHT_TYPE, -} JtJoinType; +enum table_type : int8_t { + OB_INVALID_TABLE = 0, + OB_JSON_TABLE = 1, + OB_XML_TABLE = 2, +}; typedef enum JtNodeType { REG_TYPE, // ordinality && reg type + SCAN_TYPE, // scan node type JOIN_TYPE, // join node - SCAN_TYPE, // scan node + UNION_TYPE, // scan node } JtNodeType; +typedef enum ResultType { + NOT_DATUM, + EMPTY_DATUM, + ERROR_DATUM +} ResultType; + +typedef enum ScanType{ // symbol of execute stage, scan node or col node + SCAN_NODE_TYPE, + COL_NODE_TYPE, +} ScanType; typedef struct ObJtColInfo { @@ -81,269 +101,6 @@ typedef struct ObJtColInfo K_(output_column_idx), K_(col_name), K_(path), K_(parent_id), K_(id)); } ObJtColInfo; -class JtColNode -{ -public: - JtColNode(const ObJtColInfo& col_info) - : node_idx_(-1), - total_(0), - cur_pos_(0), - in_(nullptr), - curr_(nullptr), - iter_(nullptr), - js_path_(nullptr), - is_evaled_(false), - is_sub_evaled_(false), - is_ord_node_(false), - is_null_result_(false), - is_nested_evaled_(false), - is_emp_evaled_(false), - is_err_evaled_(false), - emp_val_(nullptr), - err_val_(nullptr), - emp_datum_(nullptr), - err_datum_(nullptr) { - new (&col_info_) ObJtColInfo(col_info); - node_type_ = REG_TYPE; - is_ord_node_ = col_info_.col_type_ == COL_TYPE_ORDINALITY; - } - virtual void destroy(); - JtColType type() { return static_cast(col_info_.col_type_); } - JtNodeType node_type() { return node_type_; } - void set_node_type(JtNodeType type) { node_type_ = type; } - const ObJtColInfo& get_column_node_def() { return col_info_; } - ObJtColInfo& get_column_def() { return col_info_; } - bool is_ord_node() { return is_ord_node_; } - bool is_null_result() { return is_null_result_; } - int 
init_js_path(JtScanCtx* ctx); - void set_idx(int64_t idx) { node_idx_ = idx; } - int special_proc_on_input_type(char* buf, size_t len, ObString& res); - int check_default_cast_allowed(ObExpr* expr); - int check_col_res_type(JtScanCtx* ctx); - int set_val_on_empty(JtScanCtx* ctx, bool& need_cast_res); - int set_val_on_empty_mysql(JtScanCtx* ctx, bool& need_cast_res); - int process_default_value_pre_mysql(JtScanCtx* ctx); - int wrapper2_json_array(JtScanCtx* ctx, ObJsonBaseVector &hit); - int get_default_value_pre_mysql(ObExpr* default_expr, - JtScanCtx* ctx, - ObIJsonBase *&res, - ObObjType &dst_type); - int64_t node_idx() { return node_idx_; } - virtual int open(); - virtual int get_next_row(ObIJsonBase* in, JtScanCtx* ctx, bool& is_null_value); - - - void proc_query_on_error(JtScanCtx *ctx, int& err_code, bool& is_null); - - - // fixed member - int64_t node_idx_; - JtNodeType node_type_; - ObJtColInfo col_info_; - - /** - * changable member - */ - int32_t total_; - int32_t cur_pos_; - - ObIJsonBase* in_; - ObIJsonBase* curr_; - ObIJsonBase* iter_; - ObJsonPath *js_path_; - - bool is_evaled_; - bool is_sub_evaled_; - bool is_ord_node_; - bool is_null_result_; - bool is_nested_evaled_; - bool is_emp_evaled_; - bool is_err_evaled_; - ObIJsonBase *emp_val_; - ObIJsonBase *err_val_; - int32_t ord_val_; - ObDatum *emp_datum_; - ObDatum *err_datum_; - - TO_STRING_KV(K_(node_type), - K_(node_idx), - K_(total), - K_(cur_pos), - K_(ord_val), - K_(is_ord_node), - K_(is_evaled), - K_(is_sub_evaled), - K_(node_type), - K_(col_info)); - -}; - -class JtJoinNode : public JtColNode -{ -public: - JtJoinNode(const ObJtColInfo& col_info) - : JtColNode(col_info), - join_type_(LEFT_TYPE), - left_idx_(-1), - right_idx_(-1), - left_(nullptr), - right_(nullptr) { - node_type_ = JOIN_TYPE; - } - void destroy(); - int open(); - int get_next_row(ObIJsonBase* in, JtScanCtx* ctx, bool& is_null_value); - void set_join_type(JtJoinType join_type) { join_type_ = join_type; } - void 
set_left(JtColNode* node) { - left_ = node; - left_idx_ = node->node_idx(); - } - void set_right(JtColNode* node) { - right_ = node; - right_idx_ = node->node_idx(); - } - JtColNode* left() { return left_; } - JtColNode* right() { return right_; } - JtColNode** left_addr() { return &left_; } - JtColNode** right_addr() { return &right_; } - JtJoinType get_join_type() { return join_type_; } - int64_t left_idx() { return left_idx_; } - int64_t right_idx() { return left_idx_; } - - TO_STRING_KV(K_(node_type), - K_(node_idx), - K_(join_type), - K_(left_idx), - K_(right_idx), - KP_(left), - KP_(right)); - - JtJoinType join_type_; - int64_t left_idx_; - int64_t right_idx_; - JtColNode *left_; - JtColNode *right_; -}; - -class JtScanNode : public JtColNode -{ -public: - JtScanNode(const ObJtColInfo& col_info) - : JtColNode(col_info), - is_regular_done_(false), - is_nested_done_(false), - reg_col_defs_(), - nest_col_def_(nullptr) { - node_type_ = SCAN_TYPE; - } - void destroy(); - int open(); - int get_next_row(ObIJsonBase* in, JtScanCtx* ctx, bool& is_null_value); - int assign(const JtScanNode& other); - int add_reg_column_node(JtColNode* node, bool add_idx = false); - int add_nest_column_node(JtColNode* node, bool add_idx = false) { - nest_col_def_ = node; - return add_idx ? 
child_idx_.push_back(node->node_idx()) : OB_SUCCESS; - } - size_t reg_column_count() { return reg_col_defs_.count(); } - JtColNode* nest_col_node() { return nest_col_def_; } - JtColNode* reg_col_node(size_t i) { return reg_col_defs_.at(i); } - ObIArray& child_node_ref() { return child_idx_; } - void reset_reg_columns(JtScanCtx* ctx); - - TO_STRING_KV(K_(node_type), - K_(node_idx), - K_(is_regular_done), - K_(is_nested_done), - KP_(nest_col_def), - K(reg_col_defs_.count())); - - bool is_regular_done_; - bool is_nested_done_; - common::ObSEArray reg_col_defs_; - JtColNode* nest_col_def_; - - common::ObSEArray child_idx_; -}; - - -class JtFuncHelpler -{ -public: - static int cast_to_res(JtScanCtx* ctx, ObIJsonBase* js_val, JtColNode& col_info, bool enable_error); - static int cast_json_to_res(JtScanCtx* ctx, ObIJsonBase* js_val, JtColNode& col_node, ObDatum& res, bool enable_error = true); - static int cast_to_json(common::ObIAllocator *allocator, ObIJsonBase *j_base, ObString &val); - static int cast_to_bit(ObIJsonBase *j_base, uint64_t &val, common::ObAccuracy &accuracy); - static int bit_length_check(const ObAccuracy &accuracy, uint64_t &value); - static int cast_to_number(common::ObIAllocator *allocator, - ObIJsonBase *j_base, - common::ObAccuracy &accuracy, - ObObjType dst_type, - number::ObNumber &val); - static int cast_to_double(ObIJsonBase *j_base, ObObjType dst_type, double &val); - static int cast_to_float(ObIJsonBase *j_base, ObObjType dst_type, float &val); - static int cast_to_year(ObIJsonBase *j_base, uint8_t &val); - static int cast_to_time(ObIJsonBase *j_base, common::ObAccuracy &accuracy, int64_t &val); - static int cast_to_date(ObIJsonBase *j_base, int32_t &val); - static int cast_to_datetime(JtColNode* node, - ObIJsonBase *j_base, - common::ObIAllocator *allocator, - const ObBasicSessionInfo *session, - ObEvalCtx *ctx, - const ObExpr *expr, - common::ObAccuracy &accuracy, - int64_t &val); - static int cast_to_otimstamp(ObIJsonBase *j_base, - const 
ObBasicSessionInfo *session, - ObEvalCtx *ctx, - const ObExpr *expr, - common::ObAccuracy &accuracy, - ObObjType dst_type, - ObOTimestampData &out_val); - static bool type_cast_to_string(JtColNode* node, - ObString &json_string, - common::ObIAllocator *allocator, - ObIJsonBase *j_base, - ObAccuracy &accuracy); - static int cast_to_string(JtColNode* node, - common::ObIAllocator *allocator, - ObIJsonBase *j_base, - ObCollationType in_cs_type, - ObCollationType dst_cs_type, - common::ObAccuracy &accuracy, - ObObjType dst_type, - ObString &val, - bool is_trunc = false, - bool is_quote = false, - bool is_const = false); - static int padding_char_for_cast(int64_t padding_cnt, const ObCollationType &padding_cs_type, - ObIAllocator &alloc, ObString &padding_res); - static int time_scale_check(const ObAccuracy &accuracy, int64_t &value, bool strict = false); - static int datetime_scale_check(const ObAccuracy &accuracy, int64_t &value, bool strict = false); - static int number_range_check(const ObAccuracy &accuracy, - ObIAllocator *allocator, - number::ObNumber &val, - bool strict = false); - static int check_default_val_accuracy(const ObAccuracy &accuracy, - const ObObjType &type, - const ObDatum *obj); - static int check_default_value(JtScanCtx* ctx, - ObJtColInfo &col_info_, - ObExpr* col_expr); - static int cast_to_uint(ObIJsonBase *j_base, ObObjType dst_type, uint64_t &val); - static int cast_to_int(ObIJsonBase *j_base, ObObjType dst_type, int64_t &val); - - static int set_error_val(JtScanCtx* ctx, JtColNode& col_info, int& ret); - static int set_error_val_mysql(JtScanCtx* ctx, JtColNode& col_info, int& ret); - static int check_default_value_inner(JtScanCtx* ctx, - ObJtColInfo &col_info, - ObExpr* col_expr, - ObExpr* default_expr); - static int pre_default_value_check_mysql(JtScanCtx* ctx, - ObIJsonBase* js_val, - JtColNode& col_node); -}; - struct JtColTreeNode { JtColTreeNode(const ObJtColInfo& info) : col_base_info_(info), @@ -369,12 +126,13 @@ public: 
err_default_exprs_(alloc), has_correlated_expr_(false), alloc_(&alloc), - cols_def_(alloc) {} + cols_def_(alloc), + table_type_(MulModeTableType::OB_ORA_JSON_TABLE_TYPE), + namespace_def_(alloc) {} int dup_origin_column_defs(common::ObIArray& columns); - int construct_tree(common::ObArray all_nodes, JtScanNode* parent); - int construct_tree(common::ObArray all_nodes, JtJoinNode* parent); - + int construct_tree(common::ObArray all_nodes, JoinNode* parent); + int construct_tree(common::ObArray all_nodes, UnionNode* parent); ObExpr *value_expr_; common::ObFixedArray column_exprs_; // 列输出表达式 common::ObFixedArray emp_default_exprs_; @@ -383,6 +141,8 @@ public: ObIAllocator* alloc_; common::ObFixedArray cols_def_; + MulModeTableType table_type_; + common::ObFixedArray namespace_def_; }; class ObJsonTableOp; @@ -390,18 +150,34 @@ class ObJsonTableOp; struct JtScanCtx { JtScanCtx() : row_alloc_(), - op_exec_alloc_(nullptr) {} + op_exec_alloc_(nullptr), + context(nullptr), + table_func_(nullptr), + default_ns() {} + + bool is_xml_table_func() { + return spec_ptr_->table_type_ == OB_ORA_XML_TABLE_TYPE; + } + bool is_json_table_func() { + return spec_ptr_->table_type_ == OB_ORA_JSON_TABLE_TYPE; + } + ObJsonTableSpec* spec_ptr_; ObEvalCtx* eval_ctx_; ObExecContext* exec_ctx_; common::ObArenaAllocator row_alloc_; + ObMulModeMemCtx *mem_ctx_; + ObMulModeMemCtx *xpath_ctx_; ObIAllocator *op_exec_alloc_; ObJsonTableOp* jt_op_; + void *context; // xml ns prefix_ns + MulModeTableFunc *table_func_; + ObString default_ns; bool is_evaled_; bool is_cover_error_; - bool is_need_end_; + bool is_need_end_; // parse input json doc fail will affect by column bool is_charset_converted_; bool is_const_input_; int error_code_; @@ -418,11 +194,11 @@ public: ObJsonTableOp(ObExecContext &exec_ctx, const ObOpSpec &spec, ObOpInput *input) : ObOperator(exec_ctx, spec, input), def_root_(nullptr), - jt_root_(nullptr), allocator_(&exec_ctx.get_allocator()), is_inited_(false), is_evaled_(false), - 
in_(nullptr), + root_(nullptr), + input_(nullptr), j_null_(), j_arr_(allocator_), j_obj_(allocator_) @@ -441,37 +217,317 @@ public: ObJsonNull* get_js_null() { return &j_null_; } ObJsonArray* get_js_array() { return &j_arr_; } ObJsonObject* get_js_object() { return &j_obj_; } - ObIJsonBase* get_root_param() { return in_; } - JtScanNode* get_root_entry() { return jt_root_; } + void* get_root_param() { return input_; } + JoinNode* get_root_entry() { return root_; } TO_STRING_KV(K_(is_inited), K_(col_count)); private: int init(); + int reset_variable(); // reset var in rescan int init_data_obj(); // allocate data_ array - void reset_columns(); int generate_column_trees(JtColTreeNode*& root); int find_column(int32_t id, JtColTreeNode* root, JtColTreeNode*& col); int generate_table_exec_tree(); int generate_table_exec_tree(ObIAllocator* allocator, const JtColTreeNode& orig_col, - JtScanNode*& scan_col, int64_t& node_idx); + JoinNode*& join_col); + int inner_get_next_row_jt(); // json table function inner private: JtColTreeNode* def_root_; - JtScanNode* jt_root_; common::ObIAllocator *allocator_; uint32_t col_count_; bool is_inited_; bool is_evaled_; - ObIJsonBase* in_; + JoinNode* root_; // mul mode table JtScanCtx jt_ctx_; +public: + void *input_; // mul mode table + void reset_columns(); + private: ObJsonNull j_null_; ObJsonArray j_arr_; ObJsonObject j_obj_; }; +class MulModeTableFunc { +public: + MulModeTableFunc() {} + ~MulModeTableFunc() {} + + virtual int init_ctx(ObRegCol &scan_node, JtScanCtx*& ctx) { return 0; } // init variable before seek value + virtual int eval_default_value(JtScanCtx*& ctx, ObExpr*& default_expr, void*& res, bool need_datum) { return 0; } // eval default value in inner open + virtual int container_at(void* in, void *&out, int32_t pos) { return 0; } + virtual int eval_input(ObJsonTableOp &jt, JtScanCtx& ctx, ObEvalCtx &eval_ctx) { return 0; } // get root input + virtual int reset_path_iter(ObRegCol &scan_node, void* in, JtScanCtx*& ctx, 
ScanType init_flag, bool &is_null_value) { return 0; } + virtual int get_iter_value(ObRegCol &col_node, JtScanCtx* ctx, bool &is_null_value) { return 0; }; + virtual int eval_seek_col(ObRegCol &col_node, void* in, JtScanCtx* ctx, bool &is_null_value, bool &need_cast_res) { return 0; } + virtual int col_res_type_check(ObRegCol &col_node, JtScanCtx* ctx) { return 0; } + virtual int check_default_value(JtScanCtx* ctx, ObRegCol &col_node, ObExpr* expr) { return 0; } + virtual int set_on_empty(ObRegCol& col_node, JtScanCtx* ctx, bool &need_cast, bool& is_null) { return 0; } + virtual int set_on_error(ObRegCol& col_node, JtScanCtx* ctx, int& ret) { return 0; } + virtual int cast_to_result(ObRegCol& col_node, JtScanCtx* ctx, bool enable_error = false, bool is_pack_result = true) { return 0; } + virtual int set_expr_exec_param(ObRegCol& col_node, JtScanCtx* ctx) { return 0; } + virtual int reset_ctx(ObRegCol &scan_node, JtScanCtx*& ctx) { return 0; } // init variable before seek value +}; + +class JsonTableFunc : public MulModeTableFunc { +public: + JsonTableFunc() + : MulModeTableFunc() {} + ~JsonTableFunc() {} + + int init_ctx(ObRegCol &scan_node, JtScanCtx*& ctx); + int eval_default_value(JtScanCtx*& ctx, ObExpr*& default_expr, void*& res, bool need_datum); + int container_at(void* in, void *&out, int32_t pos); + int eval_input(ObJsonTableOp &jt, JtScanCtx& ctx, ObEvalCtx &eval_ctx); + int reset_path_iter(ObRegCol &scan_node, void* in, JtScanCtx*& ctx, ScanType init_flag, bool &is_null_value); + int get_iter_value(ObRegCol &col_node, JtScanCtx* ctx, bool &is_null_value); + int eval_seek_col(ObRegCol &col_node, void* in, JtScanCtx* ctx, bool &is_null_value, bool &need_cast_res); + int col_res_type_check(ObRegCol &col_node, JtScanCtx* ctx); + int check_default_value(JtScanCtx* ctx, ObRegCol &col_node, ObExpr* expr); + int set_on_empty(ObRegCol& col_node, JtScanCtx* ctx, bool &need_cast, bool& is_null); + int set_on_error(ObRegCol& col_node, JtScanCtx* ctx, int& ret); + 
int cast_to_result(ObRegCol& col_node, JtScanCtx* ctx, bool enable_error = false, bool is_pack_result = true); + int set_expr_exec_param(ObRegCol& col_node, JtScanCtx* ctx); + int reset_ctx(ObRegCol &scan_node, JtScanCtx*& ctx) { return OB_SUCCESS; } // reset variable if variable is not const +}; + +class XmlTableFunc : public MulModeTableFunc { +public: + XmlTableFunc() + : MulModeTableFunc() {} + ~XmlTableFunc() {} + + int init_ctx(ObRegCol &scan_node, JtScanCtx*& ctx); + int eval_default_value(JtScanCtx*& ctx, ObExpr*& default_expr, void*& res, bool need_datum); + int container_at(void* in, void *&out, int32_t pos); + int eval_input(ObJsonTableOp &jt, JtScanCtx& ctx, ObEvalCtx &eval_ctx); // 获取跟节点输入 xml 解析namespace + int reset_path_iter(ObRegCol &scan_node, void* in, JtScanCtx*& ctx, ScanType init_flag, bool &is_null_value); + int get_iter_value(ObRegCol &col_node, JtScanCtx* ctx, bool &is_null_value); + int eval_seek_col(ObRegCol &col_node, void* in, JtScanCtx* ctx, bool &is_null_value, bool &need_cast_res); + int col_res_type_check(ObRegCol &col_node, JtScanCtx* ctx); + int check_default_value(JtScanCtx* ctx, ObRegCol &col_node, ObExpr* expr); + int set_on_empty(ObRegCol& col_node, JtScanCtx* ctx, bool &need_cast, bool& is_null); + int set_on_error(ObRegCol& col_node, JtScanCtx* ctx, int& ret); + int cast_to_result(ObRegCol& col_node, JtScanCtx* ctx, bool enable_error = false, bool is_pack_result = true); + int set_expr_exec_param(ObRegCol& col_node, JtScanCtx* ctx) { return 0; } + int reset_ctx(ObRegCol &scan_node, JtScanCtx*& ctx); // reset variable if variable is not const +}; + +class ObMultiModeTableNode { +public: + ObMultiModeTableNode() + : in_(nullptr), + is_evaled_(false), + is_null_result_(false) { + node_type_ = REG_TYPE; + } + virtual void destroy(); + virtual int reset(JtScanCtx* ctx) { + UNUSED(ctx); + is_evaled_ = false; + is_null_result_ = false; + return OB_SUCCESS; + } + JtNodeType node_type() { return node_type_; } + virtual int 
get_next_row(void* in, JtScanCtx* ctx, bool& is_null_value) { return 0; } + bool is_null_result() { return is_null_result_; } + virtual int open(JtScanCtx* ctx) { + return ObMultiModeTableNode::reset(ctx); + } + virtual void* get_curr_iter_value() { return nullptr; } + + JtNodeType node_type_; + void* in_; + bool is_evaled_; // 节点是否被解析过 + bool is_null_result_; // 节点是否返回空值 + + TO_STRING_KV(K_(node_type), + K_(is_evaled), + K_(node_type)); +}; + +class ObRegCol final +{ +public: + ObRegCol(const ObJtColInfo& col_info) + : total_(0), + path_(nullptr), + curr_(nullptr), + iter_(nullptr), + cur_pos_(-1), + ord_val_(-1), + is_path_evaled_(false), + is_emp_evaled_(false), + is_err_evaled_(false), + emp_val_(nullptr), + err_val_(nullptr), + res_flag_(NOT_DATUM), + expr_param_() { + new (&col_info_) ObJtColInfo(col_info); + node_type_ = REG_TYPE; + is_ord_node_ = col_info_.col_type_ == COL_TYPE_ORDINALITY; + } + void destroy(); + int reset(JtScanCtx* ctx); + bool is_ord_node() { return is_ord_node_; } + const ObJtColInfo& get_column_node_def() { return col_info_; } + ObJtColInfo& get_column_def() { return col_info_; } + int open(JtScanCtx* ctx); + int eval_regular_col(void *in, JtScanCtx* ctx, bool& is_null_value); // process regular column + JtColType type() { return static_cast(col_info_.col_type_); } + JtNodeType node_type() { return node_type_; } + + int32_t total_; + void *path_; + void* curr_; + void* iter_; + int32_t cur_pos_; + int32_t ord_val_; + bool is_path_evaled_; + ObJtColInfo col_info_; + bool is_ord_node_; + bool is_emp_evaled_; + bool is_err_evaled_; + void *emp_val_; + void *err_val_; + // data in curr_ is datum type, not json/xml ,0 is not datum, 1 is empty datum, 2 is error datum. 
+ ResultType res_flag_; + MulModeTableType tab_type_; // distinct xml path or json path , due to not common root + JtNodeType node_type_; // distinct regcol in scan node or regcol + ObJsonExprParam expr_param_; + TO_STRING_KV(K_(node_type), + K_(cur_pos), + K_(ord_val), + K_(is_ord_node), + K_(node_type), + K_(col_info)); + +}; + +class ScanNode : public ObMultiModeTableNode +{ + public: + ScanNode(const ObJtColInfo& col_info) + : ObMultiModeTableNode(), + reg_col_defs_(), + child_idx_(), + seek_node_(col_info) + { + new (&seek_node_) ObRegCol(col_info); + seek_node_.node_type_ = SCAN_TYPE; + node_type_ = SCAN_TYPE; + } + void destroy(); + int reset(JtScanCtx* ctx); + int open(JtScanCtx* ctx); + int get_next_row(void* in, JtScanCtx* ctx, bool& is_null_value); + int get_next_iter(void* in, JtScanCtx* ctx, bool& is_null_value); + int assign(const ScanNode& other); + int add_reg_column_node(ObRegCol* node, bool add_idx = false); + size_t reg_column_count() { return reg_col_defs_.count(); } + ObRegCol* reg_col_node(size_t i) { return reg_col_defs_.at(i); } + ObIArray& child_node_ref() { return child_idx_; } + void reset_reg_columns(JtScanCtx* ctx); + void* get_curr_iter_value() { + return seek_node_.iter_; + } + + TO_STRING_KV(K_(node_type), + K(reg_col_defs_.count())); + common::ObSEArray reg_col_defs_; + common::ObSEArray child_idx_; + ObRegCol seek_node_; +}; + +class UnionNode : public ObMultiModeTableNode +{ +public: + UnionNode() + : ObMultiModeTableNode(), // useless + left_(nullptr), + right_(nullptr), + is_left_iter_end_(false), + is_right_iter_end_(true) { + node_type_ = UNION_TYPE; + } + void destroy(); + int reset(JtScanCtx* ctx); + int open(JtScanCtx* ctx); + int get_next_row(void* in, JtScanCtx* ctx, bool& is_null_value); + void set_left(ObMultiModeTableNode* node) { + left_ = node; + } + void set_right(ObMultiModeTableNode* node) { + right_ = node; + } + ObMultiModeTableNode* left() { return left_; } + ObMultiModeTableNode* right() { return right_; } + 
void* get_curr_iter_value() { + return left_->get_curr_iter_value(); + } + + TO_STRING_KV(K_(node_type), + KP_(left), + KP_(right)); + + ObMultiModeTableNode *left_; + ObMultiModeTableNode *right_; + bool is_left_iter_end_; // judge left child whether evaled_, + bool is_right_iter_end_; // judge right child whether evaled_, +}; + +class JoinNode : public UnionNode +{ + public: + JoinNode() + : UnionNode() + {} + int get_next_row(void* in, JtScanCtx* ctx, bool& is_null_value); + int assign(const JoinNode& other); + ScanNode* get_scan_node() { return dynamic_cast(left_); } + // void* get_curr_iter_value() { return get_scan_node()->get_curr_iter_value(); } + + TO_STRING_KV(K_(node_type)); +}; + +class RegularCol final{ +public: + static int eval_query_col(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* col_expr, bool& is_null); + static int eval_value_col(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* col_expr, bool& is_null); + static int eval_exist_col(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* col_expr, bool& is_null); + static int eval_xml_scalar_col(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* col_expr); + static int eval_xml_type_col(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* col_expr); + static void proc_query_on_error(JtScanCtx* ctx, ObRegCol &col_node, int& ret, bool& is_null); + static int check_default_val_cast_allowed(JtScanCtx* ctx, ObMultiModeTableNode &col_node, ObExpr* expr) { return 0; } // check type of default value + static int set_val_on_empty(JtScanCtx* ctx, ObRegCol &col_node, bool& need_cast_res, bool& is_null); + static bool check_cast_allowed(const ObObjType orig_type, + const ObCollationType orig_cs_type, + const ObObjType expect_type, + const ObCollationType expect_cs_type, + const bool is_explicit_cast); + static int check_default_value_oracle(JtScanCtx* ctx, ObJtColInfo &col_info, ObExpr* expr); + static int check_default_value_inner_oracle(JtScanCtx* ctx, + ObJtColInfo &col_info, + ObExpr* col_expr, + ObExpr* default_expr); + static int 
check_item_method_json(ObRegCol &col_node, JtScanCtx* ctx); + static int check_default_value_inner_mysql(JtScanCtx* ctx, + ObRegCol &col_node, + ObExpr* default_expr, + ObExpr* expr, + ObIJsonBase* j_base); + static int parse_default_value_2json(ObExpr* default_expr, + JtScanCtx* ctx, + ObDatum*& tmp_datum, + ObIJsonBase *&res); + static int check_default_value_mysql(ObRegCol &col_node, JtScanCtx* ctx, ObExpr* expr); +}; + + } // end namespace sql } // end namespace oceanbase diff --git a/src/sql/engine/dml/ob_dml_service.cpp b/src/sql/engine/dml/ob_dml_service.cpp index aed26b3f56..ce9939f411 100644 --- a/src/sql/engine/dml/ob_dml_service.cpp +++ b/src/sql/engine/dml/ob_dml_service.cpp @@ -351,7 +351,11 @@ int ObDMLService::check_lob_column_changed(ObEvalCtx &eval_ctx, cmp_params.compare_len_ = UINT64_MAX; cmp_params.timeout_ = timeout; cmp_params.tx_desc_ = eval_ctx.exec_ctx_.get_my_session()->get_tx_desc(); - if(OB_FAIL(lob_mngr->equal(old_lob, new_lob, cmp_params, is_equal))) { + if (old_lob.is_persist_lob() && new_lob.is_delta_temp_lob()) { + if (OB_FAIL(ObDeltaLob::has_diff(new_lob, result))) { + LOG_WARN("delata lob has_diff fail", K(ret), K(old_lob), K(new_lob)); + } + } else if(OB_FAIL(lob_mngr->equal(old_lob, new_lob, cmp_params, is_equal))) { LOG_WARN("fail to compare lob", K(ret), K(old_lob), K(new_lob)); } else { result = is_equal ? 
0 : 1; diff --git a/src/sql/engine/expr/ob_datum_cast.cpp b/src/sql/engine/expr/ob_datum_cast.cpp index 77909e1a19..14298a5945 100644 --- a/src/sql/engine/expr/ob_datum_cast.cpp +++ b/src/sql/engine/expr/ob_datum_cast.cpp @@ -17,6 +17,7 @@ #include "lib/utility/ob_fast_convert.h" #include "share/object/ob_obj_cast_util.h" #include "share/object/ob_obj_cast.h" +#include "share/ob_json_access_utils.h" #include "sql/engine/expr/ob_datum_cast.h" #include "sql/engine/ob_exec_context.h" #include "sql/engine/expr/ob_expr_util.h" @@ -30,12 +31,10 @@ #include "sql/engine/expr/ob_expr_json_func_helper.h" #include "lib/geo/ob_geometry_cast.h" #include "sql/engine/expr/ob_geo_expr_utils.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/udt/ob_udt_type.h" #include "sql/engine/expr/ob_expr_sql_udt_utils.h" #include "lib/xml/ob_xml_util.h" #include "sql/engine/expr/ob_expr_xml_func_helper.h" -#endif #include "pl/ob_pl.h" #include "pl/ob_pl_user_type.h" #ifdef OB_BUILD_ORACLE_PL @@ -2295,7 +2294,7 @@ static int common_json_string(const ObExpr &expr, expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &allocator); ObIJsonBase *j_base = &j_bin; ObJsonBuffer j_buf(&allocator); ObString j_str; @@ -3048,7 +3047,7 @@ CAST_FUNC_NAME(int, json) } ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get int json binary", K(ret), K(in_type), K(in_val)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -3314,7 +3313,7 @@ CAST_FUNC_NAME(uint, json) common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, 
&temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get uint json binary", K(ret), K(in_type), K(in_val)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -3707,7 +3706,7 @@ static int common_string_json(const ObExpr &expr, if (OB_SUCC(ret) && !is_null_res) { ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get string json binary", K(ret), K(in_type), K(raw_bin)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -4365,7 +4364,7 @@ static int common_number_json(const number::ObNumber &nmb, const ObObjType in_ty ObString raw_bin; if (OB_FAIL(tmp_num.from(nmb, temp_allocator))) { LOG_WARN("copy number failed", K(ret)); - } else if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + } else if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get number json binary", K(ret), K(in_type), K(nmb)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -4645,7 +4644,7 @@ CAST_FUNC_NAME(float, json) common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get float json binary", K(ret), K(in_type), K(in_val)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -5026,7 +5025,7 @@ CAST_FUNC_NAME(double, json) ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &temp_allocator = 
tmp_alloc_g.get_allocator(); ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get double json binary", K(ret), K(in_type), K(in_val)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -5511,7 +5510,7 @@ CAST_FUNC_NAME(datetime, json) common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get datetime json binary", K(ret), K(in_type), K(in_val)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -5833,7 +5832,7 @@ CAST_FUNC_NAME(date, json) common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get date json binary", K(ret), K(in_type), K(in_val)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -6120,7 +6119,7 @@ CAST_FUNC_NAME(year, json) common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get year json binary", K(ret), K(in_type), K(in_val)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -6516,7 +6515,7 @@ CAST_FUNC_NAME(bit, json) ObIJsonBase *j_base = &j_opaque; ObString raw_bin; - if 
(OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get int json binary", K(ret), K(in_val), K(buf), K(BUF_LEN)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -7153,7 +7152,7 @@ CAST_FUNC_NAME(time, json) common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("fail to get time json binary", K(ret), K(in_type), K(in_val)); } else if (OB_FAIL(common_json_bin(expr, ctx, res_datum, raw_bin))) { LOG_WARN("fail to fill json bin lob locator", K(ret)); @@ -7908,7 +7907,7 @@ CAST_FUNC_NAME(json, int) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { @@ -7942,7 +7941,7 @@ CAST_FUNC_NAME(json, uint) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { @@ -7977,7 +7976,7 @@ CAST_FUNC_NAME(json, double) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; if 
(OB_FAIL(j_bin.reset_iter())) { @@ -8011,7 +8010,7 @@ CAST_FUNC_NAME(json, float) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { @@ -8040,7 +8039,7 @@ static int common_json_number(common::ObDatum &child_res, const ObExpr &expr, j_bin_str))) { LOG_WARN("fail to get real data", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &alloc); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { @@ -8092,7 +8091,7 @@ CAST_FUNC_NAME(json, datetime) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { @@ -8124,7 +8123,7 @@ CAST_FUNC_NAME(json, date) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { @@ -8155,7 +8154,7 @@ CAST_FUNC_NAME(json, time) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { @@ -8187,7 
+8186,7 @@ CAST_FUNC_NAME(json, year) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; if (OB_FAIL(j_bin.reset_iter())) { @@ -8219,7 +8218,7 @@ CAST_FUNC_NAME(json, raw) ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObString j_bin_str = child_res->get_string(); - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; ObJsonBuffer j_buf(&temp_allocator); ObDatum t_res_datum; @@ -8270,7 +8269,7 @@ CAST_FUNC_NAME(json, string) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; ObJsonBuffer j_buf(&temp_allocator); @@ -8325,7 +8324,7 @@ CAST_FUNC_NAME(json, bit) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = &j_bin; uint64_t out_val; ObObjType out_type = expr.datum_meta_.type_; @@ -8358,7 +8357,7 @@ CAST_FUNC_NAME(json, otimestamp) expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), j_bin_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_bin_str)); } else { - ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length()); + ObJsonBin j_bin(j_bin_str.ptr(), j_bin_str.length(), &temp_allocator); ObIJsonBase *j_base = 
&j_bin; if (OB_FAIL(j_bin.reset_iter())) { @@ -9015,7 +9014,6 @@ CAST_FUNC_NAME(string, udt) { EVAL_STRING_ARG() { -#ifdef OB_BUILD_ORACLE_XML const ObObjMeta &in_obj_meta = expr.args_[0]->obj_meta_; ObObjType in_type = expr.args_[0]->datum_meta_.type_; ObObjType out_type = expr.datum_meta_.type_; @@ -9063,9 +9061,6 @@ CAST_FUNC_NAME(string, udt) LOG_WARN("pack_xml_res failed", K(ret)); } } -#else - ret = OB_NOT_SUPPORTED; -#endif } return ret; } @@ -9089,7 +9084,6 @@ CAST_FUNC_NAME(udt, string) // udt(xmltype) can be null: select dump(xmlparse(document NULL)) from dual; res_datum.set_null(); } else { -#ifdef OB_BUILD_ORACLE_XML ObString blob_data = child_res->get_string(); ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); @@ -9120,9 +9114,6 @@ CAST_FUNC_NAME(udt, string) LOG_WARN("fail to deep copy str", K(ret)); } } -#else - ret = OB_NOT_SUPPORTED; -#endif } return ret; } @@ -9151,7 +9142,6 @@ CAST_FUNC_NAME(udt, udt) { EVAL_STRING_ARG() { -#ifdef OB_BUILD_ORACLE_XML const ObObjMeta &in_obj_meta = expr.args_[0]->obj_meta_; const ObObjMeta &out_obj_meta = expr.obj_meta_; uint64_t in_udt_id = T_OBJ_NOT_SUPPORTED; @@ -9171,9 +9161,6 @@ CAST_FUNC_NAME(udt, udt) } else { ret = cast_udt_to_other_not_support(expr, ctx, res_datum); } -#else - ret = OB_NOT_SUPPORTED; -#endif } return ret; } @@ -9182,7 +9169,7 @@ CAST_FUNC_NAME(pl_extend, string) { EVAL_STRING_ARG() { -#ifdef OB_BUILD_ORACLE_XML +#ifdef OB_BUILD_ORACLE_PL const ObObjMeta &in_obj_meta = expr.args_[0]->obj_meta_; const ObObjMeta &out_obj_meta = expr.obj_meta_; if (pl::PL_OPAQUE_TYPE == in_obj_meta.get_extend_type()) { @@ -9402,7 +9389,7 @@ CAST_FUNC_NAME(pl_extend, sql_udt) // should set subschema_id on output obj_meta in code generation EVAL_STRING_ARG() { -#ifdef OB_BUILD_ORACLE_XML +#ifdef OB_BUILD_ORACLE_PL const ObObjMeta &in_obj_meta = expr.args_[0]->obj_meta_; const ObObjType in_type = in_obj_meta.get_type(); const ObObjMeta 
&out_obj_meta = expr.obj_meta_; diff --git a/src/sql/engine/expr/ob_expr_cmp_func.cpp b/src/sql/engine/expr/ob_expr_cmp_func.cpp index f75a49ff1b..c7ddb5b786 100644 --- a/src/sql/engine/expr/ob_expr_cmp_func.cpp +++ b/src/sql/engine/expr/ob_expr_cmp_func.cpp @@ -559,8 +559,8 @@ struct ObRelationalExtraFunc } else if (OB_FAIL(r_instr_iter.get_full_data(r_data))) { COMMON_LOG(WARN, "LobDebug: get right lob str iter full data failed ", K(ret), K(r_instr_iter)); } else { - ObJsonBin j_bin_l(l_data.ptr(), l_data.length()); - ObJsonBin j_bin_r(r_data.ptr(), r_data.length()); + ObJsonBin j_bin_l(l_data.ptr(), l_data.length(), &allocator); + ObJsonBin j_bin_r(r_data.ptr(), r_data.length(), &allocator); ObIJsonBase *j_base_l = &j_bin_l; ObIJsonBase *j_base_r = &j_bin_r; diff --git a/src/sql/engine/expr/ob_expr_column_conv.cpp b/src/sql/engine/expr/ob_expr_column_conv.cpp index 83edd1db50..3fc5e61c5d 100644 --- a/src/sql/engine/expr/ob_expr_column_conv.cpp +++ b/src/sql/engine/expr/ob_expr_column_conv.cpp @@ -471,7 +471,7 @@ int ObExprColumnConv::column_convert(const ObExpr &expr, ObLobLocatorV2 input_lob(raw_str.ptr(), raw_str.length(), has_lob_header); bool is_delta = input_lob.is_valid() && input_lob.is_delta_temp_lob(); if (is_delta) { // delta lob - if (!(ob_is_text_tc(in_type))) { + if (!(ob_is_text_tc(in_type) || ob_is_json(in_type))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("delta lob can not convert to non-text type", K(ret), K(out_type)); } else { diff --git a/src/sql/engine/expr/ob_expr_eval_functions.cpp b/src/sql/engine/expr/ob_expr_eval_functions.cpp index b7d134cb35..887d0f898c 100644 --- a/src/sql/engine/expr/ob_expr_eval_functions.cpp +++ b/src/sql/engine/expr/ob_expr_eval_functions.cpp @@ -239,6 +239,8 @@ #include "ob_expr_json_object.h" #include "ob_expr_json_extract.h" #include "ob_expr_json_contains.h" +#include "ob_expr_json_schema_valid.h" +#include "ob_expr_json_schema_validation_report.h" #include "ob_expr_json_contains_path.h" #include 
"ob_expr_json_depth.h" #include "ob_expr_json_keys.h" @@ -250,6 +252,7 @@ #include "ob_expr_json_valid.h" #include "ob_expr_json_remove.h" #include "ob_expr_json_array_append.h" +#include "ob_expr_json_append.h" #include "ob_expr_json_array_insert.h" #include "ob_expr_json_value.h" #include "ob_expr_json_replace.h" @@ -324,6 +327,9 @@ #include "ob_expr_xml_serialize.h" #include "ob_expr_xmlcast.h" #include "ob_expr_update_xml.h" +#include "ob_expr_insert_child_xml.h" +#include "ob_expr_xml_delete_xml.h" +#include "ob_expr_xml_sequence.h" #include "ob_expr_generator_func.h" #include "ob_expr_random.h" #include "ob_expr_randstr.h" @@ -1087,14 +1093,14 @@ static ObExpr::EvalFunc g_expr_eval_functions[] = { NULL, //ObExprInnerIsTrue::number_is_true_start, /* 621 */ NULL, //ObExprInnerIsTrue::number_is_true_end, /* 622 */ NULL, //ObExprInnerDecodeLike::eval_inner_decode_like /* 623 */ - NULL, //ObExprJsonSchemaValid::eval_json_schema_valid /* 624 */ - NULL, //ObExprJsonSchemaValidationReport::eval_json_schema_validation_report /* 625 */ - NULL, //ObExprInsertChildXml::eval_insert_child_xml /* 626 */ - NULL, //ObExprDeleteXml::eval_delete_xml /* 627 */ - NULL, //ObExprExtractValue::eval_mysql_extract_value /* 628 */ - NULL, //ObExprUpdateXml::eval_mysql_update_xml /* 629 */ - NULL, //ObExprXmlSequence::eval_xml_sequence /* 630 */ - NULL, //ObExprJsonAppend::eval_json_array_append /* 631 */ + ObExprJsonSchemaValid::eval_json_schema_valid, /* 624 */ + ObExprJsonSchemaValidationReport::eval_json_schema_validation_report, /* 625 */ + ObExprInsertChildXml::eval_insert_child_xml, /* 626 */ + ObExprDeleteXml::eval_delete_xml, /* 627 */ + ObExprExtractValue::eval_mysql_extract_value, /* 628 */ + ObExprUpdateXml::eval_mysql_update_xml, /* 629 */ + ObExprXmlSequence::eval_xml_sequence, /* 630 */ + ObExprJsonAppend::eval_json_array_append, /* 631 */ NULL, //unused /* 632 */ ObExprUdtConstruct::eval_udt_construct, /* 633 */ ObExprUDTAttributeAccess::eval_attr_access, /* 634 */ diff 
--git a/src/sql/engine/expr/ob_expr_extra_info_factory.cpp b/src/sql/engine/expr/ob_expr_extra_info_factory.cpp index ef9782e066..12e5fd10d0 100644 --- a/src/sql/engine/expr/ob_expr_extra_info_factory.cpp +++ b/src/sql/engine/expr/ob_expr_extra_info_factory.cpp @@ -33,6 +33,9 @@ #include "sql/engine/expr/ob_expr_sql_udt_construct.h" #include "sql/engine/expr/ob_expr_priv_attribute_access.h" #include "sql/engine/expr/ob_expr_lrpad.h" +#include "sql/engine/expr/ob_expr_json_schema_valid.h" +#include "sql/engine/expr/ob_expr_json_schema_validation_report.h" +#include "sql/engine/expr/ob_expr_json_utils.h" namespace oceanbase { @@ -105,6 +108,10 @@ void ObExprExtraInfoFactory::register_expr_extra_infos() REG_EXTRA_INFO(T_FUN_SYS_PRIV_SQL_UDT_ATTR_ACCESS, ObExprUdtAttrAccessInfo); REG_EXTRA_INFO(T_FUN_SYS_LPAD, ObExprOracleLRpadInfo); REG_EXTRA_INFO(T_FUN_SYS_RPAD, ObExprOracleLRpadInfo); + REG_EXTRA_INFO(T_FUN_SYS_JSON_SCHEMA_VALID, ObExprJsonSchemaValidInfo); + REG_EXTRA_INFO(T_FUN_SYS_JSON_SCHEMA_VALIDATION_REPORT, ObExprJsonSchemaValidInfo); + REG_EXTRA_INFO(T_FUN_SYS_JSON_VALUE, ObExprJsonQueryParamInfo); + REG_EXTRA_INFO(T_FUN_SYS_JSON_QUERY, ObExprJsonQueryParamInfo); } } // end namespace sql diff --git a/src/sql/engine/expr/ob_expr_extract_value.cpp b/src/sql/engine/expr/ob_expr_extract_value.cpp index 9f10a15cc2..771967bfdd 100644 --- a/src/sql/engine/expr/ob_expr_extract_value.cpp +++ b/src/sql/engine/expr/ob_expr_extract_value.cpp @@ -13,11 +13,9 @@ #include "ob_expr_extract_value.h" #include "ob_expr_lob_utils.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_parser.h" #include "lib/xml/ob_xml_util.h" #include "sql/engine/expr/ob_expr_xml_func_helper.h" -#endif #include "lib/utility/utility.h" #include "sql/session/ob_sql_session_info.h" #include "sql/engine/ob_exec_context.h" @@ -45,7 +43,36 @@ int ObExprExtractValue::calc_result_typeN(ObExprResType &type, common::ObExprTypeCtx &type_ctx) const { int ret = OB_SUCCESS; - if (OB_UNLIKELY(param_num != 2 
&& param_num != 3)) { + bool is_mysql_mode = lib::is_mysql_mode(); + if (is_mysql_mode) { + if (OB_UNLIKELY(param_num != 2)) { + ret = OB_ERR_PARAM_SIZE; + LOG_WARN("invalid param number", K(ret), K(param_num)); + } else if (ObNullType == types[1].get_type()) { + ret = OB_ERR_INVALID_XPATH_EXPRESSION; + LOG_WARN("xpath syntax get null", K(ret)); + } else if (!ob_is_string_tc(types[1].get_type())) { + ret = OB_ERR_INVALID_XPATH_EXPRESSION; + LOG_WARN("xpath syntax get invalid type", K(ret), K(types[1].get_type())); + } else { + for (int8_t i = 0; OB_SUCC(ret) && i < param_num; i++) { + ObObjType param_type = types[i].get_type(); + if (param_type == ObNullType) { + } else if (ob_is_string_type(param_type)) { + if (ob_is_string_tc(param_type)) { + types[i].set_calc_type(ObVarcharType); + } + if (types[i].get_charset_type() != CHARSET_UTF8MB4) { + types[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + } + } else { + ret = OB_ERR_INVALID_XPATH_EXPRESSION; + LOG_WARN("xpath syntax get invalid type", K(ret), K(param_type)); + } + } + } + + } else if (OB_UNLIKELY(param_num != 2 && param_num != 3)) { ret = OB_ERR_PARAM_SIZE; LOG_WARN("invalid argument number", K(ret), K(param_num)); } else if (!is_called_in_sql()) { @@ -73,20 +100,26 @@ int ObExprExtractValue::calc_result_typeN(ObExprResType &type, } } - if (OB_SUCC(ret)) { - type.set_type(ObVarcharType); - type.set_collation_type(CS_TYPE_UTF8MB4_BIN); - type.set_collation_level(CS_LEVEL_IMPLICIT); - // length == OB_MAX_ORACLE_VARCHAR_LENGTH is not supported by generated key, use OB_MAX_VARCHAR_LENGTH_KEY instead - // length == OB_MAX_VARCHAR_LENGTH_KEY is not supported by generated column length check , use MAX_ORACLE_COMMENT_LENGTH instead - type.set_length(MAX_ORACLE_COMMENT_LENGTH); - type.set_length_semantics(LS_BYTE); - } + } + if (OB_FAIL(ret)) { + } else if (is_mysql_mode) { + type.set_type(ObLongTextType); + type.set_collation_type(CS_TYPE_UTF8MB4_BIN); + type.set_collation_level(CS_LEVEL_IMPLICIT); + 
type.set_length(OB_MAX_MYSQL_VARCHAR_LENGTH); + type.set_length_semantics(LS_BYTE); + } else { + type.set_type(ObVarcharType); + type.set_collation_type(CS_TYPE_UTF8MB4_BIN); + type.set_collation_level(CS_LEVEL_IMPLICIT); + // length == OB_MAX_ORACLE_VARCHAR_LENGTH is not supported by generated key, use OB_MAX_VARCHAR_LENGTH_KEY instead + // length == OB_MAX_VARCHAR_LENGTH_KEY is not supported by generated column length check , use MAX_ORACLE_COMMENT_LENGTH instead + type.set_length(MAX_ORACLE_COMMENT_LENGTH); + type.set_length_semantics(LS_BYTE); } return ret; } -#ifdef OB_BUILD_ORACLE_XML int ObExprExtractValue::eval_extract_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { int ret = OB_SUCCESS; @@ -142,6 +175,165 @@ int ObExprExtractValue::eval_extract_value(const ObExpr &expr, ObEvalCtx &ctx, O return ret; } +int ObExprExtractValue::eval_mysql_extract_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) +{ + INIT_SUCC(ret); + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &allocator = tmp_alloc_g.get_allocator(); + ObTextStringDatumResult output_result(expr.datum_meta_.type_, &expr, &ctx, &res); + ObString xml_frag; + ObString xpath_expr; + ObIMulModeBase *xml_base = nullptr; + ObPathExprIter xpath_iter(&allocator); + ObStringBuffer xml_res(&allocator); + + ObMulModeMemCtx* xml_mem_ctx = nullptr; + lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(ObXMLExprHelper::get_tenant_id(ctx.exec_ctx_.get_my_session()), "XMLModule")); + if (OB_ISNULL(ctx.exec_ctx_.get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get session failed.", K(ret)); + } else if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(&allocator, xml_mem_ctx))) { + LOG_WARN("fail to create tree memory context", K(ret)); + } else if (expr.arg_cnt_ != 2) { + ret = OB_ERR_PARAM_SIZE; + LOG_WARN("invalid arg_cnt_", K(ret), K(expr.arg_cnt_)); + } else if (!expr.args_[1]->is_const_expr()) { + ret = OB_XPATH_EXPRESSION_UNSUPPORTED; + LOG_WARN("args_[1] get 
const expr invalid", K(ret), K(expr.args_[1])); + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(expr.args_[0], ctx, xml_frag, allocator))) { + LOG_WARN("get xml frag string failed", K(ret)); + } else if (xml_frag.empty()) { + // do nothing + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(expr.args_[1], ctx, xpath_expr, allocator))) { + LOG_WARN("get xpath expr failed.", K(ret)); + } else if (OB_FAIL(ObMulModeFactory::get_xml_base(xml_mem_ctx, xml_frag, ObNodeMemType::TREE_TYPE, ObNodeMemType::BINARY_TYPE, xml_base, M_DOCUMENT))) { + ret = OB_SUCCESS; + if (OB_FAIL(ObMulModeFactory::get_xml_base(xml_mem_ctx, xml_frag, ObNodeMemType::TREE_TYPE, ObNodeMemType::BINARY_TYPE, xml_base, M_CONTENT))) { + LOG_USER_WARN(OB_ERR_XML_PARSE); + ret = OB_SUCCESS; + LOG_WARN("parse xml_frag failed.", K(xml_frag)); + } + } + + if (OB_FAIL(ret) || OB_ISNULL(xml_base)) { + } else if (OB_FAIL(extract_mysql_xpath_result(xml_mem_ctx, xpath_expr, xml_base, xml_res))) { + LOG_WARN("failed to extract xpath result.", K(ret), K(xpath_expr)); + } + + ObString value = xml_res.string(); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(xml_base)) { + if (xml_frag.empty()) { + if (OB_FAIL(ObXMLExprHelper::set_string_result(expr, ctx, res, value))) { + LOG_WARN("failed to set res when xml frag empty.", K(ret), K(xml_res)); + } + } else { + res.set_null(); + } + } else if (OB_FAIL(ObXMLExprHelper::set_string_result(expr, ctx, res, value))) { + LOG_WARN("failed to set res", K(ret), K(xml_res)); + } + + return ret; +} + +int ObExprExtractValue::get_new_xpath(ObString xpath_str, ObString &new_xpath, bool &cal_count) +{ + INIT_SUCC(ret); + int64_t xpath_start = 0; + int64_t xpath_end = 0; + ObString count_str = ObString(6, "count("); + if (xpath_str.prefix_match_ci(count_str)) { + for (int64_t start = 0; start < xpath_str.length(); start++) { + if (xpath_str.ptr()[start] == '(') { + xpath_start = start + 1; + for (int64_t end = xpath_str.length() - 1; end > start + 1; end--) { + if 
(xpath_str.ptr()[end] == ')') { + xpath_end = end; + break; + } + } + break; + } + } + if (xpath_end != 0) { + cal_count = true; + new_xpath = ObString(xpath_end - xpath_start, xpath_str.ptr() + xpath_start); + } + } + + return ret; +} + +int ObExprExtractValue::extract_mysql_xpath_result(ObMulModeMemCtx *xml_mem_ctx, ObString& xpath_str, + ObIMulModeBase* xml_doc, ObStringBuffer &xml_res) +{ + INIT_SUCC(ret); + ObPathExprIter xpath_iter(xml_mem_ctx->allocator_); + ObIMulModeBase *node = NULL; + ObSEArray result_nodes; + ObString default_ns; + ObPathVarObject prefix_ns(*(xml_mem_ctx->allocator_)); + ObString new_xpath = xpath_str; + bool cal_count = false; + if (OB_FAIL(get_new_xpath(xpath_str, new_xpath, cal_count))) { + LOG_WARN("get new xpath failed.", K(ret)); + } else if (OB_FAIL(xpath_iter.init(xml_mem_ctx, new_xpath, default_ns, xml_doc, &prefix_ns))) { + LOG_WARN("fail to init xpath iterator", K(new_xpath), K(default_ns), K(ret)); + ObXMLExprHelper::replace_xpath_ret_code(ret); + } else if (OB_FAIL(xpath_iter.open())) { + LOG_WARN("fail to open xpath iterator", K(ret)); + ObXMLExprHelper::replace_xpath_ret_code(ret); + } + + while (OB_SUCC(ret)) { + if (OB_FAIL(xpath_iter.get_next_node(node))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next xpath result node", K(ret)); + } + } else if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xpath result node is null", K(ret)); + } else if (ObXMLExprHelper::is_xml_text_node(node->type())) { + if (OB_FAIL(result_nodes.push_back(node))) { + LOG_WARN("fail to push back result node", K(ret)); + } + } else if (ObXMLExprHelper::is_xml_element_node(node->type()) || + ObXMLExprHelper::is_xml_attribute_node(node->type())) { + if (OB_FAIL(result_nodes.push_back(node))) { + LOG_WARN("fail to push back result node", K(ret)); + } + } + } + + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + } + + if (OB_FAIL(ret)) { + if (ret == OB_NOT_IMPLEMENT) { + ret = OB_SUCCESS; + xml_res.append(xpath_str); + } + } else 
if (cal_count) { + common::ObSqlString sql_string; + sql_string.assign_fmt("%ld", result_nodes.count()); + if (OB_FAIL(xml_res.append(sql_string.ptr(), sql_string.length()))) { + LOG_WARN("failed to append sql str", K(ret), K(sql_string)); + } + } else if (OB_FAIL(append_text_into_buffer(xml_mem_ctx->allocator_, result_nodes, xml_res))) { + LOG_WARN("fail to merge text nodes", K(ret), K(result_nodes.count())); + } + + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = xpath_iter.close())) { + LOG_WARN("fail to close xpath iter", K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + + return ret; +} + int ObExprExtractValue::extract_xpath_result(ObMulModeMemCtx *xml_mem_ctx, ObString& xpath_str, ObString& default_ns, ObIMulModeBase* xml_doc, ObPathVarObject* prefix_ns, ObString &xml_res) { @@ -272,6 +464,48 @@ int ObExprExtractValue::append_text_value(ObStringBuffer &buffer, ObIMulModeBase return ret; } +int ObExprExtractValue::append_text_into_buffer(ObIAllocator *allocator, + ObIArray &result_nodes, + ObStringBuffer &buffer) +{ + INIT_SUCC(ret); + char space = ' '; + for (int64_t i = 0; OB_SUCC(ret) && i < result_nodes.count(); i++) { + ObIMulModeBase *child_node = result_nodes.at(i); + ObString res_str; + if (OB_ISNULL(child_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get child node null", K(ret), K(i)); + } else if (ObXMLExprHelper::is_xml_leaf_node(child_node->type())) { + if (OB_FAIL(child_node->get_value(res_str))) { + LOG_WARN("failed to get node values.", K(ret), K(i)); + } else if (!buffer.empty() && OB_FAIL(buffer.append(&space, 1))) { + LOG_WARN("failed to add space into buffer.", K(ret), K(buffer), K(i)); + } else if (OB_FAIL(buffer.append(res_str))) { + LOG_WARN("append res into buffer failed", K(ret), K(i), K(res_str)); + } + } else if (ObXMLExprHelper::is_xml_element_node(child_node->type())) { + for (int64_t j = 0; OB_SUCC(ret) && j < child_node->size(); j++) { + ObString tmp_str; + ObIMulModeBase *grand_child = child_node->at(j); + if 
(OB_ISNULL(grand_child)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get grand child null", K(ret), K(i), K(j)); + } else if (grand_child->type() == M_TEXT || grand_child->type() == M_CDATA) { + if (OB_FAIL(grand_child->get_value(tmp_str))) { + LOG_WARN("get grand child value failed", K(ret), K(i), K(j), K(grand_child->type())); + } else if (!buffer.empty() && OB_FAIL(buffer.append(&space, 1))) { + LOG_WARN("failed to add space into buffer.", K(ret), K(buffer), K(i)); + } else if (OB_FAIL(buffer.append(tmp_str))) { + LOG_WARN("failed to append str", K(ret), K(tmp_str), K(i), K(j)); + } + } + } + } + } + + return ret; +} int ObExprExtractValue::extract_node_value(ObIAllocator &allocator, ObIMulModeBase *node, ObString &xml_res) { @@ -367,13 +601,16 @@ int ObExprExtractValue::has_same_parent_node(ObMulModeMemCtx *xml_mem_ctx, ObStr return ret; } -#endif - int ObExprExtractValue::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { UNUSED(expr_cg_ctx); UNUSED(raw_expr); - rt_expr.eval_func_ = eval_extract_value; + if (lib::is_oracle_mode()) { + rt_expr.eval_func_ = eval_extract_value; + } else { + rt_expr.eval_func_ = eval_mysql_extract_value; + } + return OB_SUCCESS; } diff --git a/src/sql/engine/expr/ob_expr_extract_value.h b/src/sql/engine/expr/ob_expr_extract_value.h index ad5f1f4fbe..9dd8f5a2ff 100644 --- a/src/sql/engine/expr/ob_expr_extract_value.h +++ b/src/sql/engine/expr/ob_expr_extract_value.h @@ -15,9 +15,7 @@ #define OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_EXRACTVALUE_H #include "sql/engine/expr/ob_expr_operator.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xpath.h" -#endif namespace oceanbase { @@ -34,27 +32,28 @@ class ObExprExtractValue : public ObFuncExprOperator int64_t param_num, common::ObExprTypeCtx &type_ctx) const override; -#ifdef OB_BUILD_ORACLE_XML static int eval_extract_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); -#else - static int eval_extract_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum 
&res) { return OB_NOT_SUPPORTED; } -#endif + static int eval_mysql_extract_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override; -#ifdef OB_BUILD_ORACLE_XML private: static int extract_xpath_result(ObMulModeMemCtx *xml_mem_ctx, ObString& xpath_str, ObString& default_ns, ObIMulModeBase* xml_doc, ObPathVarObject* prefix_ns, ObString &xml_res); +static int extract_mysql_xpath_result(ObMulModeMemCtx *xml_mem_ctx, ObString& xpath_str, + ObIMulModeBase* xml_doc, ObStringBuffer &xml_res); static int extract_node_value(ObIAllocator &allocator, ObIMulModeBase *node, ObString &xml_res); static int has_same_parent_node(ObMulModeMemCtx *xml_mem_ctx, ObString& xpath_str, ObString& default_ns, ObIMulModeBase* xml_doc, ObPathVarObject* prefix_ns, bool &is_same_parent); static int merge_text_nodes_with_same_parent(ObIAllocator *allocator, ObIArray &result_nodes, ObString &xml_res); +static int append_text_into_buffer(ObIAllocator *allocator, + ObIArray &result_nodes, + ObStringBuffer &xml_res); static int append_text_value(ObStringBuffer &buffer, ObIMulModeBase *node); +static int get_new_xpath(ObString xpath_str, ObString &new_xpath, bool &cal_count); -#endif private: DISALLOW_COPY_AND_ASSIGN(ObExprExtractValue); }; diff --git a/src/sql/engine/expr/ob_expr_extract_xml.cpp b/src/sql/engine/expr/ob_expr_extract_xml.cpp index 14eb5cdfe0..ac401d5175 100644 --- a/src/sql/engine/expr/ob_expr_extract_xml.cpp +++ b/src/sql/engine/expr/ob_expr_extract_xml.cpp @@ -12,11 +12,10 @@ */ #include "ob_expr_extract_xml.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_parser.h" #include "lib/xml/ob_xml_util.h" #include "sql/engine/expr/ob_expr_xml_func_helper.h" -#endif +#include "lib/xml/ob_binary_aggregate.h" #include "lib/utility/utility.h" #include "sql/session/ob_sql_session_info.h" #include "sql/engine/ob_exec_context.h" @@ -79,7 +78,6 @@ int 
ObExprExtractXml::calc_result_typeN(ObExprResType &type, return ret; } -#ifdef OB_BUILD_ORACLE_XML int ObExprExtractXml::eval_extract_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { int ret = OB_SUCCESS; @@ -126,25 +124,22 @@ int ObExprExtractXml::eval_extract_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDat } if (OB_FAIL(ret)) { - } else if (OB_FAIL(ObXMLExprHelper::get_xml_base(mem_ctx, xml_datum, cs_type, expect_type, xml_doc))) { + } else if (OB_FAIL(ObXMLExprHelper::get_xml_base(mem_ctx, xml_datum, cs_type, expect_type, xml_doc, node_type, ObGetXmlBaseType::OB_SHOULD_CHECK))) { LOG_WARN("fail to parse xml doc", K(ret)); } else if (OB_FAIL(xpath_iter.init(mem_ctx, xpath_str, default_ns, xml_doc, &prefix_ns))) { LOG_WARN("fail to init xpath iterator", K(xpath_str), K(default_ns), K(ret)); ObXMLExprHelper::replace_xpath_ret_code(ret); - } else if (OB_FAIL(concat_xpath_result(xpath_iter, cs_type, root, node_type, mem_ctx))) { + } else if (OB_FAIL(concat_xpath_result(expr, ctx, xpath_iter, cs_type, res, node_type, mem_ctx))) { LOG_WARN("fail to concat xpath result", K(ret)); - } else if (OB_ISNULL(root) || root->size() == 0) { - // root is not null and size = 0 if xpath='/.' or '//.' or '.' 
or 'self::*' and so on - res.set_null(); - } else if (OB_FAIL(ObXMLExprHelper::pack_xml_res(expr, ctx, res, root, mem_ctx, node_type, input_str))) { - LOG_WARN("fail to set result", K(xml_res), K(ret)); } return ret; } -int ObExprExtractXml::concat_xpath_result(ObPathExprIter &xpath_iter, +int ObExprExtractXml::concat_xpath_result(const ObExpr &expr, + ObEvalCtx &eval_ctx, + ObPathExprIter &xpath_iter, ObCollationType cs_type, - ObXmlDocument *&root, + ObDatum &res, ObMulModeNodeType &node_type, ObMulModeMemCtx* mem_ctx) { @@ -152,13 +147,28 @@ int ObExprExtractXml::concat_xpath_result(ObPathExprIter &xpath_iter, ObStringBuffer buff(mem_ctx->allocator_); ObIMulModeBase *node = NULL; int64_t append_node_num = 0; + int element_count = 0; + int text_count = 0; + ObString version; + ObString encoding; + uint16_t standalone; + ObString blob_locator; + bool first_is_doc = false; + ObIMulModeBase* last_parent = nullptr; + common::hash::ObHashMap ns_map; + if (OB_FAIL(xpath_iter.open())) { LOG_WARN("fail to open xpath iterator", K(ret)); ObXMLExprHelper::replace_xpath_ret_code(ret); + } else if (OB_FAIL(ns_map.create(10, lib::ObMemAttr(MTL_ID(), "XMLModule")))) { + LOG_WARN("ns map create failed", K(ret)); } + ObBinAggSerializer bin_agg(mem_ctx->allocator_, ObBinAggType::AGG_XML, static_cast(M_CONTENT)); + while (OB_SUCC(ret)) { ObIMulModeBase* tmp = nullptr; + ObXmlBin extend; if (OB_FAIL(xpath_iter.get_next_node(node))) { if (ret != OB_ITER_END) { LOG_WARN("fail to get next xml node", K(ret)); @@ -166,23 +176,52 @@ int ObExprExtractXml::concat_xpath_result(ObPathExprIter &xpath_iter, } else if (OB_ISNULL(node)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("xpath result node is null", K(ret)); - } else if (node->is_binary() && OB_FAIL(ObMulModeFactory::transform(mem_ctx, node, TREE_TYPE, node))) { + } else if (node->is_tree() && OB_FAIL(ObMulModeFactory::transform(mem_ctx, node, BINARY_TYPE, node))) { LOG_WARN("fail to transform to tree", K(ret)); - } else if (OB_ISNULL(root) && 
node->type() == M_DOCUMENT) { - // if the xpath return document node, set it as root - root = static_cast(static_cast(node)); } else { - if (OB_ISNULL(root)) { // if root is NULL, alloc a content node as root - if (OB_ISNULL(root = OB_NEWx(ObXmlDocument, (mem_ctx->allocator_), ObMulModeNodeType::M_CONTENT, (mem_ctx)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to create an xml content node", K(ret)); + ObXmlBin *bin = nullptr; + if (OB_ISNULL(bin = static_cast(node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get bin failed", K(ret)); + } else if (bin->meta_.len_ == 0) { + // do nothing + } else if (bin->check_extend()) { + bool conflict = false; + // check key conflict + if (OB_FAIL(bin->get_extend(extend))) { + LOG_WARN("fail to get extend", K(ret)); + } else if (OB_FAIL(ObXmlUtil::check_ns_conflict(xpath_iter.get_cur_res_parent(), last_parent, &extend, ns_map, conflict))) { + LOG_WARN("fail to check conflict", K(ret)); + } else if (conflict) { + // if conflict, merge bin + if (OB_FAIL(bin->merge_extend(extend))) { + LOG_WARN("fail to merge extend", K(ret)); + } else { + bin = &extend; + } + } else if (OB_FAIL(bin->remove_extend())) { // if not conflict, erase extend + LOG_WARN("fail to remove extend", K(ret)); } } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(append_node_to_res(*mem_ctx->allocator_, root, node))) { - LOG_WARN("fail to append node text to result buffer", K(ret)); + } else if (OB_FAIL(bin_agg.append_key_and_value(bin))) { + LOG_WARN("failed to append binary", K(ret)); } else { + ObMulModeNodeType type = node->type(); + if (append_node_num == 0 && type == ObMulModeNodeType::M_DOCUMENT) { + version = node->get_version(); + encoding = node->get_encoding(); + standalone = node->get_standalone(); + first_is_doc = version.empty() ? 
false : true; + } + + if (type == ObMulModeNodeType::M_ELEMENT || type == ObMulModeNodeType::M_DOCUMENT) { + element_count++; + } else if (type == ObMulModeNodeType::M_TEXT || type == ObMulModeNodeType::M_CDATA) { + text_count++; + } else if (type == ObMulModeNodeType::M_CONTENT) { + append_node_num += bin->count() - 1; + } append_node_num++; } } @@ -190,31 +229,19 @@ int ObExprExtractXml::concat_xpath_result(ObPathExprIter &xpath_iter, if (ret == OB_ITER_END) { ret = OB_SUCCESS; - if (OB_NOT_NULL(root)) { // res is NULL, do nothing - int element_count = 0; - int text_count = 0; - int cdata_count = 0; + if (element_count > 1 || element_count == 0) { + node_type = ObMulModeNodeType::M_CONTENT; + } else if (element_count == 1 && text_count > 0) { + node_type = ObMulModeNodeType::M_CONTENT; + } else if (append_node_num == 0) { + // do nothing + } else { + node_type = ObMulModeNodeType::M_DOCUMENT; + } - if (node_type == ObMulModeNodeType::M_CONTENT) { // do nothing - } else if (OB_FAIL(root->get_node_count(ObMulModeNodeType::M_ELEMENT, element_count))) { - LOG_WARN("get element count node failed", K(ret)); - } else if (OB_FAIL(root->get_node_count(ObMulModeNodeType::M_TEXT, text_count))) { - LOG_WARN("get text count node failed", K(ret)); - } else if (OB_FAIL(root->get_node_count(ObMulModeNodeType::M_CDATA, cdata_count))) { - LOG_WARN("get cdata count node failed", K(ret)); - } else if (element_count > 1 || element_count == 0) { - node_type = ObMulModeNodeType::M_CONTENT; - } else if (element_count == 1 && (text_count > 0 || cdata_count > 0)) { - node_type = ObMulModeNodeType::M_CONTENT; - } else if (root->size() == 0) { - // do nothing - } else { - if (append_node_num > 0) { - root->set_has_xml_decl(false); - } - node_type = ObMulModeNodeType::M_DOCUMENT; - } - root->set_xml_type(node_type); + bin_agg.set_header_type(node_type); + if (first_is_doc && append_node_num == 1) { + bin_agg.set_xml_decl(version, encoding, standalone); } } @@ -222,69 +249,20 @@ int 
ObExprExtractXml::concat_xpath_result(ObPathExprIter &xpath_iter, if (OB_SUCCESS != (tmp_ret = xpath_iter.close())) { LOG_WARN("fail to close xpath iter", K(tmp_ret)); ret = COVER_SUCC(tmp_ret); - } - return ret; -} - -int ObExprExtractXml::append_node_to_res(ObIAllocator &allocator, ObXmlDocument *root, ObIMulModeBase *node) -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(node) || OB_ISNULL(root)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("the append node is NULL", K(ret)); - } else if (!node->is_tree()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("node type is not tree type", K(ret)); + } else if (append_node_num == 0) { + res.set_null(); + } else if (OB_FAIL(bin_agg.serialize())) { + LOG_WARN("failed to serialize binary.", K(ret)); + } else if (ns_map.size() > 0 && OB_FAIL(ObXmlUtil::ns_to_extend(mem_ctx, ns_map, bin_agg.get_buffer()))) { + LOG_WARN("failed to serialize extend.", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::pack_binary_res(expr, eval_ctx, bin_agg.get_buffer()->string(), blob_locator))) { + LOG_WARN("failed to pack binary res.", K(ret)); } else { - ObXmlNode *xml_node = static_cast(node); - ObMulModeNodeType node_type = xml_node->type(); - ObString tmp_str; - if (node_type == M_TEXT || node_type == M_ATTRIBUTE || node_type == M_NAMESPACE) { - // extract attribute node value - ObXmlText *xml_text = NULL; - if (OB_FAIL(xml_node->get_value(tmp_str))) { - LOG_WARN("fail to get node value", K(ret)); - } else { - int64_t child_size = root->size(); - if (child_size > 0 && root->at(child_size-1)->type() == M_TEXT) { - xml_text = static_cast(root->at(child_size-1)); - ObStringBuffer buff(&allocator); - if (OB_FAIL(buff.append(xml_text->get_text()))) { - LOG_WARN("fail to append the orgin text", K(ret)); - } else if (OB_FAIL(buff.append(tmp_str))) { - LOG_WARN("fail to append the new text", K(ret), K(tmp_str)); - } else { - ObString new_str; - new_str.assign_ptr(buff.ptr(), buff.length()); - xml_text->set_text(new_str); - } - } else { - if (OB_ISNULL(xml_text = 
OB_NEWx(ObXmlText, (&allocator), ObMulModeNodeType::M_TEXT, (root->get_mem_ctx())))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to create xml text node", K(ret)); - } else if (FALSE_IT(xml_text->set_text(tmp_str))) { - } else if (OB_FAIL(root->append(xml_text))) { - LOG_WARN("fail to append node value to content", K(ret), K(tmp_str)); - } - } - } - } else if (ObXMLExprHelper::is_xml_root_node(node_type)) { - ObXmlDocument *xml_doc = static_cast(xml_node); - for (int64_t i = 0; OB_SUCC(ret) && i < xml_doc->size(); i++) { - if (OB_ISNULL(xml_doc->at(i)) ) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("xml node is null", K(ret), K(i)); - } else if (OB_FAIL(root->append(xml_doc->at(i)))) { - LOG_WARN("fail to append node to content", K(ret), K(i)); - } - } - } else if (OB_FAIL(root->append(node))) { - LOG_WARN("fail to append node to content", K(ret), K(node_type)); - } + res.set_string(blob_locator.ptr(), blob_locator.length()); } + ns_map.clear(); return ret; } -#endif int ObExprExtractXml::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { diff --git a/src/sql/engine/expr/ob_expr_extract_xml.h b/src/sql/engine/expr/ob_expr_extract_xml.h index 938d9dc6eb..5cbfdd969a 100644 --- a/src/sql/engine/expr/ob_expr_extract_xml.h +++ b/src/sql/engine/expr/ob_expr_extract_xml.h @@ -15,9 +15,7 @@ #define OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_EXRACT_XML_H #include "sql/engine/expr/ob_expr_operator.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xpath.h" -#endif namespace oceanbase { @@ -32,21 +30,17 @@ class ObExprExtractXml : public ObFuncExprOperator ObExprResType *types, int64_t param_num, common::ObExprTypeCtx &type_ctx) const override; -#ifdef OB_BUILD_ORACLE_XML static int eval_extract_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); -#else - static int eval_extract_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { return OB_NOT_SUPPORTED; } -#endif virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, 
ObExpr &rt_expr) const override; -#ifdef OB_BUILD_ORACLE_XML private: - static int append_node_to_res(ObIAllocator &allocator,ObXmlDocument *root, ObIMulModeBase *node); - static int concat_xpath_result(ObPathExprIter &xpath_iter, + static int concat_xpath_result(const ObExpr &expr, + ObEvalCtx &eval_ctx, + ObPathExprIter &xpath_iter, ObCollationType cs_type, - ObXmlDocument *&root, + ObDatum &res, ObMulModeNodeType &node_type, ObMulModeMemCtx* ctx); static int check_and_set_res(const ObExpr &expr, @@ -54,7 +48,6 @@ private: ObDatum &res, ObLibTreeNodeVector &node_vector, ObIAllocator &allocator); -#endif private: DISALLOW_COPY_AND_ASSIGN(ObExprExtractXml); }; diff --git a/src/sql/engine/expr/ob_expr_insert_child_xml.cpp b/src/sql/engine/expr/ob_expr_insert_child_xml.cpp new file mode 100644 index 0000000000..eca1bf08b4 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_insert_child_xml.cpp @@ -0,0 +1,456 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file is for func insertchildxml. 
+ */ + +#include "ob_expr_insert_child_xml.h" +#include "ob_expr_lob_utils.h" +#include "lib/xml/ob_xml_parser.h" +#include "lib/xml/ob_xml_util.h" +#include "sql/engine/expr/ob_expr_xml_func_helper.h" +#include "lib/utility/utility.h" +#include "sql/session/ob_sql_session_info.h" +#include "sql/engine/ob_exec_context.h" + +#define USING_LOG_PREFIX SQL_ENG + + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace sql +{ + +ObExprInsertChildXml::ObExprInsertChildXml(common::ObIAllocator &alloc) + : ObFuncExprOperator(alloc, T_FUN_SYS_INSERTCHILDXML, N_INSERTCHILDXML, MORE_THAN_ONE, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) +{ +} + +ObExprInsertChildXml::~ObExprInsertChildXml() {} + +int ObExprInsertChildXml::calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const +{ + int ret = OB_SUCCESS; + if (param_num != 5) { + ret = OB_ERR_PARAM_SIZE; + LOG_WARN("invalid argument number.", K(ret), K(param_num)); + } else if (!is_called_in_sql()) { + ret = OB_ERR_SP_LILABEL_MISMATCH; + LOG_WARN("expr call in pl semantics disallowed", K(ret), K(N_INSERTCHILDXML)); + LOG_USER_ERROR(OB_ERR_SP_LILABEL_MISMATCH, static_cast(strlen(N_INSERTCHILDXML)), N_INSERTCHILDXML); + } else { + ObObjType in_type = types[0].get_type(); + if (types[0].is_ext() && types[0].get_udt_id() == T_OBJ_XML) { + types[0].get_calc_meta().set_sql_udt(ObXMLSqlType); + } + + if (ob_is_xml_pl_type(types[3].get_type(), types[3].get_udt_id())) { + types[3].get_calc_meta().set_sql_udt(ObXMLSqlType); + } + + if (OB_FAIL(ret)) { + } else if (!ob_is_xml_sql_type(in_type, types[0].get_subschema_id())) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_OP, "ANYDATA", "-"); + LOG_WARN("inconsistent datatypes", K(ret), K(ob_obj_type_str(in_type))); + } else if (!types[1].is_string_type()) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("inconsistent datatypes", K(ret), K(types[1].get_type())); + } else if 
(!types[2].is_string_type()) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("inconsistent datatypes", K(ret), K(types[2].get_type())); + } else if (!ob_is_string_tc(types[3].get_type()) && + !ob_is_xml_sql_type(types[3].get_type(), types[3].get_subschema_id())) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("inconsistent datatypes", K(ret), K(types[3].get_type())); + } else if (!types[4].is_string_type() && !types[4].is_null()) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("inconsistent datatypes", K(ret), K(types[4].get_type())); + } + + for (int8_t i = 1; OB_SUCC(ret) && i < param_num; i++) { + ObObjType param_type = types[i].get_type(); + if (param_type == ObNullType) { + } else if (ob_is_string_type(param_type)) { + if (types[i].get_charset_type() != CHARSET_UTF8MB4) { + types[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + } + } + } + + if (OB_SUCC(ret)) { + type.set_sql_udt(ObXMLSqlType); + } + } + + return ret; +} + + +int ObExprInsertChildXml::eval_insert_child_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) +{ + int ret = OB_SUCCESS; + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &allocator = tmp_alloc_g.get_allocator(); + ObDatum *xml_datum = NULL; + ObIMulModeBase *xml_tree = NULL; + ObString xpath_str; + ObString child_str; + ObString value_str; + ObString namespace_str; + ObString default_ns; + ObPathVarObject prefix_ns(allocator); + ObString xml_res; + ObMulModeNodeType node_type = M_MAX_TYPE; + ObPathExprIter xpath_iter(&allocator); + bool is_insert_attributes = false; + + ObMulModeMemCtx* mem_ctx = nullptr; + lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(MTL_ID(), "XMLModule")); + if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(&allocator, mem_ctx))) { + LOG_WARN("fail to create tree memory context", K(ret)); + } else if (OB_UNLIKELY(expr.arg_cnt_ != 5)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid arg_cnt_", K(ret), K(expr.arg_cnt_)); + } else if (ObNullType == 
expr.args_[1]->datum_meta_.type_) { + ret = OB_ERR_INVALID_XPATH_EXPRESSION; + LOG_WARN("invalid xpath expression", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::get_xmltype_from_expr(expr.args_[0], ctx, xml_datum))) { + LOG_WARN("fail to get xmltype value", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(expr.args_[1], ctx, xpath_str, allocator))) { + LOG_WARN("fail to get xpath str", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(expr.args_[2], ctx, child_str, allocator))) { + LOG_WARN("fail to get xpath str", K(ret)); + } else if (ObNullType == expr.args_[4]->datum_meta_.type_) { + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(expr.args_[4], ctx, namespace_str, allocator))) { + LOG_WARN("fail to get xpath str", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::construct_namespace_params(namespace_str, default_ns, prefix_ns, allocator))) { + LOG_WARN("fail to construct namespace params", K(ret), K(namespace_str)); + } + + if (OB_FAIL(ret)) { + } else if (xpath_str.empty()) { + // do nothing + } else if (OB_FAIL(check_child_expr(expr, ctx, allocator, mem_ctx, child_str, value_str, is_insert_attributes))) { + LOG_WARN("failed to check child expr.", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::get_xml_base(mem_ctx, xml_datum, ObCollationType::CS_TYPE_INVALID, ObNodeMemType::TREE_TYPE, xml_tree, node_type, ObGetXmlBaseType::OB_IS_REPARSE))) { + LOG_WARN("fail to parse xml doc", K(ret)); + } else if (OB_FAIL(xpath_iter.init(mem_ctx, xpath_str, default_ns, xml_tree, &prefix_ns))) { + LOG_WARN("fail to init xpath iterator", K(xpath_str), K(default_ns), K(ret)); + ObXMLExprHelper::replace_xpath_ret_code(ret); + } else if (OB_FAIL(insert_child_xml(expr, ctx, mem_ctx, allocator, xpath_iter, child_str, value_str, is_insert_attributes))) { + LOG_WARN("fail to concat xpath result", K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(xml_tree) || xml_tree->count() == 0) { + res.set_null(); + } else { + ObString plain_text; + 
ObXmlDocument *xml_doc = NULL; + ObXmlDocument *null_doc = NULL; + ObStringBuffer buff(&allocator); + int element_count = 0; + int text_count = 0; + int cdata_count = 0; + if (ObMulModeNodeType::M_CONTENT == node_type) { + node_type = ObMulModeNodeType::M_UNPARSED; + } else if (OB_FAIL(xml_tree->get_node_count(ObMulModeNodeType::M_ELEMENT, element_count))) { + LOG_WARN("get element count node failed", K(ret)); + } else if (OB_FAIL(xml_tree->get_node_count(ObMulModeNodeType::M_TEXT, text_count))) { + LOG_WARN("get text count node failed", K(ret)); + } else if (OB_FAIL(xml_tree->get_node_count(ObMulModeNodeType::M_CDATA, cdata_count))) { + LOG_WARN("get cdata count node failed", K(ret)); + } else if (element_count > 1 || element_count == 0) { + node_type = ObMulModeNodeType::M_UNPARSED; + } else if (element_count == 1 && (text_count > 0 || cdata_count > 0)) { + node_type = ObMulModeNodeType::M_UNPARSED; + } else { + node_type = ObMulModeNodeType::M_DOCUMENT; + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(xml_doc = static_cast(xml_tree))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xml tree to xmldocument failed", K(ret)); + } else if (ObMulModeNodeType::M_UNPARSED == node_type) { + if (OB_FAIL(xml_doc->print(buff, ObXmlFormatType::NO_FORMAT))) { + LOG_WARN("fail to print xml tree", K(ret)); + } else if (OB_FALSE_IT(plain_text.assign_ptr(buff.ptr(), buff.length()))) { + } else if (OB_FAIL(ObXMLExprHelper::pack_xml_res(expr, ctx, res, null_doc, mem_ctx, node_type, plain_text))) { + LOG_WARN("failed to pack xml res", K(ret)); + } + } else if (OB_FAIL(ObXMLExprHelper::pack_xml_res(expr, ctx, res, xml_doc, mem_ctx, node_type, plain_text))) { + LOG_WARN("failed to pack xml res", K(ret)); + } + + } + + return ret; +} + +int ObExprInsertChildXml::insert_child_xml(const ObExpr &expr, + ObEvalCtx &ctx, + ObMulModeMemCtx* mem_ctx, + ObArenaAllocator &allocator, + ObPathExprIter &xpath_iter, + ObString child_str, + ObString value_str, + bool is_insert_attributes) +{ + int ret = 
OB_SUCCESS; + ObIMulModeBase *node = NULL; + ObArray res_array; + ObXmlElement *value_ele = NULL; + ObIMulModeBase *value_doc = NULL; + uint64_t ele_count = 0; + uint64_t ele_index = 0; + ObDatum *value_datum = NULL; + CK(OB_NOT_NULL(expr.args_[3])); + if (expr.args_[3]->datum_meta_.type_ == ObUserDefinedSQLType) { + if (OB_FAIL(ObXMLExprHelper::get_xmltype_from_expr(expr.args_[3], ctx, value_datum))) { + LOG_WARN("fail to get xmltype value", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::get_xml_base(mem_ctx, value_datum, CS_TYPE_INVALID, ObNodeMemType::TREE_TYPE, value_doc))) { + LOG_WARN("fail to parse xml doc", K(ret)); + } + + for (uint64_t i = 0; OB_SUCC(ret) && i < value_doc->count(); i++) { + CK(OB_NOT_NULL(value_doc->at(i))); + ObMulModeNodeType type = value_doc->at(i)->type(); + if (type == M_COMMENT) { + } else if (type == M_ELEMENT) { + ele_count++; + ele_index = i; + } else { + ret = OB_ERR_INVALID_XML_CHILD_NAME; + LOG_WARN("value doc invalid.", K(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (ele_count > 1) { + ret = OB_ERR_INVALID_XML_CHILD_NAME; + LOG_WARN("value doc invalid.", K(ret)); + } else if (OB_ISNULL(value_ele = static_cast(value_doc->at(ele_index)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get value element failed.", K(ret)); + } else { + ObString value_key; + if (OB_FAIL(value_ele->get_key(value_key))) { + LOG_WARN("get key failed.", K(ret)); + } else if (child_str.compare(value_key) != 0) { + ret = OB_ERR_INVALID_XML_CHILD_NAME; + LOG_WARN("child str and element key is't equal.", K(ret)); + } + } + } + + if (OB_SUCC(ret) && OB_FAIL(xpath_iter.open())) { + LOG_WARN("fail to open xpath iterator", K(ret)); + ObXMLExprHelper::replace_xpath_ret_code(ret); + } + + while (OB_SUCC(ret)) { + if (OB_FAIL(xpath_iter.get_next_node(node))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next xml node", K(ret)); + } + } else if (node->type() != ObMulModeNodeType::M_ELEMENT && + node->type() != ObMulModeNodeType::M_CONTENT && + 
node->type() != ObMulModeNodeType::M_DOCUMENT) { + // do nothing + } else if (OB_FAIL(res_array.push_back(node))) { + LOG_WARN("fail to push xml node", K(ret)); + } + } + + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + } + + for (int i = 0; OB_SUCC(ret) && i < res_array.size(); i++) { + ObIMulModeBase* insert_node = res_array[i]; + if (OB_ISNULL(insert_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xpath result node is null", K(ret)); + } else if (is_insert_attributes) { // add attributes + if (OB_FAIL(insert_attributes_node(child_str, value_str, insert_node))) { + LOG_WARN("fail to insert attributes node", K(ret), K(child_str), K(value_str)); + } + } else { + ObIMulModeBase *value_doc = NULL; + if (OB_FAIL(ObXMLExprHelper::get_xml_base(mem_ctx, value_datum, CS_TYPE_INVALID, ObNodeMemType::TREE_TYPE, value_doc))) { + LOG_WARN("fail to parse xml doc", K(ret)); + } else if (OB_FAIL(insert_element_node(allocator, insert_node, value_doc->at(ele_index)))) { + LOG_WARN("fail to insert element node", K(ret)); + } + } + } + + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = xpath_iter.close())) { + LOG_WARN("fail to close xpath iter", K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + + return ret; +} + +int ObExprInsertChildXml::insert_element_node(ObArenaAllocator &allocator, + ObIMulModeBase *insert_node, + ObIMulModeBase *value_node) +{ + int ret = OB_SUCCESS; + ObMulModeNodeType insert_type = ObMulModeNodeType::M_MAX_TYPE; + ObXmlElement *value_ele = NULL; + if (OB_ISNULL(insert_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get insert node null", K(ret)); + } else if (OB_FALSE_IT(insert_type = insert_node->type())) { + } else if (value_node->type() != M_ELEMENT || OB_ISNULL(value_ele = static_cast(value_node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get value node null", K(ret)); + } else if (insert_type == M_ELEMENT) { + ObXmlElement *insert = NULL; + if (OB_ISNULL(insert = static_cast(insert_node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get insert 
node null", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::update_new_nodes_ns(allocator, insert, value_ele))) { + LOG_WARN("fail to update new node ns", K(ret)); + } else if (OB_FAIL(insert->add_element(value_ele))) { + LOG_WARN("failed to add element.", K(ret)); + } + } else if (insert_type == M_DOCUMENT || insert_type == M_CONTENT) { + ObXmlDocument *insert = NULL; + if (OB_ISNULL(insert = static_cast(insert_node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get insert node null", K(ret)); + } else if (OB_FAIL(insert->add_element(value_ele))) { + LOG_WARN("failed to add element.", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("insert node type invalid.", K(ret), K(insert_type)); + } + + return ret; +} + +int ObExprInsertChildXml::insert_attributes_node(ObString key_str, + ObString value_str, + ObIMulModeBase *insert_node) +{ + int ret = OB_SUCCESS; + ObXmlElement *element = NULL; + if (OB_ISNULL(insert_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get insert node null", K(ret)); + } else if (OB_ISNULL(element = static_cast(insert_node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get insert node null", K(ret)); + } else { + int64_t count = insert_node->attribute_size(); + for (int64_t i = 0; OB_SUCC(ret) && i < count; i++) { + ObXmlAttribute *att = nullptr; + if (OB_ISNULL(insert_node->attribute_at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get attribute null", K(ret), K(i)); + } else if (insert_node->attribute_at(i)->type() != M_ATTRIBUTE) { + // do nothing + } else if (OB_ISNULL(att = static_cast(insert_node->attribute_at(i)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cast to attribute null", K(ret), K(i)); + } else if (key_str.compare(att->get_key()) == 0) { + ObString ele_err_info = element->get_key(); + ret = OB_ERR_XML_PARENT_ALREADY_CONTAINS_CHILD; + LOG_WARN("Parent already contains child entry", K(ret), K(i), K(value_str), K(key_str)); + LOG_USER_ERROR(OB_ERR_XML_PARENT_ALREADY_CONTAINS_CHILD, ele_err_info.length(), ele_err_info.ptr(), 
"@", key_str.length(), key_str.ptr()); + } + } + if (OB_SUCC(ret) && OB_FAIL(element->add_attr_by_str(key_str, value_str, ObMulModeNodeType::M_ATTRIBUTE))) { + LOG_WARN("add element failed", K(ret), K(key_str), K(value_str)); + } + } + + return ret; +} + +int ObExprInsertChildXml::check_child_expr(const ObExpr &expr, + ObEvalCtx &ctx, + ObArenaAllocator &allocator, + ObMulModeMemCtx* mem_ctx, + ObString &child_str, + ObString &value_str, + bool &is_insert_attributes) +{ + int ret = OB_SUCCESS; + const ObExpr *value_expr = expr.args_[3]; + ObXmlElement *value_ele = NULL; + ObDatum *value_datum = NULL; + ObIMulModeBase *value_doc = NULL; + if (child_str.empty()) { + ret = OB_ERR_INVALID_XML_CHILD_NAME; + LOG_WARN("invalid xml child name.", K(ret)); + } else if (OB_ISNULL(value_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get value expr unexpected.", K(ret)); + } else if (value_expr->datum_meta_.type_ != ObUserDefinedSQLType) { + if (child_str.ptr()[0] != '@') { + if (is_first_char_attribute(child_str)) { + ret = OB_ERR_XML_PARSE; + LOG_WARN("child str invalid.", K(ret)); + } else { + ret = OB_ERR_INVALID_XPATH_EXPRESSION; + LOG_WARN("get invalid value type.", K(ret)); + } + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(value_expr, ctx, value_str, allocator))) { + LOG_WARN("fail to get value str", K(ret)); + } else { + is_insert_attributes = true; + child_str = ObString(child_str.length() - 1, child_str.ptr() + 1); + } + } + + return ret; +} + +bool ObExprInsertChildXml::is_first_char_attribute(ObString child_str) +{ + bool res = false; + char *ptr = child_str.ptr(); + bool get_next_char = true; + for (int i = 0; get_next_char && i < child_str.length(); i++) { + if (ptr[i] == ' ') { + // do nothing + } else if (ptr[i] == '@') { + res = true; + get_next_char = false; + } else { + get_next_char = false; + } + } + return res; +} + +int ObExprInsertChildXml::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const +{ + 
UNUSED(expr_cg_ctx); + UNUSED(raw_expr); + rt_expr.eval_func_ = eval_insert_child_xml; + return OB_SUCCESS; +} + +} // end of sql +} // end of oceanbase \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_insert_child_xml.h b/src/sql/engine/expr/ob_expr_insert_child_xml.h new file mode 100644 index 0000000000..c5824216da --- /dev/null +++ b/src/sql/engine/expr/ob_expr_insert_child_xml.h @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file is for func insertchildxml. + */ + +#ifndef OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_INSERT_CHILD_XML_H +#define OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_INSERT_CHILD_XML_H + +#include "sql/engine/expr/ob_expr_operator.h" +#include "lib/xml/ob_xpath.h" + +namespace oceanbase +{ + +namespace sql +{ +class ObExprInsertChildXml : public ObFuncExprOperator +{ +public: + explicit ObExprInsertChildXml(common::ObIAllocator &alloc); + virtual ~ObExprInsertChildXml(); + virtual int calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const override; + + static int eval_insert_child_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) + const override; +private: + static int insert_child_xml(const ObExpr &expr, + ObEvalCtx &ctx, + ObMulModeMemCtx* mem_ctx, + ObArenaAllocator &allocator, + ObPathExprIter &xpath_iter, + ObString child_str, + ObString value_str, + bool 
is_insert_attributes); + + static int check_child_expr(const ObExpr &expr, + ObEvalCtx &ctx, + ObArenaAllocator &allocator, + ObMulModeMemCtx* mem_ctx, + ObString &child_str, + ObString &value_str, + bool &is_insert_attributes); + static bool is_first_char_attribute(ObString child_str); + + static int insert_element_node(ObArenaAllocator &allocator, ObIMulModeBase *insert_node, ObIMulModeBase *value_node); + + static int insert_attributes_node(ObString key_str, + ObString value_str, + ObIMulModeBase *insert_node); + +private: + DISALLOW_COPY_AND_ASSIGN(ObExprInsertChildXml); +}; + +} // sql +} // oceanbase + + +#endif // OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_INSERT_CHILD_XML_H \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_json_append.cpp b/src/sql/engine/expr/ob_expr_json_append.cpp new file mode 100644 index 0000000000..c71a748503 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_json_append.cpp @@ -0,0 +1,42 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ * This file is for implementation of func json_append + */ + + +#define USING_LOG_PREFIX SQL_ENG +#include "ob_expr_json_append.h" +#include "sql/engine/expr/ob_expr_json_func_helper.h" + +using namespace oceanbase::common; +using namespace oceanbase::sql; + +namespace oceanbase +{ +namespace sql +{ + +ObExprJsonAppend::ObExprJsonAppend(ObIAllocator &alloc) + : ObExprJsonArrayAppend(alloc, + T_FUN_SYS_JSON_APPEND, + N_JSON_APPEND, + MORE_THAN_TWO, + VALID_FOR_GENERATED_COL, + NOT_ROW_DIMENSION) +{ +} + +ObExprJsonAppend::~ObExprJsonAppend() +{ +} + +} +} \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_json_append.h b/src/sql/engine/expr/ob_expr_json_append.h new file mode 100644 index 0000000000..ad99d6698a --- /dev/null +++ b/src/sql/engine/expr/ob_expr_json_append.h @@ -0,0 +1,37 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ * This file is for the definition of func json_append + */ + +#ifndef OCEANBASE_SQL_OB_EXPR_JSON_APPEND_H_ +#define OCEANBASE_SQL_OB_EXPR_JSON_APPEND_H_ + +#include "sql/engine/expr/ob_expr_operator.h" +#include "sql/engine/expr/ob_expr_json_array_append.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace sql +{ +class ObExprJsonAppend : public ObExprJsonArrayAppend +{ +public: + explicit ObExprJsonAppend(common::ObIAllocator &alloc); + virtual ~ObExprJsonAppend(); +private: + DISALLOW_COPY_AND_ASSIGN(ObExprJsonAppend); +}; + +} // sql +} // oceanbase +#endif // OCEANBASE_SQL_OB_EXPR_JSON_APPEND_H_ \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_json_array.cpp b/src/sql/engine/expr/ob_expr_json_array.cpp index d03ae7bac7..f2ff05b83f 100644 --- a/src/sql/engine/expr/ob_expr_json_array.cpp +++ b/src/sql/engine/expr/ob_expr_json_array.cpp @@ -14,6 +14,7 @@ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_json_array.h" +#include "share/ob_json_access_utils.h" #include "sql/engine/ob_exec_context.h" #include "sql/engine/expr/ob_expr_json_func_helper.h" @@ -214,7 +215,7 @@ int ObExprJsonArray::eval_ora_json_array(const ObExpr &expr, ObEvalCtx &ctx, ObD ret = OB_ERR_JSON_OUT_OF_DEPTH; LOG_WARN("current json over depth", K(ret), K(j_arr.depth())); } else if (dst_type == ObJsonType) { - if (OB_FAIL(j_arr.get_raw_binary(res_string, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(&j_arr, res_string, &temp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } } else { @@ -284,7 +285,7 @@ int ObExprJsonArray::eval_json_array(const ObExpr &expr, ObEvalCtx &ctx, ObDatum if (ObJsonParser::is_json_doc_over_depth(j_arr.depth())) { ret = OB_ERR_JSON_OUT_OF_DEPTH; LOG_WARN("current json over depth", K(ret), K(j_arr.depth())); - } else if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + } else if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("failed: json get 
binary", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { LOG_WARN("fail to pack json result", K(ret)); diff --git a/src/sql/engine/expr/ob_expr_json_array_append.cpp b/src/sql/engine/expr/ob_expr_json_array_append.cpp index aecd156b27..7f63149fcd 100644 --- a/src/sql/engine/expr/ob_expr_json_array_append.cpp +++ b/src/sql/engine/expr/ob_expr_json_array_append.cpp @@ -14,6 +14,7 @@ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_json_array_append.h" +#include "share/ob_json_access_utils.h" #include "sql/engine/expr/ob_expr_json_func_helper.h" using namespace oceanbase::common; @@ -32,6 +33,16 @@ ObExprJsonArrayAppend::ObExprJsonArrayAppend(ObIAllocator &alloc) { } +ObExprJsonArrayAppend::ObExprJsonArrayAppend( + ObIAllocator &alloc, + ObExprOperatorType type, + const char *name, + int32_t param_num, + ObValidForGeneratedColFlag valid_for_generated_col, + int32_t dimension) : ObFuncExprOperator(alloc, type, name, param_num, valid_for_generated_col, dimension) +{ +} + ObExprJsonArrayAppend::~ObExprJsonArrayAppend() { } @@ -86,7 +97,7 @@ int ObExprJsonArrayAppend::eval_json_array_append(const ObExpr &expr, ObEvalCtx common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObIJsonBase *j_base = NULL; bool is_null = false; - ObJsonBaseVector hit; + ObJsonSeekResult hit; if (expr.datum_meta_.cs_type_ != CS_TYPE_UTF8MB4_BIN) { ret = OB_ERR_INVALID_JSON_CHARSET; @@ -176,7 +187,7 @@ int ObExprJsonArrayAppend::eval_json_array_append(const ObExpr &expr, ObEvalCtx ObString raw_bin; if (is_null) { res.set_null(); - } else if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + } else if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { LOG_WARN("fail to pack json result", K(ret)); diff --git a/src/sql/engine/expr/ob_expr_json_array_append.h 
b/src/sql/engine/expr/ob_expr_json_array_append.h index cfbccaa465..2c1a0c7417 100644 --- a/src/sql/engine/expr/ob_expr_json_array_append.h +++ b/src/sql/engine/expr/ob_expr_json_array_append.h @@ -26,6 +26,12 @@ class ObExprJsonArrayAppend : public ObFuncExprOperator { public: explicit ObExprJsonArrayAppend(common::ObIAllocator &alloc); + explicit ObExprJsonArrayAppend(common::ObIAllocator &alloc, + ObExprOperatorType type, + const char *name, + int32_t param_num, + ObValidForGeneratedColFlag valid_for_generated_col, + int32_t dimension); virtual ~ObExprJsonArrayAppend(); virtual int calc_result_typeN(ObExprResType& type, diff --git a/src/sql/engine/expr/ob_expr_json_array_insert.cpp b/src/sql/engine/expr/ob_expr_json_array_insert.cpp index 04818c6ed6..e13ee34689 100644 --- a/src/sql/engine/expr/ob_expr_json_array_insert.cpp +++ b/src/sql/engine/expr/ob_expr_json_array_insert.cpp @@ -13,6 +13,7 @@ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_json_array_insert.h" +#include "share/ob_json_access_utils.h" #include "sql/engine/expr/ob_expr_json_func_helper.h" using namespace oceanbase::common; @@ -83,7 +84,7 @@ int ObExprJsonArrayInsert::eval_json_array_insert(const ObExpr &expr, ObEvalCtx common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObIJsonBase *j_base = NULL; bool is_null = false; - ObJsonBaseVector hit; + ObJsonSeekResult hit; if (expr.datum_meta_.cs_type_ != CS_TYPE_UTF8MB4_BIN) { ret = OB_ERR_INVALID_JSON_CHARSET; @@ -148,7 +149,7 @@ int ObExprJsonArrayInsert::eval_json_array_insert(const ObExpr &expr, ObEvalCtx ObString raw_bin; if (is_null) { res.set_null(); - } else if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + } else if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { LOG_WARN("fail to pack json result", K(ret)); diff --git 
a/src/sql/engine/expr/ob_expr_json_contains.cpp b/src/sql/engine/expr/ob_expr_json_contains.cpp index 7b74425ba0..e1e11a1366 100644 --- a/src/sql/engine/expr/ob_expr_json_contains.cpp +++ b/src/sql/engine/expr/ob_expr_json_contains.cpp @@ -96,7 +96,7 @@ int ObExprJsonContains::eval_json_contains(const ObExpr &expr, ObEvalCtx &ctx, O } else if (expr.args_[2]->datum_meta_.type_ == ObNullType || path_data->is_null()) { is_null_result = true; } else { - ObJsonBaseVector sub_json_targets; + ObJsonSeekResult sub_json_targets; ObString path_val = path_data->get_string(); ObJsonPath *json_path; if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(expr.args_[2], ctx, temp_allocator, path_val, is_null_result))) { diff --git a/src/sql/engine/expr/ob_expr_json_contains_path.cpp b/src/sql/engine/expr/ob_expr_json_contains_path.cpp index ca9915f0a0..4cb1dbaca7 100644 --- a/src/sql/engine/expr/ob_expr_json_contains_path.cpp +++ b/src/sql/engine/expr/ob_expr_json_contains_path.cpp @@ -113,7 +113,7 @@ int ObExprJsonContainsPath::eval_json_contains_path(const ObExpr &expr, path_cache = ((path_cache != NULL) ? 
path_cache : &ctx_cache); for (int64_t i = 2; OB_SUCC(ret) && i < expr.arg_cnt_ && !is_null_result; i++) { - ObJsonBaseVector hit; + ObJsonSeekResult hit; ObDatum *path_data = NULL; if (OB_FAIL(expr.args_[i]->eval(ctx, path_data))) { LOG_WARN("eval json path datum failed", K(ret)); diff --git a/src/sql/engine/expr/ob_expr_json_equal.cpp b/src/sql/engine/expr/ob_expr_json_equal.cpp index 39a9689e8b..0f82e7e0b2 100644 --- a/src/sql/engine/expr/ob_expr_json_equal.cpp +++ b/src/sql/engine/expr/ob_expr_json_equal.cpp @@ -109,7 +109,7 @@ int ObExprJsonEqual::eval_json_equal(const ObExpr &expr, ObEvalCtx &ctx, ObDatum if (ret == OB_ERR_JSON_SYNTAX_ERROR) is_cover_by_error = true; LOG_WARN("get_json_doc failed", K(ret)); // if is scalar, must be json type - } else if(!is_null_result && ((is_json_scalar(json_target) && expr.args_[0]->datum_meta_.type_ != ObJsonType) + } else if(!is_null_result && ((is_json_scalar(json_target) && expr.args_[0]->datum_meta_.type_ != ObJsonType) || (is_json_scalar(json_candidate) && expr.args_[1]->datum_meta_.type_ != ObJsonType))) { ret = OB_ERR_JSON_SYNTAX_ERROR; is_cover_by_error = true; @@ -168,7 +168,7 @@ int ObExprJsonEqual::eval_json_equal(const ObExpr &expr, ObEvalCtx &ctx, ObDatum ret = OB_ERR_JSON_SYNTAX_ERROR; } } else if (is_null_result) { - res.set_null(); + res.set_int(static_cast(false)); } else { is_equal = (compare_res == 0); res.set_int(static_cast(is_equal)); diff --git a/src/sql/engine/expr/ob_expr_json_exists.cpp b/src/sql/engine/expr/ob_expr_json_exists.cpp index db8d63cd59..e2e9b0e3bf 100644 --- a/src/sql/engine/expr/ob_expr_json_exists.cpp +++ b/src/sql/engine/expr/ob_expr_json_exists.cpp @@ -148,7 +148,7 @@ int ObExprJsonExists::get_path(const ObExpr &expr, ObEvalCtx &ctx, if (OB_FAIL(json_arg->eval(ctx, json_datum))) { LOG_WARN("eval json arg failed", K(ret)); } else if (type == ObNullType || json_datum->is_null()) { - // path为空时会报错 + // path is null will return error // ORA-40442: JSON path expression syntax error 
ret = OB_ERR_JSON_PATH_SYNTAX_ERROR; LOG_WARN("JSON path expression syntax error ('')", K(ret)); @@ -176,6 +176,7 @@ int ObExprJsonExists::get_path(const ObExpr &expr, ObEvalCtx &ctx, return ret; } + int ObExprJsonExists::get_var_data(const ObExpr &expr, ObEvalCtx &ctx, common::ObArenaAllocator &allocator, uint16_t index, ObIJsonBase*& j_base) { @@ -191,9 +192,6 @@ int ObExprJsonExists::get_var_data(const ObExpr &expr, ObEvalCtx &ctx, common::O } else if (OB_ISNULL(json_datum)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("eval json arg failed", K(ret)); - } else if (val_type == ObNullType) { - ret = OB_ERR_INVALID_VARIABLE_IN_JSON_PATH; - LOG_USER_ERROR(OB_ERR_INVALID_VARIABLE_IN_JSON_PATH); } else if (json_datum->is_null()) { if (ob_is_string_type(val_type)) { ObJsonString* tmp_ans = static_cast (allocator.alloc(sizeof(ObJsonString))); @@ -214,138 +212,30 @@ int ObExprJsonExists::get_var_data(const ObExpr &expr, ObEvalCtx &ctx, common::O j_base = tmp_ans; } } - } else if (val_type == ObJsonType) { - bool is_json_null = false; - if (OB_FAIL(ObJsonExprHelper::get_json_doc(expr, ctx, allocator, index, j_base, is_json_null, true, true))) { - LOG_WARN("parse json_data fail", K(ret)); - } - } else if (ob_is_string_type(val_type)) { - ObString j_str; - bool is_null = false; - if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(json_arg, ctx, allocator, j_str, is_null))) { - LOG_WARN("fail to get real data.", K(ret), K(j_str)); - } else if (is_null) { - ObJsonNull* tmp_ans = static_cast (allocator.alloc(sizeof(ObJsonNull))); - if (OB_ISNULL(tmp_ans)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate row buffer failed at ObJsonNull", K(ret)); - } else { - tmp_ans = new (tmp_ans) ObJsonNull(); - j_base = tmp_ans; - } - } else { - ObJsonString* tmp_ans = static_cast (allocator.alloc(sizeof(ObJsonString))); - if (OB_ISNULL(tmp_ans)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate row buffer failed at ObJsonString", K(ret)); - } else { - tmp_ans = new (tmp_ans) 
ObJsonString(j_str.ptr(), j_str.length()); - j_base = tmp_ans; - } - } - } else if (ObTinyIntType <= val_type && val_type <= ObUInt64Type) { - // int - ObJsonInt* tmp_ans = static_cast (allocator.alloc(sizeof(ObJsonInt))); - if (OB_ISNULL(tmp_ans)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate row buffer failed at ObJsonInt", K(ret)); - } else { - tmp_ans = new (tmp_ans) ObJsonInt(json_datum->get_int()); - j_base = tmp_ans; - } - } else if (ObFloatType <= val_type && val_type <= ObUDoubleType) { - // double - if (val_type == ObUFloatType || val_type == ObFloatType) { - ObJsonOFloat* tmp_ans = static_cast (allocator.alloc(sizeof(ObJsonOFloat))); - if (OB_ISNULL(tmp_ans)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate row buffer failed at ObJsonDouble", K(ret)); - } else { - tmp_ans = new (tmp_ans) ObJsonOFloat(json_datum->get_float()); - j_base = tmp_ans; - } - } else { - ObJsonDouble* tmp_ans = static_cast (allocator.alloc(sizeof(ObJsonDouble))); - if (OB_ISNULL(tmp_ans)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate row buffer failed at ObJsonDouble", K(ret)); - } else { - tmp_ans = new (tmp_ans) ObJsonDouble(json_datum->get_double()); - j_base = tmp_ans; - } - } - } else if ((ObNumberType <= val_type && val_type <= ObUNumberType) || (val_type == ObDecimalIntType)) { - // decimal - ObJsonDecimal* tmp_ans = static_cast (allocator.alloc(sizeof(ObJsonDecimal))); - if (OB_ISNULL(tmp_ans)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate row buffer failed at ObJsonDecimal", K(ret)); - } else if (val_type != ObDecimalIntType){ - tmp_ans = new (tmp_ans) ObJsonDecimal(json_datum->get_number()); - j_base = tmp_ans; - } else { - ObNumber tmp_nmb; - if (OB_FAIL(wide::to_number(json_datum->get_decimal_int(), json_datum->get_int_bytes(), - dec_scale, allocator, tmp_nmb))) { - LOG_WARN("to number faile", K(ret)); - } else { - tmp_ans = new (tmp_ans) ObJsonDecimal(tmp_nmb); - j_base = tmp_ans; - } - } - } else if (val_type == 
ObDateTimeType || val_type == ObDateType || val_type == ObTimeType) { - // datetime - ObTime ob_time; - int64_t date = json_datum->get_datetime(); - ObString j_str = json_datum->get_string(); - if (OB_FAIL(ObTimeConverter::usec_to_ob_time(json_datum->get_datetime(), ob_time))) { - LOG_WARN("fail to cast int to ob_time", K(ret)); - } else { - ObJsonDatetime* tmp_ans = static_cast (allocator.alloc(sizeof(ObJsonDatetime))); - if (OB_ISNULL(tmp_ans)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate row buffer failed at ObJsonDecimal", K(ret)); - } else { - tmp_ans = new (tmp_ans) ObJsonDatetime(ObJsonNodeType::J_DATE, ob_time); - j_base = tmp_ans; - } - } - } else if (ObTimestampTZType == val_type) { - ObTime ob_time; - ObOTimestampData date = json_datum->get_otimestamp_tiny(); - ObString j_str = json_datum->get_string(); - if (OB_FAIL(ObTimeConverter::otimestamp_to_ob_time(val_type, json_datum->get_otimestamp_tz(), NULL, ob_time))) { - LOG_WARN("fail to cast int to ob_time", K(ret)); - } else { - ObJsonDatetime* tmp_ans = static_cast (allocator.alloc(sizeof(ObJsonDatetime))); - if (OB_ISNULL(tmp_ans)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate row buffer failed at ObJsonDecimal", K(ret)); - } else { - tmp_ans = new (tmp_ans) ObJsonDatetime(ob_time, ObTimestampType); - j_base = tmp_ans; - } - } - } else if (ObTimestampLTZType == val_type || ObTimestampNanoType == val_type) { - ObTime ob_time; - ObOTimestampData date = json_datum->get_otimestamp_tiny(); - ObString j_str = json_datum->get_string(); - if (OB_FAIL(ObTimeConverter::otimestamp_to_ob_time(val_type, json_datum->get_otimestamp_tiny(), NULL, ob_time))) { - LOG_WARN("fail to cast int to ob_time", K(ret)); - } else { - ObJsonDatetime* tmp_ans = static_cast (allocator.alloc(sizeof(ObJsonDatetime))); - if (OB_ISNULL(tmp_ans)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate row buffer failed at ObJsonDecimal", K(ret)); - } else { - tmp_ans = new (tmp_ans) ObJsonDatetime(ob_time, 
ObTimestampType); - j_base = tmp_ans; - } + } else if (ObJsonExprHelper::is_convertible_to_json(val_type)) { + ObCollationType cs_type = json_arg->datum_meta_.cs_type_; + if (OB_FAIL(ObJsonExprHelper::transform_convertible_2jsonBase(*json_datum, val_type, + &allocator, cs_type, + j_base, ObConv2JsonParam(true, + json_arg->obj_meta_.has_lob_header(), + false, + true, + false)))) { + LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type)); } } else { - bool is_json_null = false; - if (OB_FAIL(ObJsonExprHelper::get_json_doc(expr, ctx, allocator, index, j_base, is_json_null, true, true))) { - LOG_WARN("parse json_data fail", K(ret)); + ObBasicSessionInfo *session = ctx.exec_ctx_.get_my_session(); + ObScale scale = json_arg->datum_meta_.scale_; + scale = (val_type == ObBitType) ? json_arg->datum_meta_.length_semantics_ : scale; + if (OB_ISNULL(session)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session is NULL", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::transform_scalar_2jsonBase(*json_datum, val_type, + &allocator, scale, + session->get_timezone_info(), + session, + j_base, true))) { + LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type)); } } return ret; @@ -364,8 +254,11 @@ int ObExprJsonExists::get_passing(const ObExpr &expr, ObEvalCtx &ctx, PassingMap ObIJsonBase *json_data = nullptr; // get json_value, value could be null - if (OB_FAIL(get_var_data(expr, ctx, temp_allocator, i, json_data))) { - LOG_WARN("get_json_doc failed", K(ret)); + if (type == ObNullType) { + ret = OB_ERR_INVALID_VARIABLE_IN_JSON_PATH; + LOG_USER_ERROR(OB_ERR_INVALID_VARIABLE_IN_JSON_PATH); + } else if (OB_FAIL(get_var_data(expr, ctx, temp_allocator, i, json_data))) { + LOG_WARN("fail to get json val", K(ret)); } else { // get keyname, keyname can't be null json_arg = expr.args_[i + 1]; @@ -387,7 +280,7 @@ int ObExprJsonExists::get_passing(const ObExpr &expr, ObEvalCtx &ctx, PassingMap ret = OB_ERR_JSON_ILLEGAL_ZERO_LENGTH_IDENTIFIER_ERROR; 
LOG_USER_ERROR(OB_ERR_JSON_ILLEGAL_ZERO_LENGTH_IDENTIFIER_ERROR); } else { - // Oracle中,若有两个相同keyname,后者的value会覆盖前者 + // In Oracle, if there are two identical keynames, the value of the latter will overwrite the former if (OB_FAIL(pass_map.set_refactored(keyname, json_data, 1))) { LOG_WARN("fail to set k-v for passing", K(i)); } @@ -432,65 +325,87 @@ int ObExprJsonExists::get_error_or_empty(const ObExpr &expr, ObEvalCtx &ctx, uin return ret; } -int ObExprJsonExists::set_result(ObDatum &res, const ObJsonBaseVector& hit, +int ObExprJsonExists::get_empty_option(int8_t option_on_empty, bool& res_val) +{ + INIT_SUCC(ret); + switch (option_on_empty) { + // The on empty clause has two problems + // 1. The on empty clause of json_exists, its track diagram is inconsistent with the document description: + // a. The on empty clause in the document includes: NULL ON EMPTY, ERROR ON EMPTY, DEFAULT literal ON EMPTY + // b. The on empty clause in the track diagram includes: ERROR ON EMPTY, FALSE ON EMPTY, TRUE ON EMPTY + // c. During the actual test, it is found that inputting NULL ON EMPTY will report a syntax error, and the modification clause cannot be recognized, so implement it according to the track diagram + // 2. According to the documentation, the on error of json_exists mainly checks whether the json data is wrong, and returns the response result. + // But the on empty clause cannot find the correct documentation for the corresponding clause due to reason 1. + // According to: whether the query result is empty / whether the path expression is empty / whether the json data is empty These three situations have been tried + // It turns out that error/false/true on empty behaves exactly the same in three cases: + // a. The query result or json data is empty, and all three on empty clauses return false + // b. 
The path expression is empty, and the three on empty clauses all return the error code when the path is empty + // Therefore the implementation follows Oracle's behavior and does not distinguish the three clauses: as long as there is no true result, it returns false + case OB_JSON_ERROR_ON_EMPTY: + case OB_JSON_FALSE_ON_EMPTY: + case OB_JSON_TRUE_ON_EMPTY: + case OB_JSON_DEFAULT_ON_ERROR: { + res_val = false; + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("on_empty_option type error", K(option_on_empty), K(ret)); + } + } + return ret; +} + +int ObExprJsonExists::get_error_option(int8_t option_on_error, bool& res_val) +{ + INIT_SUCC(ret); + switch (option_on_error) { + case OB_JSON_ERROR_ON_ERROR: { + ret = OB_ERR_JSON_SYNTAX_ERROR; + LOG_USER_ERROR(OB_ERR_JSON_SYNTAX_ERROR); + break; + } + case OB_JSON_FALSE_ON_ERROR: { + ret = OB_SUCCESS; + res_val = false; + break; + } + case OB_JSON_TRUE_ON_ERROR: { + ret = OB_SUCCESS; + res_val = true; + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("on_error_option type error", K(option_on_error), K(ret)); + } + } + return ret; +} + +int ObExprJsonExists::set_result(ObDatum &res, ObJsonSeekResult& hit, const uint8_t option_on_error, const uint8_t option_on_empty, const bool is_cover_by_error, const bool is_null_json) { INIT_SUCC(ret); + bool res_val = false; if (is_null_json) { - res.set_bool(false); } else if (is_cover_by_error) { - switch (option_on_error) { - case OB_JSON_ERROR_ON_ERROR: { - ret = OB_ERR_JSON_SYNTAX_ERROR; - LOG_USER_ERROR(OB_ERR_JSON_SYNTAX_ERROR); - break; - } - case OB_JSON_FALSE_ON_ERROR: { - ret = OB_SUCCESS; - res.set_bool(false); - break; - } - case OB_JSON_TRUE_ON_ERROR: { - ret = OB_SUCCESS; - res.set_bool(true); - break; - } - default: { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("on_empty_option type error", K(option_on_empty), K(ret)); - } + if (OB_FAIL(get_error_option(option_on_error, res_val))) { + LOG_WARN("fail to get error option", K(ret)); } } else { if
(hit.size() == 0) { - switch (option_on_empty) { - // on empty 子句有两个问题 - // 1. json_exists的on empty子句,其轨道图与文档说明不一致: - // a. 文档中on empty子句包括: NULL ON EMPTY, ERROR ON EMPTY, DEFAULT literal ON EMPTY - // b. 轨道图中on empty子句包括: ERROR ON EMPTY, FALSE ON EMPTY, TRUE ON EMPTY - // c. 实际测试时,发现输入NULL ON EMPTY会报语法错误,无法识别改子句,因此按照轨道图实现 - // 2. json_exists的on error依据文档说明,主要针对json数据是否出错进行检查,并返回响应结果。 - // 但on empty子句因为原因1,无法找到对应子句的正确文档说明。 - // 根据:查询结果是否为空 / path表达式是否为空 / json数据是否为空 这三种情况进行了尝试 - // 结果发现,三种情况下,error/false/true on empty 的行为完全一致: - // a. 查询结果或json数据为空,三种 on empty 子句均返回false - // b. path表达式为空,三种 on empty 子句均返回path为空时的错误码 - // 因此在实现时,遵循了oracle的行为,未对三种子句做区分,只要不存在为true的结果,即返回false - case OB_JSON_ERROR_ON_EMPTY: - case OB_JSON_FALSE_ON_EMPTY: - case OB_JSON_TRUE_ON_EMPTY: { - res.set_bool(false); - break; - } - default: { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("on_empty_option type error", K(option_on_empty), K(ret)); - } + if (OB_FAIL(get_empty_option(option_on_empty, res_val))) { + LOG_WARN("fail to get empty option", K(ret)); } } else { - res.set_bool(true); + res_val = true; } } + if (OB_SUCC(ret)) { + res.set_bool(res_val); + } return ret; } @@ -510,11 +425,11 @@ int ObExprJsonExists::eval_json_exists(const ObExpr &expr, ObEvalCtx &ctx, ObDat bool is_cover_by_error = false; uint8_t option_on_error = 0; uint8_t option_on_empty = 0; - ObJsonBaseVector hit; + ObJsonSeekResult hit; // get json - // json 数据为空时不报错,无论如何都不报错,且结果为false - // error on error时,json解析错误时报错,否则不报错(默认时false on error) + // Empty json data never raises an error, whatever the options are; the result is simply false + // With error on error, a json parse failure is reported as an error; otherwise no error is raised (false on error is the default) // ORA-40441: JSON syntax error if (OB_FAIL(ObJsonExprHelper::get_json_doc(expr, ctx, temp_allocator, 0, json_data, is_null_json))) { @@ -550,14 +465,14 @@ int ObExprJsonExists::eval_json_exists(const ObExpr &expr, ObEvalCtx &ctx, ObDat } // get on_error &&
on_empty - // 若json_data是空, 结果无论如何都是false, 即便是 true on error - // 若json_data解析出错, 结果由on error子句决定 + // if json_data is null, return false whatever, even true on error + // if json_data parse fail, result design by on error clause if (OB_SUCC(ret) || is_cover_by_error) { if (is_null_json) { option_on_error = 0; option_on_empty = 0; } else { - // 防止get_error_or_empty导致错误码被吞 + // Prevent get_error_or_empty from causing error codes to be swallowed int tmp_ret = ret; if (OB_FAIL(get_error_or_empty(expr, ctx, param_num - 2, option_on_error))) { LOG_WARN("fail to get option_on_error for json_exists", K(ret)); diff --git a/src/sql/engine/expr/ob_expr_json_exists.h b/src/sql/engine/expr/ob_expr_json_exists.h index ea890500d3..c505ca00c8 100644 --- a/src/sql/engine/expr/ob_expr_json_exists.h +++ b/src/sql/engine/expr/ob_expr_json_exists.h @@ -32,6 +32,7 @@ public: ObExprResType* types, int64_t param_num, common::ObExprTypeCtx& type_ctx) const override; + virtual bool need_rt_ctx() const override { return true; } static int get_path(const ObExpr &expr, ObEvalCtx &ctx, ObJsonPath* &j_path, common::ObArenaAllocator &allocator, ObJsonPathCache &ctx_cache, ObJsonPathCache* &path_cache); @@ -43,7 +44,9 @@ public: static int get_passing(const ObExpr &expr, ObEvalCtx &ctx, PassingMap &pass_map, uint32_t param_num, common::ObArenaAllocator &temp_allocator); static int get_error_or_empty(const ObExpr &expr, ObEvalCtx &ctx, uint32_t idx, uint8_t &result); - static int set_result(ObDatum &res, const ObJsonBaseVector& hit, + static int get_error_option(int8_t option_on_error, bool& res_val); + static int get_empty_option(int8_t option_on_empty, bool& res_val); + static int set_result(ObDatum &res, ObJsonSeekResult& hit, const uint8_t option_on_error, const uint8_t option_on_empty, const bool is_cover_by_error, const bool is_null_json); static int eval_json_exists(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); diff --git a/src/sql/engine/expr/ob_expr_json_extract.cpp 
b/src/sql/engine/expr/ob_expr_json_extract.cpp index 9e40178ec0..cd592c7a1a 100644 --- a/src/sql/engine/expr/ob_expr_json_extract.cpp +++ b/src/sql/engine/expr/ob_expr_json_extract.cpp @@ -14,7 +14,9 @@ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_json_extract.h" #include "ob_expr_json_func_helper.h" +#include "share/ob_json_access_utils.h" #include "lib/json_type/ob_json_tree.h" +#include "lib/xml/ob_binary_aggregate.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -138,11 +140,15 @@ int ObExprJsonExtract::eval_json_extract(const ObExpr &expr, ObEvalCtx &ctx, ObD } LOG_WARN("fail to handle json param 0 in json extract in new sql engine", K(ret)); } else if (is_null_result == false) { - ObJsonBaseVector hit; + ObJsonSeekResult hit; + ObJsonSeekResult hits; + ObJsonBin res_json(&allocator); + hit.res_point_ = &res_json; ObJsonPathCache ctx_cache(&allocator); ObJsonPathCache* path_cache = ObJsonExprHelper::get_path_cache_ctx(expr.expr_ctx_id_, &ctx.exec_ctx_); path_cache = ((path_cache != NULL) ? 
path_cache : &ctx_cache); for (int64_t i = 1; OB_SUCC(ret) && (!is_null_result) && i < expr.arg_cnt_; i++) { + hit.reset(); ObDatum *path_data = NULL; if (OB_FAIL(expr.args_[i]->eval(ctx, path_data))) { LOG_WARN("eval json path datum failed", K(ret)); @@ -161,12 +167,18 @@ int ObExprJsonExtract::eval_json_extract(const ObExpr &expr, ObEvalCtx &ctx, ObD if (j_path->can_match_many()) { may_match_many = true; } + for (int64_t j = 0; OB_SUCC(ret) && j < hit.size(); j++) { + if (OB_FAIL(hits.push_node(hit[j]))) { + LOG_WARN("push hit into hits failed.", K(ret), K(j)); + } + } } } } - int32_t hit_size = hit.size(); + int32_t hit_size = hits.size(); ObJsonArray j_arr_res(&allocator); + ObStringBuffer value(&allocator); ObIJsonBase *jb_res = NULL; if (OB_UNLIKELY(OB_FAIL(ret))) { LOG_WARN("json seek failed", K(ret)); @@ -174,30 +186,37 @@ int ObExprJsonExtract::eval_json_extract(const ObExpr &expr, ObEvalCtx &ctx, ObD res.set_null(); } else { if (hit_size == 1 && (may_match_many == false)) { - jb_res = hit[0]; + jb_res = hits[0]; + ObString raw_str; + if (OB_FAIL(ret)) { + LOG_WARN("json extarct get results failed", K(ret)); + } else if (OB_FAIL(jb_res->get_raw_binary(raw_str, &allocator))) { + LOG_WARN("json extarct get result binary failed", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_str))) { + LOG_WARN("fail to pack json result", K(ret)); + } } else { - jb_res = &j_arr_res; - ObJsonNode *j_node = NULL; + ObBinAggSerializer bin_agg(&allocator, AGG_JSON, static_cast(ObJsonNodeType::J_ARRAY)); + ObJsonBin *j_node = NULL; ObIJsonBase *jb_node = NULL; for (int32_t i = 0; OB_SUCC(ret) && i < hit_size; i++) { - if (ObJsonBaseFactory::transform(&allocator, hit[i], ObJsonInType::JSON_TREE, jb_node)) { // to tree - LOG_WARN("fail to transform to tree", K(ret), K(i), K(*(hit[i]))); + if (OB_FAIL(ObJsonBaseFactory::transform(&allocator, hits[i], ObJsonInType::JSON_BIN, jb_node))) { // to binary + LOG_WARN("fail to transform to binary",
K(ret), K(i), K(*(hits[i]))); } else { - j_node = static_cast(jb_node); - if (OB_FAIL(jb_res->array_append(j_node->clone(&allocator)))) { - LOG_WARN("result array append failed", K(ret), K(i), K(*j_node)); + j_node = static_cast(jb_node); + ObString key; + if (OB_FAIL(bin_agg.append_key_and_value(key, value, j_node))) { + LOG_WARN("failed to append key and value", K(ret)); } } } - } - ObString raw_str; - if (OB_FAIL(ret)) { - LOG_WARN("json extarct get results failed", K(ret)); - } else if (OB_FAIL(jb_res->get_raw_binary(raw_str, &allocator))) { - LOG_WARN("json extarct get result binary failed", K(ret)); - } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_str))) { - LOG_WARN("fail to pack json result", K(ret)); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(bin_agg.serialize())) { + LOG_WARN("failed to serialize bin agg.", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, bin_agg.get_buffer()->string()))) { + LOG_WARN("failed to pack json res.", K(ret)); + } } } } else if (OB_SUCC(ret) && is_null_result) { diff --git a/src/sql/engine/expr/ob_expr_json_func_helper.cpp b/src/sql/engine/expr/ob_expr_json_func_helper.cpp index b8e8ac7981..472a395c94 100644 --- a/src/sql/engine/expr/ob_expr_json_func_helper.cpp +++ b/src/sql/engine/expr/ob_expr_json_func_helper.cpp @@ -13,6 +13,7 @@ #define USING_LOG_PREFIX SQL_ENG #include "lib/ob_errno.h" +#include "share/ob_json_access_utils.h" #include "sql/engine/expr/ob_expr_cast.h" #include "sql/engine/ob_exec_context.h" #include "sql/engine/expr/ob_datum_cast.h" @@ -21,6 +22,9 @@ #include "lib/utility/ob_fast_convert.h" // ObFastFormatInt::format_unsigned #include "lib/charset/ob_dtoa.h" // ob_gcvt_opt #include "rpc/obmysql/ob_mysql_global.h" // DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE +#include "sql/ob_result_set.h" +#include "sql/ob_spi.h" +#include "storage/lob/ob_lob_manager.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -87,7 +91,7 @@ int 
ObJsonExprHelper::ensure_collation(ObObjType type, ObCollationType cs_type) } int ObJsonExprHelper::get_json_or_str_data(ObExpr *expr, ObEvalCtx &ctx, - common::ObArenaAllocator &allocator, + common::ObIAllocator &allocator, ObString& str, bool& is_null) { INIT_SUCC(ret); @@ -109,10 +113,51 @@ int ObJsonExprHelper::get_json_or_str_data(ObExpr *expr, ObEvalCtx &ctx, } return ret; } + +int ObJsonExprHelper::get_json_schema(const ObExpr &expr, ObEvalCtx &ctx, + common::ObArenaAllocator &allocator, + uint16_t index, ObIJsonBase*& j_schema, + bool &is_null) +{ + INIT_SUCC(ret); + + ObDatum *schema_datum = nullptr; + ObExpr *schema_arg = expr.args_[index]; + ObObjType type = schema_arg->datum_meta_.type_; + ObCollationType cs_type = schema_arg->datum_meta_.cs_type_; + if (OB_FAIL(schema_arg->eval(ctx, schema_datum))) { + LOG_WARN("eval json arg failed", K(ret)); + } else if (type == ObNullType || schema_datum->is_null()) { + is_null = true; + } else if (!ob_is_string_type(type) && type != ObJsonType) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("input type error", K(type)); + } else if (lib::is_mysql_mode() && OB_FAIL(ObJsonExprHelper::ensure_collation(type, cs_type))) { + LOG_WARN("fail to ensure collation", K(ret), K(type), K(cs_type)); + } else { + ObString j_str; + if (OB_FAIL(get_json_or_str_data(schema_arg, ctx, allocator, j_str, is_null))) { + LOG_WARN("fail to get real data.", K(ret), K(j_str)); + } else if (is_null) { + } else { + ObJsonInType j_in_type = ObJsonExprHelper::get_json_internal_type(type); + ObJsonSchemaCache ctx_cache(&allocator); + ObJsonSchemaCache* schema_cache = ObJsonExprHelper::get_schema_cache_ctx(expr.expr_ctx_id_, &ctx.exec_ctx_); + schema_cache = ((schema_cache != nullptr) ? 
schema_cache : &ctx_cache); + + if (OB_SUCC(ret) && OB_FAIL(ObJsonExprHelper::find_and_add_schema_cache(schema_cache, j_schema, j_str, 1, j_in_type))) { + LOG_WARN("invalid json schema", K(ret)); + } + } + } + return ret; +} + int ObJsonExprHelper::get_json_doc(const ObExpr &expr, ObEvalCtx &ctx, common::ObArenaAllocator &allocator, uint16_t index, ObIJsonBase*& j_base, - bool &is_null, bool need_to_tree, bool relax) + bool &is_null, bool need_to_tree, + bool relax, bool preserve_dup) { INIT_SUCC(ret); ObDatum *json_datum = NULL; @@ -121,6 +166,7 @@ int ObJsonExprHelper::get_json_doc(const ObExpr &expr, ObEvalCtx &ctx, ObCollationType cs_type = json_arg->datum_meta_.cs_type_; bool is_oracle = lib::is_oracle_mode(); + bool allow_partial_update = false; if (OB_UNLIKELY(OB_FAIL(json_arg->eval(ctx, json_datum)))) { LOG_WARN("eval json arg failed", K(ret)); @@ -131,6 +177,13 @@ int ObJsonExprHelper::get_json_doc(const ObExpr &expr, ObEvalCtx &ctx, LOG_WARN("input type error", K(val_type)); } else if (lib::is_mysql_mode() && OB_FAIL(ObJsonExprHelper::ensure_collation(val_type, cs_type))) { LOG_WARN("fail to ensure collation", K(ret), K(val_type), K(cs_type)); + } else if (ob_is_json(val_type) + && OB_FAIL(ObJsonExprHelper::is_allow_partial_update(expr, ctx, json_datum->get_string(), allow_partial_update))) { + LOG_WARN("get partial updaet setting fail", K(ret)); + } else if (allow_partial_update) { + if (OB_FAIL(get_json_for_partial_update(expr, *json_arg, ctx, allocator, *json_datum, j_base))) { + LOG_WARN("get_json_for_partial_update fail", K(ret), K(val_type)); + } } else { ObString j_str; if (OB_FAIL(get_json_or_str_data(json_arg, ctx, allocator, j_str, is_null))) { @@ -141,6 +194,7 @@ int ObJsonExprHelper::get_json_doc(const ObExpr &expr, ObEvalCtx &ctx, ObJsonInType expect_type = need_to_tree ? ObJsonInType::JSON_TREE : j_in_type; bool relax_json = (lib::is_oracle_mode() && relax); uint32_t parse_flag = relax_json ? 
ObJsonParser::JSN_RELAXED_FLAG : 0; + ADD_FLAG_IF_NEED(preserve_dup, parse_flag, ObJsonParser::JSN_PRESERVE_DUP_FLAG); if (is_oracle && j_str.length() == 0) { is_null = true; } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&allocator, j_str, j_in_type, @@ -158,6 +212,160 @@ int ObJsonExprHelper::get_json_doc(const ObExpr &expr, ObEvalCtx &ctx, return ret; } +int ObJsonExprHelper::get_const_json_schema(const common::ObObj &data, const char* func_name, + common::ObIAllocator *allocator, ObIJsonBase*& j_schema) +{ + INIT_SUCC(ret); + ObObjType val_type = data.get_type(); + if (data.is_null()) { + void *json_node_buf = allocator->alloc(sizeof(ObJsonNull)); + if (OB_ISNULL(json_node_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed: allocate ObJsonNull", K(ret)); + } else { + ObJsonNull *null_node = static_cast(new(json_node_buf) ObJsonNull()); + if (OB_FAIL(ObJsonBaseFactory::transform(allocator, null_node, ObJsonInType::JSON_BIN, j_schema))) { + LOG_WARN("failed: json tree to bin", K(ret)); + } + } + } else if (ObJsonExprHelper::is_convertible_to_json(val_type)) { + ObCollationType cs_type = data.get_collation_type(); + ObIJsonBase* j_base = nullptr; + // whether it is Oracle or MySQL, only lowercase true/false is considered a Boolean value + // so, use strict mode + if (OB_FAIL(ObJsonExprHelper::transform_convertible_2jsonBase(data, val_type, allocator, + cs_type, j_base, ObConv2JsonParam(false, + data.has_lob_header(), false, + lib::is_oracle_mode(), true, true)))) { + if (ret == OB_ERR_NULL_VALUE || ret == OB_ERR_INVALID_JSON_TEXT) { // json_str is null + if (lib::is_oracle_mode()) { + ret = OB_ERR_JSON_SYNTAX_ERROR; + } else { + ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; + LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT_IN_PARAM); + } + } + LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type)); + } else if (j_base->json_type() != common::ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_TYPE_OF_JSON_SCHEMA; + LOG_WARN("json schema must be object",
K(ret), K(j_base->json_type())); + } else { + ObJsonSchemaTree json_schema(allocator); + ObJsonArray* schema_map = nullptr; + if (OB_FAIL(json_schema.build_schema_tree(j_base))) { + LOG_WARN("invalid json schema", K(ret)); + } else if (OB_ISNULL(schema_map = json_schema.get_schema_map())) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("should not be null", K(ret)); + } else if (OB_FAIL(ObJsonBaseFactory::transform(allocator, schema_map, ObJsonInType::JSON_BIN, j_schema))) { + LOG_WARN("failed: json schema to bin", K(ret)); + } + } + } else { + ret = OB_ERR_INVALID_TYPE_FOR_JSON; + LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_JSON, 1, func_name); + } + return ret; +} + +int ObJsonExprHelper::get_partial_json_bin( + ObIAllocator &allocator, + ObILobCursor *cursor, + ObJsonBinUpdateCtx *update_ctx, + ObIJsonBase *&j_base) +{ + INIT_SUCC(ret); + ObJsonBinCtx *bin_ctx = nullptr; + ObJsonBin *j_bin = nullptr; + if (OB_ISNULL(cursor)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cursor is null", KR(ret)); + } else if (OB_ISNULL(update_ctx) && OB_ISNULL(update_ctx = OB_NEWx(ObJsonBinUpdateCtx, &allocator, &allocator))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc update ctx fail", K(ret)); + } else if (OB_FALSE_IT(update_ctx->set_lob_cursor(cursor))) { + // build json bin + } else if (OB_ISNULL(bin_ctx = OB_NEWx(ObJsonBinCtx, &allocator))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc ctx fail", K(ret), K(sizeof(ObJsonBinCtx))); + } else if (OB_FALSE_IT(bin_ctx->update_ctx_ = update_ctx)) { + } else if (OB_FALSE_IT(bin_ctx->is_update_ctx_alloc_ = true)) { + } else if (OB_ISNULL(j_bin = OB_NEWx(ObJsonBin, &allocator, &allocator, bin_ctx, true))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc update ctx fail", K(ret)); + } else if (OB_FAIL(j_bin->reset_iter())) { + LOG_WARN("fail to reset iter", K(ret)); + } else if (OB_FALSE_IT(j_bin->set_seek_flag(false))) { + } else { + j_base = j_bin; + } + return ret; +} + +int ObJsonExprHelper::get_json_for_partial_update( + 
const ObExpr &expr, + const ObExpr &json_expr, + ObEvalCtx &ctx, + ObIAllocator &allocator, + ObDatum &json_datum, + ObIJsonBase *&j_base) +{ + INIT_SUCC(ret); + ObString lob_str = json_datum.get_string(); + ObLobLocatorV2 locator(lob_str); + bool allow_partial_update = false; + ObString metas; + storage::ObLobCursor *cursor = nullptr; + int64_t query_timeout_ts = ObTimeUtility::current_time() + 60 * USECS_PER_SEC; + storage::ObLobManager *lob_mgr = MTL(storage::ObLobManager*); + uint8_t root_type = 0; + if (lob_str.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lob locator is empty", KR(ret)); + } else if (OB_FAIL(get_session_query_timeout_ts(ctx, query_timeout_ts))) { + LOG_WARN("get_session_query_timeout fail", K(ret), K(locator)); + } else if (locator.is_delta_temp_lob()) { + ObJsonDeltaLob delta_lob; + if (OB_FAIL(delta_lob.init(&allocator, locator, query_timeout_ts))) { + LOG_WARN("init json delta lob fail", K(ret), K(locator)); + } else { + j_base = delta_lob.get_json_bin(); + } + } else if (! locator.is_persist_lob() || locator.is_inrow()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("persis lob or no-delta inrow lob locator not support", KR(ret), K(locator)); + } else if (OB_FAIL(lob_mgr->query(&allocator, locator, query_timeout_ts, false, nullptr, cursor))) { + LOG_WARN("build lob cursor fail", K(ret), K(locator)); + } else if (OB_FAIL(cursor->read_i8(0, reinterpret_cast(&root_type)))) { + LOG_WARN("read root type fail", KR(ret), KPC(cursor)); + } else if (! ObJsonBin::is_doc_header(root_type)) { + // if root type not doc header, means that old json data. 
+ // old json is not binary charset in lob, so can not use partial lob + ObString j_str; + if (cursor->has_one_chunk_with_all_data()) { + LOG_DEBUG("one chunk will all data", K(lob_str), K(root_type), KPC(cursor), K(json_datum), K(json_expr)); + if (OB_FAIL(cursor->get_one_chunk_with_all_data(j_str))) { + LOG_WARN("get real data fail", KR(ret), K(json_datum), K(json_expr)); + } + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(allocator, json_datum, + json_expr.datum_meta_, json_expr.obj_meta_.has_lob_header(), j_str))) { + LOG_WARN("get real data fail", KR(ret), K(json_datum), K(json_expr)); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObJsonBaseFactory::get_json_base( + &allocator, j_str, ObJsonInType::JSON_BIN, ObJsonInType::JSON_TREE, j_base, 0))) { + LOG_WARN("get json base fail", K(ret), K(j_str)); + } + cursor->~ObLobCursor(); + cursor = nullptr; + } else if (OB_FAIL(get_partial_json_bin(allocator, cursor, nullptr, j_base))) { + LOG_WARN("fail to reset iter", K(ret)); + } else if (OB_ISNULL(j_base)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get j_base is null", KR(ret), K(locator)); + } + return ret; +} int ObJsonExprHelper::get_json_val(const common::ObObj &data, ObExprCtx &ctx, bool is_bool, common::ObIAllocator *allocator, @@ -198,8 +406,8 @@ int ObJsonExprHelper::get_json_val(const common::ObObj &data, ObExprCtx &ctx, } else if (ObJsonExprHelper::is_convertible_to_json(val_type)) { ObCollationType cs_type = data.get_collation_type(); if (OB_FAIL(ObJsonExprHelper::transform_convertible_2jsonBase(data, val_type, allocator, - cs_type, j_base, to_bin, - data.has_lob_header(), false))) { + cs_type, j_base, ObConv2JsonParam(to_bin, + data.has_lob_header(), false)))) { LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type)); } } else { @@ -219,6 +427,55 @@ int ObJsonExprHelper::get_json_val(const common::ObObj &data, ObExprCtx &ctx, return ret; } +int ObJsonExprHelper::get_json_val(const common::ObDatum &data, + ObExecContext &ctx, + 
ObExpr* expr, + common::ObIAllocator *allocator, + ObObjType val_type, + ObCollationType &cs_type, + ObIJsonBase*& j_base, bool to_bin) +{ + INIT_SUCC(ret); + if (data.is_null()) { + void *json_node_buf = allocator->alloc(sizeof(ObJsonNull)); + if (OB_ISNULL(json_node_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed: alloscate jsonboolean", K(ret)); + } else { + ObJsonNull *null_node = static_cast(new(json_node_buf) ObJsonNull()); + if (to_bin) { + if (OB_FAIL(ObJsonBaseFactory::transform(allocator, null_node, ObJsonInType::JSON_BIN, j_base))) { + LOG_WARN("failed: json tree to bin", K(ret)); + } + } else { + j_base = null_node; + } + } + } else if (ObJsonExprHelper::is_convertible_to_json(val_type)) { + if (OB_FAIL(ObJsonExprHelper::transform_convertible_2jsonBase(data, val_type, allocator, + cs_type, j_base, ObConv2JsonParam(to_bin, + expr->obj_meta_.has_lob_header(), + false, lib::is_oracle_mode(), true)))) { + LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type)); + } + } else { + ObBasicSessionInfo *session = ctx.get_my_session(); + if (OB_ISNULL(session)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session is NULL", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::transform_scalar_2jsonBase(data, val_type, + allocator, expr->datum_meta_.scale_, + session->get_timezone_info(), + session, + j_base, to_bin, + expr->is_boolean_))) { + LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type)); + } + } + + return ret; +} + int ObJsonExprHelper::cast_to_json_tree(ObString &text, common::ObIAllocator *allocator, uint32_t parse_flag) { INIT_SUCC(ret); @@ -239,6 +496,97 @@ int ObJsonExprHelper::cast_to_json_tree(ObString &text, common::ObIAllocator *al return ret; } +int ObJsonExprHelper::cast_to_res(ObIAllocator &allocator, + ObDatum &src_datum, + const ObExpr &expr, + const ObExpr &default_expr, + ObEvalCtx &ctx, + ObDatum &res, + bool xt_need_acc_check) +{ + int ret = OB_SUCCESS; + ObCastMode def_cm = CM_NONE; + ObSQLSessionInfo *session = 
NULL; + ObObj dst_obj, buf_obj, src_obj; + const ObObj *res_obj = NULL; + ObAccuracy out_acc; + if (src_datum.is_null()) { + res.set_null(); + } else if (OB_FAIL(src_datum.to_obj(src_obj, default_expr.obj_meta_, default_expr.obj_datum_map_))) { + LOG_WARN("fail cast datum to obj", K(ret)); + } else { + // to type + if (OB_ISNULL(session = ctx.exec_ctx_.get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sessioninfo is NULL"); + } else if (OB_FAIL(ObSQLUtils::get_default_cast_mode(session->get_stmt_type(), + session, def_cm))) { + LOG_WARN("get_default_cast_mode failed", K(ret)); + } else { + ObObjType obj_type = expr.datum_meta_.type_; + ObCollationType cs_type = expr.datum_meta_.cs_type_; + ObPhysicalPlanCtx *phy_plan_ctx = ctx.exec_ctx_.get_physical_plan_ctx(); + const ObDataTypeCastParams dtc_params = ObBasicSessionInfo::create_dtc_params(session); + ObCastCtx cast_ctx(&allocator, &dtc_params, get_cur_time(phy_plan_ctx), def_cm, + cs_type, NULL, NULL); + if (OB_FAIL(ObObjCaster::to_type(obj_type, cs_type, cast_ctx, src_obj, dst_obj))) { + LOG_WARN("failed to cast object to ", K(ret), K(src_obj), K(obj_type)); + } else if (FALSE_IT(get_accuracy_from_expr(expr, out_acc))) { + } else if (FALSE_IT(res_obj = &dst_obj)) { + } else if (OB_FAIL(obj_accuracy_check(cast_ctx, out_acc, cs_type, dst_obj, buf_obj, res_obj))) { + if (!xt_need_acc_check && (ob_is_varchar_or_char(obj_type, cs_type) || ob_is_nchar(obj_type)) && ret == OB_ERR_DATA_TOO_LONG) { + ObLengthSemantics ls = lib::is_oracle_mode() ? + expr.datum_meta_.length_semantics_ : LS_CHAR; + const char* str = dst_obj.get_string_ptr(); + int32_t str_len_byte = dst_obj.get_string_len(); + int64_t char_len = 0; + int32_t trunc_len_byte = 0; + trunc_len_byte = (ls == LS_BYTE ? 
+ ObCharset::max_bytes_charpos(cs_type, str, str_len_byte, + expr.max_length_, char_len): + ObCharset::charpos(cs_type, str, str_len_byte, expr.max_length_)); + if (trunc_len_byte == 0) { + (const_cast(res_obj))->set_null(); + } else { + (const_cast(res_obj))->set_common_value(ObString(trunc_len_byte, str)); + } + ret = OB_SUCCESS; + } else { + LOG_WARN("accuracy check failed", K(ret), K(out_acc), K(res_obj)); + } + } else if (OB_FAIL(ObSPIService::spi_pad_char_or_varchar(session, obj_type, out_acc, &allocator, const_cast(res_obj)))) { + LOG_WARN("fail to pad char", K(ret), K(*res_obj)); + } + + if (OB_SUCC(ret)) { + if (OB_NOT_NULL(res_obj)) { + res.from_obj(*res_obj); + ObExprStrResAlloc res_alloc(expr, ctx); + if (OB_FAIL(res.deep_copy(res, res_alloc))) { + LOG_WARN("fail to deep copy for res datum", K(ret), KPC(res_obj), K(res)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("res obj is NULL", K(ret)); + } + } + } + } + return ret; +} + +void ObJsonExprHelper::get_accuracy_from_expr(const ObExpr &expr, ObAccuracy &accuracy) +{ + accuracy.set_length(expr.max_length_); + accuracy.set_scale(expr.datum_meta_.scale_); + const ObObjTypeClass &dst_tc = ob_obj_type_class(expr.datum_meta_.type_); + if (ObStringTC == dst_tc || ObTextTC == dst_tc) { + accuracy.set_length_semantics(expr.datum_meta_.length_semantics_); + } else { + accuracy.set_precision(expr.datum_meta_.precision_); + } +} + int ObJsonExprHelper::get_json_val(const ObExpr &expr, ObEvalCtx &ctx, common::ObIAllocator *allocator, uint16_t index, ObIJsonBase*& j_base, bool to_bin, bool format_json, @@ -284,11 +632,11 @@ int ObJsonExprHelper::get_json_val(const ObExpr &expr, ObEvalCtx &ctx, ObCollationType cs_type = json_arg->datum_meta_.cs_type_; if (OB_FAIL(ObJsonExprHelper::transform_convertible_2jsonBase(*json_datum, val_type, allocator, cs_type, - j_base, to_bin, + j_base, ObConv2JsonParam(to_bin, json_arg->obj_meta_.has_lob_header(), false, HAS_FLAG(parse_flag, ObJsonParser::JSN_RELAXED_FLAG), - 
format_json))) { + format_json)))) { LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type)); } } else { @@ -612,30 +960,69 @@ int ObJsonExprHelper::json_base_replace(ObIJsonBase *json_old, ObIJsonBase *json ObIJsonBase *&json_doc) { INIT_SUCC(ret); - if (json_old == json_doc) { - json_doc = json_new; - } else { - ObIJsonBase *json_old_tree = json_old; - ObIAllocator *allocator = json_doc->get_allocator(); - if (!json_old->is_tree() && - ObJsonBaseFactory::transform(allocator, json_old, ObJsonInType::JSON_TREE, json_old_tree)) { - LOG_WARN("fail to transform to tree", K(ret), K(*json_old)); + ObIAllocator *allocator = nullptr; + ObIJsonBase *parent = nullptr; + ObIJsonBase *new_node = json_new; + + if (OB_ISNULL(json_old) || OB_ISNULL(json_new) || OB_ISNULL(json_doc)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("json_new or json_old or json_doc is null", K(ret), KP(json_old), KP(json_new), KP(json_doc)); + } else if (OB_ISNULL(allocator = json_doc->get_allocator())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("allocator is null", K(ret), KPC(json_doc)); + } else if (OB_FAIL(json_old->get_parent(parent))) { + LOG_WARN("get old parent fail", K(ret)); + } else if (json_old == json_doc || OB_ISNULL(parent)) { + // for json binary, json_old and json_doc may be at same position, + if (OB_FAIL(json_doc->reset())) { + LOG_WARN("reset fail", K(ret), KPC(json_doc)); } else { - ObIJsonBase *parent = static_cast(json_old_tree)->get_parent(); - if(OB_NOT_NULL(parent) && parent != json_doc) { - if (OB_FAIL(parent->replace(json_old_tree, json_new))) { - LOG_WARN("json base replace failed", K(ret)); - } - } else { - if (OB_FAIL(json_doc->replace(json_old_tree, json_new))) { - LOG_WARN("json base replace failed", K(ret)); - } + json_doc = json_new; + // json_new may no allocator, so update + if (OB_ISNULL(json_doc->get_allocator())) { + json_doc->set_allocator(allocator); } } + } else { + if (json_doc->is_bin()) { + if (OB_NOT_NULL(json_new) && ! 
json_new->is_bin() && OB_FAIL(ObJsonBaseFactory::transform(allocator, json_new, ObJsonInType::JSON_BIN, new_node))) { + LOG_WARN("fail to transform to tree", K(ret), K(json_new)); + } + } else { + if (OB_NOT_NULL(json_new) && ! json_new->is_tree() && ObJsonBaseFactory::transform(allocator, json_new, ObJsonInType::JSON_TREE, new_node)) { + LOG_WARN("fail to transform to tree", K(ret), K(*json_old)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(parent->replace(json_old, new_node))) { + LOG_WARN("json base replace failed", K(ret)); + } else if (OB_FAIL(refresh_root_when_bin_rebuild_all(json_doc))) { + LOG_WARN("refresh_root_when_bin_rebuild_all fail", K(ret)); + } } return ret; } +// get json expr param cache context, if not exists cache context do nothing +ObJsonParamCacheCtx* ObJsonExprHelper::get_param_cache_ctx(const uint64_t& id, ObExecContext *exec_ctx) +{ + INIT_SUCC(ret); + ObJsonParamCacheCtx* cache_ctx = NULL; + if (ObExpr::INVALID_EXP_CTX_ID != id) { + cache_ctx = static_cast(exec_ctx->get_expr_op_ctx(id)); + if (OB_ISNULL(cache_ctx)) { + // if pathcache not exist, create one + void *cache_ctx_buf = NULL; + ret = exec_ctx->create_expr_op_ctx(id, sizeof(ObJsonParamCacheCtx), cache_ctx_buf); + if (OB_SUCC(ret) && OB_NOT_NULL(cache_ctx_buf)) { + cache_ctx = new (cache_ctx_buf) ObJsonParamCacheCtx(&exec_ctx->get_allocator()); + } + } + } + return (cache_ctx == NULL) ? NULL : cache_ctx; +} + // get json expr path cache context, if not exists cache context do nothing ObJsonPathCache* ObJsonExprHelper::get_path_cache_ctx(const uint64_t& id, ObExecContext *exec_ctx) { @@ -655,11 +1042,48 @@ ObJsonPathCache* ObJsonExprHelper::get_path_cache_ctx(const uint64_t& id, ObExec return (cache_ctx == NULL) ? 
NULL : cache_ctx->get_path_cache(); } -int ObJsonExprHelper::find_and_add_cache(ObJsonPathCache* path_cache, ObJsonPath*& res_path, - ObString& path_str, int arg_idx, bool enable_wildcard) +// get json expr schema cache context, if not exists cache context do nothing +ObJsonSchemaCache* ObJsonExprHelper::get_schema_cache_ctx(const uint64_t& id, ObExecContext *exec_ctx) { INIT_SUCC(ret); - if (OB_FAIL(path_cache->find_and_add_cache(res_path, path_str, arg_idx))) { + ObJsonSchemaCacheCtx* cache_ctx = NULL; + if (ObExpr::INVALID_EXP_CTX_ID != id) { + cache_ctx = static_cast< ObJsonSchemaCacheCtx*>(exec_ctx->get_expr_op_ctx(id)); + if (OB_ISNULL(cache_ctx)) { + // if pathcache not exist, create one + void *cache_ctx_buf = NULL; + ret = exec_ctx->create_expr_op_ctx(id, sizeof( ObJsonSchemaCacheCtx), cache_ctx_buf); + if (OB_SUCC(ret) && OB_NOT_NULL(cache_ctx_buf)) { + cache_ctx = new (cache_ctx_buf) ObJsonSchemaCacheCtx(&exec_ctx->get_allocator()); + } + } + } + return (cache_ctx == NULL) ? NULL : cache_ctx->get_schema_cache(); +} + +int ObJsonExprHelper::find_and_add_schema_cache(ObJsonSchemaCache* schema_cache, ObIJsonBase*& res_schema, + ObString& schema_str, int arg_idx, const ObJsonInType& in_type) +{ + INIT_SUCC(ret); + if (OB_FAIL(schema_cache->find_and_add_cache(res_schema, schema_str, arg_idx, in_type))) { + if (ret == OB_ERR_INVALID_JSON_TEXT) { + if (lib::is_oracle_mode()) { + ret = OB_ERR_JSON_SYNTAX_ERROR; + } else { + ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; + LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT_IN_PARAM); + } + } + } + return ret; +} + +int ObJsonExprHelper::find_and_add_cache(ObJsonPathCache* path_cache, ObJsonPath*& res_path, + ObString& path_str, int arg_idx, bool enable_wildcard, + bool is_const) +{ + INIT_SUCC(ret); + if (OB_FAIL(path_cache->find_and_add_cache(res_path, path_str, arg_idx, is_const))) { ret = OB_ERR_INVALID_JSON_PATH; LOG_USER_ERROR(OB_ERR_INVALID_JSON_PATH); } else if (!enable_wildcard && res_path->can_match_many()) { @@ -781,7 
+1205,8 @@ int ObJsonExprHelper::is_json_zero(const ObString& data, int& result) { INIT_SUCC(ret); int tmp_result = 0; - ObJsonBin j_bin(data.ptr(), data.length()); + ObJsonBinCtx ctx; + ObJsonBin j_bin(data.ptr(), data.length(), &ctx); if (data.length() == 0) { result = 1; } else if (OB_FAIL(j_bin.reset_iter())) { @@ -798,8 +1223,8 @@ int ObJsonExprHelper::is_json_true(const ObString& data, int& result) { INIT_SUCC(ret); int tmp_result = 0; - - ObJsonBin j_bin(data.ptr(), data.length()); + ObJsonBinCtx ctx; + ObJsonBin j_bin(data.ptr(), data.length(), &ctx); if (data.length() == 0) { result = 0; } else if (OB_FAIL(j_bin.reset_iter())) { @@ -1198,11 +1623,7 @@ int ObJsonExprHelper::transform_convertible_2jsonBase(const T &datum, common::ObIAllocator *allocator, ObCollationType cs_type, ObIJsonBase*& j_base, - bool to_bin, - bool has_lob_header, - bool deep_copy, - bool relax_type, - bool format_json) + ObConv2JsonParam flags) { int ret = OB_SUCCESS; void* buf = NULL; @@ -1231,23 +1652,27 @@ int ObJsonExprHelper::transform_convertible_2jsonBase(const T &datum, LOG_WARN("Invalid collation type for input string.", K(ret)); } else { j_str = datum.get_string(); - if (OB_FAIL(ObTextStringHelper::read_real_string_data(allocator, type, cs_type, has_lob_header, j_str))) { + if (OB_FAIL(ObTextStringHelper::read_real_string_data(allocator, type, cs_type, flags.has_lob_header_, j_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_str)); - } else if (deep_copy) { + } else if (flags.deep_copy_) { ret = deep_copy_ob_string(*allocator, j_str, j_str); } } if (OB_SUCC(ret)) { - if (format_json) { - uint32_t parse_flag = relax_type ? ObJsonParser::JSN_RELAXED_FLAG : ObJsonParser::JSN_STRICT_FLAG; + if (flags.format_json_) { + uint32_t parse_flag = flags.relax_type_ ? 
ObJsonParser::JSN_RELAXED_FLAG : ObJsonParser::JSN_STRICT_FLAG; + ADD_FLAG_IF_NEED(flags.is_schema_, parse_flag, ObJsonParser::JSN_SCHEMA_FLAG); if(OB_FAIL(ObJsonExprHelper::cast_to_json_tree(j_str, allocator, parse_flag))) { LOG_WARN("cast to json tree fail", K(ret)); } else { - ObJsonInType to_type = to_bin ? ObJsonInType::JSON_BIN : ObJsonInType::JSON_TREE; + ObJsonInType to_type = flags.to_bin_ ? ObJsonInType::JSON_BIN : ObJsonInType::JSON_TREE; if (OB_FAIL(ObJsonBaseFactory::get_json_base(allocator, j_str, ObJsonInType::JSON_TREE, to_type, json_node, parse_flag))) { - ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; + if (flags.is_schema_ && ret == OB_ERR_UNSUPPROTED_REF_IN_JSON_SCHEMA) { + } else { + ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; + } LOG_WARN("fail to get json base", K(ret), K(j_str)); } } @@ -1269,9 +1694,9 @@ int ObJsonExprHelper::transform_convertible_2jsonBase(const T &datum, ObString j_str = datum.get_string(); if (OB_SUCC(ret)) { ObString tmp_str = j_str; - if (OB_FAIL(ObTextStringHelper::read_real_string_data(allocator, type, cs_type, has_lob_header, tmp_str))) { + if (OB_FAIL(ObTextStringHelper::read_real_string_data(allocator, type, cs_type, flags.has_lob_header_, tmp_str))) { LOG_WARN("fail to get real data.", K(ret), K(j_str)); - } else if (deep_copy) { + } else if (flags.deep_copy_) { if (OB_FAIL(deep_copy_ob_string(*allocator, tmp_str, j_str))) { LOG_WARN("do deep copy failed", K(ret)); } @@ -1281,11 +1706,15 @@ int ObJsonExprHelper::transform_convertible_2jsonBase(const T &datum, } if (OB_SUCC(ret)) { - ObJsonInType to_type = to_bin ? ObJsonInType::JSON_BIN : ObJsonInType::JSON_TREE; - uint32_t parse_flag = relax_type ? ObJsonParser::JSN_RELAXED_FLAG : ObJsonParser::JSN_STRICT_FLAG; + ObJsonInType to_type = flags.to_bin_ ? ObJsonInType::JSON_BIN : ObJsonInType::JSON_TREE; + uint32_t parse_flag = flags.relax_type_ ? 
ObJsonParser::JSN_RELAXED_FLAG : ObJsonParser::JSN_STRICT_FLAG; + ADD_FLAG_IF_NEED(flags.is_schema_, parse_flag, ObJsonParser::JSN_SCHEMA_FLAG); if (OB_FAIL(ObJsonBaseFactory::get_json_base(allocator, j_str, ObJsonInType::JSON_BIN, to_type, json_node, parse_flag))) { - ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; + if (flags.is_schema_ && ret == OB_ERR_UNSUPPROTED_REF_IN_JSON_SCHEMA) { + } else { + ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; + } LOG_WARN("fail to get json base", K(ret)); } } @@ -1299,7 +1728,7 @@ int ObJsonExprHelper::transform_convertible_2jsonBase(const T &datum, } if (OB_SUCC(ret)) { - if (to_bin) { + if (flags.to_bin_) { if (OB_FAIL(ObJsonBaseFactory::transform(allocator, json_node, ObJsonInType::JSON_BIN, j_base))) { LOG_WARN("failed: json tree to bin", K(ret)); } @@ -1313,34 +1742,61 @@ int ObJsonExprHelper::transform_convertible_2jsonBase(const T &datum, } int ObJsonExprHelper::get_cast_type(const ObExprResType param_type2, - ObExprResType &dst_type) + ObExprResType &dst_type, + ObExprTypeCtx &type_ctx) { INIT_SUCC(ret); - if (!param_type2.is_int() && !param_type2.get_param().is_int()) { + if (!param_type2.is_int() && !param_type2.get_param().is_int()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("cast param type is unexpected", K(param_type2)); } else { const ObObj ¶m = param_type2.get_param(); ParseNode parse_node; - ObObjType obj_type; parse_node.value_ = param.get_int(); - obj_type = static_cast(parse_node.int16_values_[OB_NODE_CAST_TYPE_IDX]); - ObCollationType collation_type = static_cast(parse_node.int16_values_[OB_NODE_CAST_COLL_IDX]); - - dst_type.set_collation_type(collation_type); + ObObjType obj_type = static_cast(parse_node.int16_values_[OB_NODE_CAST_TYPE_IDX]); + dst_type.set_collation_type(static_cast(parse_node.int16_values_[OB_NODE_CAST_COLL_IDX])); dst_type.set_type(obj_type); if (ob_is_string_type(obj_type) || ob_is_lob_locator(obj_type)) { // cast(x as char(10)) or cast(x as binary(10)) 
dst_type.set_full_length(parse_node.int32_values_[OB_NODE_CAST_C_LEN_IDX], param_type2.get_accuracy().get_length_semantics()); - // if (collation_type != CS_TYPE_UTF8MB4_BIN && obj_type == ObLongTextType) { - // dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); - // } - } else if (ObJsonType == dst_type.get_type()) { + } else if (ob_is_raw(obj_type)) { + dst_type.set_length(parse_node.int32_values_[OB_NODE_CAST_C_LEN_IDX]); + } else if (ObFloatType == dst_type.get_type()) { + // Compatible with mysql. If the precision p is not specified, produces a result of type FLOAT. + // If p is provided and 0 <= < p <= 24, the result is of type FLOAT. If 25 <= p <= 53, + // the result is of type DOUBLE. If p < 0 or p > 53, an error is returned + // however, ob use -1 as default precision, so it is a valid value + ObPrecision float_precision = parse_node.int16_values_[OB_NODE_CAST_N_PREC_IDX]; + if (float_precision < -1 || float_precision > OB_MAX_DOUBLE_FLOAT_PRECISION) { + ret = OB_ERR_TOO_BIG_PRECISION; + LOG_USER_ERROR(OB_ERR_TOO_BIG_PRECISION, float_precision, "CAST", OB_MAX_DOUBLE_FLOAT_PRECISION); + } else if (float_precision <= OB_MAX_FLOAT_PRECISION) { + dst_type.set_type(ObFloatType); + } else { + dst_type.set_type(ObDoubleType); + } + dst_type.set_precision(-1); + dst_type.set_scale(parse_node.int16_values_[OB_NODE_CAST_N_SCALE_IDX]); + } else if (lib::is_mysql_mode() && ObJsonType == dst_type.get_type()) { dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); } else { dst_type.set_precision(parse_node.int16_values_[OB_NODE_CAST_N_PREC_IDX]); dst_type.set_scale(parse_node.int16_values_[OB_NODE_CAST_N_SCALE_IDX]); + if (ObNumberType == dst_type.get_type() + && is_decimal_int_accuracy_valid(dst_type.get_precision(), dst_type.get_scale())) { + bool enable_decimalint = false; + if (OB_ISNULL(type_ctx.get_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("type_ctx.get_session() is null", K(ret)); + } else if 
(OB_FAIL(ObSQLUtils::check_enable_decimalint(type_ctx.get_session(), + enable_decimalint))) { + LOG_WARN("fail to check_enable_decimalint_type", + K(ret), K(type_ctx.get_session()->get_effective_tenant_id())); + } else if (enable_decimalint) { + dst_type.set_type(ObDecimalIntType); + } + } } LOG_DEBUG("get_cast_type", K(dst_type), K(param_type2)); } @@ -1359,10 +1815,23 @@ int ObJsonExprHelper::set_dest_type(ObExprResType &type1, LOG_WARN("ptr is NULL", K(ret), KP(session)); } else { // always cast to user requested type - type.set_type(dst_type.get_type()); - type.set_collation_type(dst_type.get_collation_type()); + if (!lib::is_oracle_mode() && + ObCharType == dst_type.get_type()) { + // cast(x as binary(10)), in parser,binary->T_CHAR+bianry, but, result type should be varchar, so set it. + type.set_type(ObVarcharType); + } else { + type.set_type(dst_type.get_type()); + type.set_collation_type(dst_type.get_collation_type()); + } int16_t scale = dst_type.get_scale(); + if (!lib::is_oracle_mode() + && (ObTimeType == dst_type.get_type() || ObDateTimeType == dst_type.get_type()) + && scale > MAX_SCALE_FOR_TEMPORAL) { + ret = OB_ERR_TOO_BIG_PRECISION; + LOG_USER_ERROR(OB_ERR_TOO_BIG_PRECISION, scale, "CAST", OB_MAX_DATETIME_PRECISION); + } if (OB_SUCC(ret)) { + ObCompatibilityMode compatibility_mode = get_compatibility_mode(); ObCollationType collation_connection = type_ctx.get_coll_type(); ObCollationType collation_nation = session->get_nls_collation_nation(); int32_t length = 0; @@ -1392,20 +1861,75 @@ int ObJsonExprHelper::set_dest_type(ObExprResType &type1, } } else { type.set_length(length); + if (ObNumberTC == dst_type.get_type_class() && 0 == dst_type.get_precision()) { + // MySql:cast (1 as decimal(0)) = cast(1 as decimal) + // Oracle: cast(1.4 as number) = cast(1.4 as number(-1, -1)) + type.set_precision(ObAccuracy::DDL_DEFAULT_ACCURACY2[compatibility_mode][ObNumberType].get_precision()); + } else if (ObDecimalIntTC == dst_type.get_type_class() && 0 == 
dst_type.get_precision()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("dst type is decimal int, but precision is zero", K(ret), K(dst_type)); + } else if (ObIntTC == dst_type.get_type_class() || ObUIntTC == dst_type.get_type_class()) { + // for int or uint , the precision = len + int32_t len = 0; + int16_t length_semantics = LS_BYTE;//unused + if (OB_FAIL(get_cast_inttc_len(type1, dst_type, type_ctx, len, length_semantics, collation_connection))) { + LOG_WARN("fail to get cast inttc length", K(ret)); + } else { + len = len > OB_LITERAL_MAX_INT_LEN ? OB_LITERAL_MAX_INT_LEN : len; + type.set_precision(static_cast(len)); + } + } else if (ORACLE_MODE == compatibility_mode && ObDoubleType == dst_type.get_type()) { + ObAccuracy acc = ObAccuracy::DDL_DEFAULT_ACCURACY2[compatibility_mode][dst_type.get_type()]; + type.set_accuracy(acc); + } else { + type.set_precision(dst_type.get_precision()); + } type.set_scale(dst_type.get_scale()); } - - if (OB_SUCC(ret) && ob_is_json(dst_type.get_type())) { - type.set_length((ObAccuracy::DDL_DEFAULT_ACCURACY[ObJsonType]).get_length()); - } } } + + return ret; +} + +int ObJsonExprHelper::get_cast_inttc_len(ObExprResType &type1, + ObExprResType &type2, + ObExprTypeCtx &type_ctx, + int32_t &res_len, + int16_t &length_semantics, + ObCollationType conn) +{ + INIT_SUCC(ret); + + if (type1.is_literal()) { // literal + if (ObStringTC == type1.get_type_class()) { + res_len = type1.get_accuracy().get_length(); + length_semantics = type1.get_length_semantics(); + } else if (OB_FAIL(ObField::get_field_mb_length(type1.get_type(), + type1.get_accuracy(), type1.get_collation_type(), res_len))) { + LOG_WARN("failed to get filed mb length"); + } + } else { + res_len = CAST_STRING_DEFUALT_LENGTH[type1.get_type()]; + ObObjTypeClass tc1 = type1.get_type_class(); + int16_t scale = type1.get_accuracy().get_scale(); + if (ObDoubleTC == tc1) { + res_len -= 1; + } else if (ObDateTimeTC == tc1 && scale > 0) { + res_len += scale - 1; + } else if 
(OB_FAIL(get_cast_string_len(type1, type2, type_ctx, res_len, length_semantics, conn))) { + LOG_WARN("fail to get cast string length", K(ret)); + } else { + // do nothing + } + } + return ret; } int ObJsonExprHelper::get_dest_type(const ObExpr &expr, int64_t pos, ObEvalCtx& ctx, - ObObjType &dest_type, int32_t &dst_len) + ObObjType &dest_type, int64_t &dst_len) { INIT_SUCC(ret); ParseNode node; @@ -1423,6 +1947,21 @@ int ObJsonExprHelper::get_dest_type(const ObExpr &expr, int64_t pos, return ret; } +int ObJsonExprHelper::get_clause_opt(ObExpr *expr, + ObEvalCtx &ctx, + int8_t &type) +{ + INIT_SUCC(ret); + ObDatum *json_datum = NULL; + if (OB_FAIL(expr->eval(ctx, json_datum))) { + LOG_WARN("eval json arg failed", K(ret)); + } else { + int64_t option_type = json_datum->get_int(); + type = static_cast(option_type); + } + return ret; +} + int ObJsonExprHelper::get_cast_string_len(ObExprResType &type1, ObExprResType &type2, common::ObExprTypeCtx &type_ctx, @@ -1432,20 +1971,81 @@ int ObJsonExprHelper::get_cast_string_len(ObExprResType &type1, { INIT_SUCC(ret); const ObObj &val = type1.get_param(); + if (!type1.is_literal()) { // column res_len = CAST_STRING_DEFUALT_LENGTH[type1.get_type()]; int16_t prec = type1.get_accuracy().get_precision(); int16_t scale = type1.get_accuracy().get_scale(); switch(type1.get_type()) { - case ObTextType: - case ObLongTextType: - case ObVarcharType: - case ObCharType: { + case ObTinyIntType: + case ObSmallIntType: + case ObMediumIntType: + case ObInt32Type: + case ObIntType: + case ObUTinyIntType: + case ObUSmallIntType: + case ObUMediumIntType: + case ObUInt32Type: + case ObUInt64Type: { + int32_t prec = static_cast(type1.get_accuracy().get_precision()); + res_len = prec > res_len ? 
prec : res_len; + break; + } + case ObNumberType: + case ObDecimalIntType: + case ObUNumberType: { + if (lib::is_oracle_mode()) { + if (0 < prec) { + if (0 < scale) { + res_len = prec + 2; + } else if (0 == scale) { + res_len = prec + 1; + } else { + res_len = prec - scale; + } + } + } else { + if (0 < prec) { + if (0 < scale) { + res_len = prec + 2; + } else { + res_len = prec + 1; + } + } + } + break; + } + case ObTimestampTZType: + case ObTimestampLTZType: + case ObTimestampNanoType: + case ObDateTimeType: + case ObTimestampType: { + if (scale > 0) { + res_len += scale + 1; + } + break; + } + case ObTimeType: { + if (scale > 0) { + res_len += scale + 1; + } + break; + } + case ObTinyTextType: + case ObTextType: + case ObMediumTextType: + case ObLongTextType: + case ObVarcharType: + case ObCharType: + case ObHexStringType: + case ObRawType: + case ObNVarchar2Type: + case ObNCharType: { res_len = type1.get_length(); length_semantics = type1.get_length_semantics(); break; } - default: { + default: { break; } } @@ -1511,7 +2111,7 @@ int ObJsonExprHelper::parse_res_type(ObExprResType& type1, result_type.set_length((ObAccuracy::DDL_DEFAULT_ACCURACY[ObJsonType]).get_length()); result_type.set_collation_level(CS_LEVEL_IMPLICIT); - } else if (OB_FAIL(ObJsonExprHelper::get_cast_type(res_type, dst_type))) { + } else if (OB_FAIL(ObJsonExprHelper::get_cast_type(res_type, dst_type, type_ctx))) { LOG_WARN("get cast dest type failed", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::set_dest_type(type1, result_type, dst_type, type_ctx))) { LOG_WARN("set dest type failed", K(ret)); @@ -1578,7 +2178,7 @@ int ObJsonExprHelper::check_item_func_with_return(ObJsonPathNodeType path_type, case JPN_NUMBER : case JPN_NUM_ONLY : case JPN_SIZE :{ - if (JSON_EXPR_FLAG == 1 || (JSON_EXPR_FLAG == 0 && dst_type == ObNumberType)) { + if (JSON_EXPR_FLAG == 1 || (JSON_EXPR_FLAG == 0 && ob_is_numeric_type(dst_type))) { } else { ret = OB_ERR_INVALID_DATA_TYPE_RETURNING; LOG_WARN("item func is 
lower/upper, but return type is ", K(dst_type), K(ret)); @@ -1618,7 +2218,7 @@ int ObJsonExprHelper::check_item_func_with_return(ObJsonPathNodeType path_type, return ret; } -int ObJsonExprHelper::get_expr_option_value(const ObExprResType param_type2, int64_t &dst_type) +int ObJsonExprHelper::get_expr_option_value(const ObExprResType param_type2, int8_t &dst_type) { INIT_SUCC(ret); if (!param_type2.is_int() && !param_type2.get_param().is_int()) { @@ -1708,7 +2308,7 @@ int ObJsonExprHelper::parse_asc_option(ObExprResType& asc_type, INIT_SUCC(ret); ObExprResType temp_type; ObObjType doc_type = type1.get_type(); - int64_t asc_option = 0; + int8_t asc_option = 0; if (asc_type.get_type() != ObIntType) { ret = OB_ERR_UNEXPECTED; @@ -1771,7 +2371,7 @@ int ObJsonExprHelper::character2_ascii_string(common::ObIAllocator *allocator, return ret; } -int ObJsonExprHelper::pre_default_value_check(ObObjType dst_type, ObString time_str, ObObjType val_type) { +int ObJsonExprHelper::pre_default_value_check(ObObjType dst_type, ObString val_str, ObObjType val_type, size_t length) { INIT_SUCC(ret); size_t len; switch (dst_type) { @@ -1785,9 +2385,15 @@ int ObJsonExprHelper::pre_default_value_check(ObObjType dst_type, ObString time_ case ObRawType: case ObNVarchar2Type: case ObNCharType: { - if (lib::is_oracle_mode() && val_type != ObVarcharType && val_type != ObCharType) { + if (lib::is_mysql_mode()) { + } else if (val_type != ObVarcharType && val_type != ObCharType) { ret = OB_ERR_DEFAULT_VALUE_NOT_MATCH; LOG_WARN("default value not match",K(ret)); + } else if (dst_type == ObVarcharType) { + if (val_str.length() > length) { + ret = OB_ERR_VALUE_EXCEEDED_MAX; + LOG_USER_ERROR(OB_ERR_VALUE_EXCEEDED_MAX, static_cast(val_str.length()), static_cast(length)); + } } break; } @@ -1795,10 +2401,10 @@ int ObJsonExprHelper::pre_default_value_check(ObObjType dst_type, ObString time_ case ObDecimalIntType: { if (val_type == ObNumberType || val_type == ObDecimalIntType) { } else { - len = 
time_str.length(); + len = val_str.length(); for(size_t i = 0; i < len; i++) { - if (time_str[i] == '-' || time_str[i] == '+') { - } else if (time_str[i] > '9' || time_str[i] < '0') { + if (val_str[i] == '-' || val_str[i] == '+') { + } else if (val_str[i] > '9' || val_str[i] < '0') { ret = OB_ERR_INVALID_DEFAULT_VALUE_PROVIDED; LOG_WARN("number check fail"); } @@ -1811,23 +2417,382 @@ int ObJsonExprHelper::pre_default_value_check(ObObjType dst_type, ObString time_ case ObTimestampTZType: case ObDateTimeType: case ObTimestampLTZType: { - if (val_type != ObCharType) { + if (dst_type == val_type) { + } else if (val_type != ObCharType) { ret = OB_ERR_INVALID_DEFAULT_VALUE_PROVIDED; } else { - len = time_str.length(); - if(len >= 5 && time_str[4] != '-') { + len = val_str.length(); + if(len >= 5 && val_str[4] != '-') { ret = OB_ERR_INVALID_DEFAULT_VALUE_PROVIDED; - } else if(len >= 8 && time_str[7] != '-') { + } else if(len >= 8 && val_str[7] != '-') { ret = OB_ERR_INVALID_DEFAULT_VALUE_PROVIDED; } } break; } + case ObJsonType: { + if (ob_is_number_tc(val_type)) { + ret = OB_ERR_DEFAULT_VALUE_NOT_MATCH; + } + break; + } default: break; } return ret; } +/********** ObJsonExprHelper for json partial update ****************/ +int ObJsonExprHelper::pack_json_diff_res( + const ObExpr &expr, + ObEvalCtx &ctx, + ObIAllocator &temp_allocator, + ObIJsonBase *json_doc, + ObDatum &res) +{ + INIT_SUCC(ret); + ObJsonBin *bin = nullptr; + ObJsonDeltaLob json_delta_lob; + char *res_buf = nullptr; + int64_t res_buf_len = 0; + int64_t pos = 0; + if (OB_ISNULL(json_doc)) { + res.set_null(); + } else if (! 
json_doc->is_bin()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not json binary", K(ret), K(json_doc)); + } else if (OB_FALSE_IT(bin = static_cast(json_doc))) { + } else if (OB_FAIL(json_delta_lob.init(bin))) { + LOG_WARN("init fail", K(ret), K(bin)); + } else if (OB_FAIL(json_delta_lob.check_binary_diff())) { + LOG_WARN("init fail", K(ret), K(bin)); + } else if (OB_FALSE_IT(res_buf_len = json_delta_lob.get_serialize_size())) { + } else if (OB_ISNULL(res_buf = expr.get_str_res_mem(ctx, res_buf_len))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory for delta lob locator fail", K(ret), K(res_buf_len)); + } else if (OB_FAIL(json_delta_lob.serialize(res_buf, res_buf_len, pos))) { + LOG_WARN("serialize fail", KR(ret), K(res_buf_len)); + } else { + res.set_string(res_buf, res_buf_len); + } + return ret; +} + +int ObJsonExprHelper::pack_json_res( + const ObExpr &expr, + ObEvalCtx &ctx, + ObIAllocator &temp_allocator, + ObIJsonBase *json_doc, + ObDatum &res) +{ + INIT_SUCC(ret); + bool shoudl_pack_diff = false; + ObJsonBin *json_bin = nullptr; + if (OB_ISNULL(json_doc)) { + } else if (! 
json_doc->is_bin()) { + } else if (OB_FALSE_IT(json_bin = static_cast(json_doc))) { + } else if (OB_FAIL(json_bin->should_pack_diff(shoudl_pack_diff))) { + LOG_WARN("get should_pack_diff fail", K(ret)); + } + + if (OB_ISNULL(json_doc)) { + res.set_null(); + } else if (shoudl_pack_diff) { + if (OB_FAIL(ObJsonExprHelper::pack_json_diff_res(expr, ctx, temp_allocator, json_doc, res))) { + LOG_WARN("pack diff fail", K(ret)); + } + } else { + ObString str; + if (OB_FAIL(ObJsonWrapper::get_raw_binary(json_doc, str, &temp_allocator))) { + LOG_WARN("json_set result to binary failed", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, str))) { + LOG_WARN("fail to pack json result", K(ret)); + } + } + return ret; +} + + +int ObJsonExprHelper::is_allow_partial_update( + const ObExpr &expr, + ObEvalCtx &ctx, + const ObString &locator_str, + bool &allow_partial_update) +{ + INIT_SUCC(ret); + ObString option; + ObLobLocatorV2 locator(locator_str); + sql::ObSQLSessionInfo *session = nullptr; + if (OB_ISNULL(session = ctx.exec_ctx_.get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session is NULL", K(ret)); + // 1. is delta lob + // 2. perisit outrow lob + } else if (! locator.is_delta_temp_lob() && ! (locator.is_persist_lob() && ! locator.is_inrow())) { + } else if (is_json_partial_update_mode(expr)) { + option = session->get_log_row_value_option(); + allow_partial_update = option.case_compare(OB_LOG_ROW_VALUE_PARTIAL_JSON) == 0 + || option.case_compare(OB_LOG_ROW_VALUE_PARTIAL_ALL) == 0; + } + return ret; +} + +bool ObJsonExprHelper::is_json_partial_update_mode(const ObExpr &expr) +{ + return (expr.extra_ & OB_JSON_PARTIAL_UPDATE_ALLOW) != 0; +} + +int ObJsonExprHelper::refresh_root_when_bin_rebuild_all(ObIJsonBase *j_base) +{ + INIT_SUCC(ret); + ObJsonBin *j_bin = nullptr; + ObJsonBinUpdateCtx *update_ctx = nullptr; + if (OB_ISNULL(j_base)) { + } else if (! 
j_base->is_bin()) { + } else if (OB_FALSE_IT(j_bin = static_cast(j_base))) { + } else if (OB_ISNULL(update_ctx = j_bin->get_update_ctx())) { + } else if (! update_ctx->is_rebuild_all()) { + } else if (OB_FAIL(j_bin->reset(0))) { + LOG_WARN("reset fail", K(ret), K(*j_bin)); + } + return ret; +} + +int ObJsonExprHelper::init_json_expr_extra_info( + ObIAllocator *allocator, + const ObRawExpr &raw_expr, + const ObExprOperatorType type, + ObExpr &rt_expr) +{ + int ret = OB_SUCCESS; + const uint64_t extra = raw_expr.get_extra(); + if (! is_json_partial_update_mode(extra)) { // only used for json partial update now + } else { + rt_expr.extra_ = extra; + } + return ret; +} + +int ObJsonExprHelper::get_session_query_timeout_ts(ObEvalCtx &ctx, int64_t &timeout_ts) +{ + int ret = OB_SUCCESS; + sql::ObSQLSessionInfo *session = nullptr; + if (OB_ISNULL(session = ctx.exec_ctx_.get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session is NULL", K(ret)); + } else { + timeout_ts = session->get_query_timeout_ts(); + } + return ret; +} + +/********** ObJsonExprHelper for json partial update ****************/ + + +/********** ObJsonDeltaLob ****************/ + +int ObJsonDeltaLob::init(ObJsonBin *j_bin) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(update_ctx_ = j_bin->get_update_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update ctx is null", K(ret), KPC(j_bin)); + } else { + cursor_ = static_cast(update_ctx_->cursor_); + partial_data_ = cursor_->partial_data_; + } + return ret; +} + +int ObJsonDeltaLob::init(ObIAllocator *allocator, ObLobLocatorV2 locator, int64_t query_timeout_ts) +{ + int ret = OB_SUCCESS; + allocator_ = allocator; + query_timeout_ts_ = query_timeout_ts; + if (OB_FAIL(deserialize(locator))) { + LOG_WARN("deserialize json delta lob fail", K(ret), K(locator)); + } + return ret; +} + +int64_t ObJsonDeltaLob::get_lob_diff_serialize_size() const +{ + int64_t len = 0; + // binary diff (lob diff) + const ObJsonBinaryDiffArray &binary_diffs = 
update_ctx_->binary_diffs_; + len += sizeof(storage::ObLobDiff) * binary_diffs.count(); + + // json diff + const ObJsonDiffArray &json_diffs = update_ctx_->json_diffs_; + ObJsonDiffHeader json_diff_header; + json_diff_header.cnt_ = json_diffs.count(); + len += json_diff_header.get_serialize_size(); + for (int i = 0; i < json_diffs.count(); ++i) { + const ObJsonDiff& diff = json_diffs[i]; + len += json_diffs[i].get_serialize_size(); + } + return len; +} + +uint32_t ObJsonDeltaLob::get_lob_diff_cnt() const +{ + return update_ctx_->binary_diffs_.count(); +} + +int64_t ObJsonDeltaLob::get_partial_data_serialize_size() const +{ + return nullptr == partial_data_ ? 0 : partial_data_->get_serialize_size(); +} + +int ObJsonDeltaLob::check_binary_diff() const +{ + INIT_SUCC(ret); + for (int i = 0; OB_SUCC(ret) && i < partial_data_->index_.count(); ++i) { + ObLobChunkIndex &chunk_index = partial_data_->index_[i]; + uint64_t chunk_start_offset = chunk_index.offset_; + uint64_t chunk_end_offset = chunk_index.offset_ + chunk_index.byte_len_; + uint64_t max_end_offset = chunk_start_offset + partial_data_->chunk_size_; + bool is_chunk_updated = false; + for (int j = 0; ! chunk_index.is_add_ && j < update_ctx_->binary_diffs_.count(); ++j) { + const ObJsonBinaryDiff &diff = update_ctx_->binary_diffs_[j]; + uint64_t diff_start_offset = diff.dst_offset_; + uint64_t diff_end_offset = diff.dst_offset_ + diff.dst_len_; + if (diff_start_offset >= chunk_start_offset && diff_start_offset < chunk_end_offset) { + is_chunk_updated = true; + } else if (diff_end_offset > chunk_start_offset && diff_end_offset <= chunk_end_offset) { + is_chunk_updated = true; + } else if (diff_start_offset <= chunk_start_offset && chunk_end_offset <= diff_end_offset) { + is_chunk_updated = true; + } + } + // if it should be updated and it's exist chunk, but no modified flag and old data, this is unexpected. + if (is_chunk_updated + && ! chunk_index.is_add_ // exist chunk + && ! 
(chunk_index.is_modified_ && chunk_index.old_data_idx_ >= 0)) { // no modified flag and old data + ret = OB_ERR_UNEXPECTED; + LOG_WARN("old data not set", KR(ret), K(i), K(chunk_index)); + } + } + return ret; +} + +int ObJsonDeltaLob::serialize_partial_data(char* buf, const int64_t buf_len, int64_t& pos) const +{ + INIT_SUCC(ret); + if (OB_ISNULL(partial_data_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("partial_data_ is null", KR(ret)); + } else if (OB_FAIL(partial_data_->serialize(buf, buf_len, pos))) { + LOG_WARN("serialize fail", K(ret), K(buf_len), K(pos)); + } + return ret; +} + +int ObJsonDeltaLob::deserialize_partial_data(storage::ObLobDiffHeader *diff_header) +{ + INIT_SUCC(ret); + storage::ObLobAccessParam *param = nullptr; + char *buf = diff_header->data_; + int64_t data_len = diff_header->persist_loc_size_; + int64_t pos = 0; + ObLobLocatorV2 locator; + ObLobManager* lob_mgr = MTL(ObLobManager*); + if (OB_ISNULL(partial_data_ = OB_NEWx(storage::ObLobPartialData, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc lob param fail", K(ret), "size", sizeof(ObLobPartialData)); + } else if (OB_FAIL(partial_data_->init())) { + LOG_WARN("map create fail", K(ret)); + } else if (OB_FAIL(partial_data_->deserialize(buf, data_len, pos))) { + LOG_WARN("deserialize fail", K(ret), K(data_len), K(pos), KPC(diff_header)); + } else if (OB_FALSE_IT(locator.assign_buffer(partial_data_->locator_.ptr(), partial_data_->locator_.length()))) { + } else if (OB_FAIL(lob_mgr->query(allocator_, locator, + query_timeout_ts_, false, partial_data_, cursor_))) { + LOG_WARN("build_lob_param fail", K(ret)); + } + return ret; +} + +int ObJsonDeltaLob::serialize_lob_diffs(char* buf, const int64_t buf_len, storage::ObLobDiffHeader *diff_header) const +{ + int ret = OB_SUCCESS; + char *diff_data_ptr = diff_header->get_inline_data_ptr(); + storage::ObLobDiff *lob_diffs = diff_header->get_diff_ptr(); + int64_t data_len = buf_len - (diff_data_ptr - buf); + int64_t data_pos = 0; + 
+ for (int i = 0; OB_SUCC(ret) && i < diff_header->diff_cnt_; ++i) { + const ObJsonBinaryDiff &diff = update_ctx_->binary_diffs_[i]; + storage::ObLobDiff *lob_diff = new (lob_diffs + i) storage::ObLobDiff(); + lob_diff->type_ = get_diff_type(); + lob_diff->dst_offset_ = diff.dst_offset_; + lob_diff->dst_len_ = diff.dst_len_; + } + + const ObJsonDiffArray &json_diffs = update_ctx_->json_diffs_; + ObJsonDiffHeader json_diff_header; + json_diff_header.cnt_ = json_diffs.count(); + if (OB_FAIL(json_diff_header.serialize(diff_data_ptr, data_len, data_pos))) { + LOG_WARN("serialize json diff header fail", KR(ret), K(buf_len), K(data_pos)); + } + for (int i = 0; OB_SUCC(ret) && i < json_diffs.count(); ++i) { + const ObJsonDiff& diff = json_diffs[i]; + if (OB_FAIL(diff.serialize(diff_data_ptr, data_len, data_pos))) { + LOG_WARN("serialize json diff fail", KR(ret), K(i), K(buf_len), K(data_pos), K(json_diffs)); + } + } + return ret; +} + +int ObJsonDeltaLob::deserialize_lob_diffs(char* buf, const int64_t buf_len, storage::ObLobDiffHeader *diff_header) +{ + INIT_SUCC(ret); + ObJsonBinUpdateCtx *update_ctx = nullptr; + storage::ObLobDiff *lob_diffs = nullptr; + char *data_ptr = nullptr; + if (OB_ISNULL(update_ctx_ = OB_NEWx(ObJsonBinUpdateCtx, allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc update ctx fail", K(ret), "size", sizeof(ObJsonBinUpdateCtx)); + } else if (OB_ISNULL(data_ptr = diff_header->get_inline_data_ptr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data_ptr is null", K(ret), KPC(diff_header)); + } else if (OB_ISNULL(lob_diffs = diff_header->get_diff_ptr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lob_diffs is null", K(ret), KPC(diff_header)); + } else { + int64_t data_len = buf_len - (data_ptr - buf); + int64_t data_pos = 0; + for (int64_t i = 0 ; OB_SUCC(ret) && i < diff_header->diff_cnt_; ++i) { + ObLobDiff &lob_diff = lob_diffs[i]; + ObJsonBinaryDiff binary_diff; + binary_diff.dst_offset_ = lob_diff.dst_offset_; + 
binary_diff.dst_len_ = lob_diff.dst_len_; + if (OB_FAIL(update_ctx_->binary_diffs_.push_back(binary_diff))) { + LOG_WARN("push diff fail", KR(ret), K(lob_diff), K(binary_diff), K(i), KPC(diff_header)); + } + } + + ObJsonDiffHeader json_diff_header; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(json_diff_header.deserialize(data_ptr, data_len, data_pos))) { + LOG_WARN("deserialize json diff header fail", K(ret), K(data_len), K(data_pos), K(json_diff_header)); + } + for (int64_t i = 0 ; OB_SUCC(ret) && i < json_diff_header.cnt_; ++i) { + ObJsonDiff json_diff; + if (OB_FAIL(json_diff.deserialize(data_ptr, data_len, data_pos))) { + LOG_WARN("deserialize fail", K(ret), K(i), K(json_diff_header), K(data_len), K(data_pos)); + } else if (OB_FAIL(update_ctx_->json_diffs_.push_back(json_diff))) { + LOG_WARN("push diff fail", KR(ret), K(i), K(json_diff), K(i), K(json_diff_header), KPC(diff_header)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObJsonExprHelper::get_partial_json_bin(*allocator_, cursor_, update_ctx_, j_base_))) { + LOG_WARN("get_partial_json_bin fail", K(ret)); + } + } + return ret; +} + +/********** ObJsonDeltaLob ****************/ + } } diff --git a/src/sql/engine/expr/ob_expr_json_func_helper.h b/src/sql/engine/expr/ob_expr_json_func_helper.h index ceb6e7d015..59ae107587 100644 --- a/src/sql/engine/expr/ob_expr_json_func_helper.h +++ b/src/sql/engine/expr/ob_expr_json_func_helper.h @@ -19,18 +19,145 @@ #include "share/object/ob_obj_cast.h" #include "objit/common/ob_item_type.h" #include "sql/session/ob_sql_session_info.h" +#include "lib/lob/ob_lob_base.h" #include "lib/json_type/ob_json_tree.h" #include "lib/json_type/ob_json_base.h" #include "lib/json_type/ob_json_bin.h" #include "lib/json_type/ob_json_parse.h" +#include "lib/json_type/ob_json_schema.h" +#include "lib/json_type/ob_json_diff.h" #include "sql/engine/expr/ob_expr_result_type_util.h" +#include "storage/lob/ob_lob_util.h" using namespace oceanbase::common; namespace oceanbase { +namespace 
storage +{ +class ObLobCursor; +} + namespace sql { +const static int32_t OB_LITERAL_MAX_INT_LEN = 21; + +struct ObJsonExprParam { +public: + ObJsonExprParam() + : truncate_(0), + format_json_(0), + wrapper_(0), + empty_type_(0), + error_type_(0), + is_empty_default_const_(false), + is_error_default_const_(false), + empty_val_(), + error_val_(), + on_mismatch_(), + on_mismatch_type_(), + accuracy_(), + dst_type_(), + pretty_type_(0), + ascii_type_(0), + scalars_type_(0), + path_str_(), + json_path_(nullptr), + is_init_from_cache_(false) + {} + virtual ~ObJsonExprParam() {} +public: + int8_t truncate_; + int8_t format_json_; + int8_t wrapper_; + int8_t empty_type_; + int8_t error_type_; + bool is_empty_default_const_; + bool is_error_default_const_; + ObDatum *empty_val_; + ObDatum *error_val_; + common::ObSEArray on_mismatch_; + common::ObSEArray on_mismatch_type_; + ObAccuracy accuracy_; + ObObjType dst_type_; + int8_t pretty_type_; + int8_t ascii_type_; + int8_t scalars_type_; + ObString path_str_; + ObJsonPath* json_path_; + bool is_init_from_cache_; +}; + +class ObJsonParamCacheCtx : public ObExprOperatorCtx +{ + public: + ObJsonParamCacheCtx(common::ObIAllocator *allocator) + : ObExprOperatorCtx(), + path_cache_(allocator), + is_first_exec_(true), + is_json_path_const_(false), + json_param_() + {} + virtual ~ObJsonParamCacheCtx() {} + ObJsonPathCache *get_path_cache() { return &path_cache_; } + +private: + ObJsonPathCache path_cache_; +public: + bool is_first_exec_; + bool is_json_path_const_; + ObJsonExprParam json_param_; +}; + +struct ObConv2JsonParam { + ObConv2JsonParam(bool to_bin, bool has_lob_header) : + to_bin_(to_bin), + has_lob_header_(has_lob_header), + deep_copy_(false), + relax_type_(true), + format_json_(false), + is_schema_(false) + {} + ObConv2JsonParam(bool to_bin, bool has_lob_header, bool deep_copy) : + to_bin_(to_bin), + has_lob_header_(has_lob_header), + deep_copy_(deep_copy), + relax_type_(true), + format_json_(false), + 
is_schema_(false) + {} + ObConv2JsonParam(bool to_bin, bool has_lob_header, bool deep_copy, bool relax_type) : + to_bin_(to_bin), + has_lob_header_(has_lob_header), + deep_copy_(deep_copy), + relax_type_(relax_type), + format_json_(false), + is_schema_(false) + {} + ObConv2JsonParam(bool to_bin, bool has_lob_header, bool deep_copy, bool relax_type, bool format_json) : + to_bin_(to_bin), + has_lob_header_(has_lob_header), + deep_copy_(deep_copy), + relax_type_(relax_type), + format_json_(format_json), + is_schema_(false) + {} + ObConv2JsonParam(bool to_bin, bool has_lob_header, bool deep_copy, bool relax_type, bool format_json, bool is_schema) : + to_bin_(to_bin), + has_lob_header_(has_lob_header), + deep_copy_(deep_copy), + relax_type_(relax_type), + format_json_(format_json), + is_schema_(is_schema) + {} + bool to_bin_; + bool has_lob_header_; + bool deep_copy_ = false; + bool relax_type_ = true; + bool format_json_ = false; + bool is_schema_ = false; +}; + class ObJsonExprHelper final { class ObJsonPathCacheCtx : public ObExprOperatorCtx @@ -45,9 +172,22 @@ class ObJsonExprHelper final private: ObJsonPathCache path_cache_; }; + class ObJsonSchemaCacheCtx : public ObExprOperatorCtx + { + public: + ObJsonSchemaCacheCtx(common::ObIAllocator *allocator) + : ObExprOperatorCtx(), + schema_cache_(allocator) + {} + virtual ~ObJsonSchemaCacheCtx() {} + ObJsonSchemaCache *get_schema_cache() { return &schema_cache_; } + private: + ObJsonSchemaCache schema_cache_; + }; public: + static ObJsonParamCacheCtx* get_param_cache_ctx(const uint64_t& id, ObExecContext *exec_ctx); static int get_json_or_str_data(ObExpr *expr, ObEvalCtx &ctx, - common::ObArenaAllocator &allocator, + common::ObIAllocator &allocator, ObString& str, bool& is_null); /* get json doc to JsonBase in static_typing_engine @@ -62,7 +202,35 @@ public: static int get_json_doc(const ObExpr &expr, ObEvalCtx &ctx, common::ObArenaAllocator &allocator, uint16_t index, ObIJsonBase*& j_base, - bool &is_null, bool 
need_to_tree=true, bool relax = true); + bool &is_null, bool need_to_tree=true, + bool relax = true, bool preserve_dup = false); + static int get_json_schema(const ObExpr &expr, ObEvalCtx &ctx, + common::ObArenaAllocator &allocator, + uint16_t index, ObIJsonBase*& j_base, + bool &is_null); + + + static int get_json_for_partial_update( + const ObExpr &expr, + const ObExpr &json_expr, + ObEvalCtx &ctx, + ObIAllocator &allocator, + ObDatum &json_datum, + ObIJsonBase *&j_base); + + static int get_partial_json_bin( + ObIAllocator &allocator, + ObILobCursor *cursor, + ObJsonBinUpdateCtx *update_ctx, + ObIJsonBase *&j_base); + static int get_json_for_partial_update_with_curosr( + const ObExpr &expr, + const ObExpr &json_expr, + ObEvalCtx &ctx, + ObIAllocator &allocator, + ObDatum &json_datum, + ObIJsonBase *&j_base); + static int cast_to_json_tree(ObString &text, common::ObIAllocator *allocator, uint32_t parse_flag = 0); /* @@ -79,6 +247,8 @@ public: common::ObIAllocator *allocator, uint16_t index, ObIJsonBase*& j_base, bool to_bin = false, bool format_json = false, uint32_t parse_flag = ObJsonParser::JSN_RELAXED_FLAG); + static int get_const_json_schema(const common::ObObj &data, const char* func_name, + common::ObIAllocator *allocator, ObIJsonBase*& j_schema); /* get json value to JsonBase in old_typing_engine @@ -93,6 +263,13 @@ public: static int get_json_val(const common::ObObj &data, ObExprCtx &ctx, bool is_bool, common::ObIAllocator *allocator, ObIJsonBase*& j_base, bool to_bin = false); + static int get_json_val(const common::ObDatum &data, + ObExecContext &ctx, + ObExpr* expr, + common::ObIAllocator *allocator, + ObObjType val_type, + ObCollationType &cs_type, + ObIJsonBase*& j_base, bool to_bin = false); static int oracle_datum2_json_val(const ObDatum *json_datum, ObObjMeta& data_meta, common::ObIAllocator *allocator, ObBasicSessionInfo *session, ObIJsonBase*& j_base, bool is_bool_data_type, bool format_json = false, bool is_strict = false, bool is_bin = false); 
@@ -111,9 +288,13 @@ public: ObIJsonBase *&json_doc); static int find_and_add_cache(ObJsonPathCache* path_cache, ObJsonPath*& res_path, - ObString& path_str, int arg_idx, bool enable_wildcard); + ObString& path_str, int arg_idx, bool enable_wildcard, + bool is_const = false); + static int find_and_add_schema_cache(ObJsonSchemaCache* schema_cache, ObIJsonBase*& j_schema, + ObString& schema_str, int arg_idx, const ObJsonInType& in_type); static ObJsonPathCache* get_path_cache_ctx(const uint64_t& id, ObExecContext *exec_ctx); + static ObJsonSchemaCache* get_schema_cache_ctx(const uint64_t& id, ObExecContext *exec_ctx); static int is_json_zero(const ObString& data, int& result); @@ -157,11 +338,7 @@ public: common::ObIAllocator *allocator, ObCollationType cs_type, ObIJsonBase*& j_base, - bool to_bin, - bool has_lob_header, - bool deep_copy = false, - bool relax_type = true, - bool format_json = false); + ObConv2JsonParam flags); static bool is_cs_type_bin(ObCollationType &cs_type); static int get_timestamp_str_in_oracle_mode(ObEvalCtx &ctx, @@ -202,6 +379,13 @@ public: return ret; } + static int pack_json_res( + const ObExpr &expr, + ObEvalCtx &ctx, + ObIAllocator &temp_allocator, + ObIJsonBase *json_doc, + ObDatum &res); + /** * the following 3 functions is used for json_query and json_mergepatch * as the returning type is the same @@ -211,19 +395,26 @@ public: * get_cast_string_len * */ - static int get_cast_type(const ObExprResType param_type2, ObExprResType &dst_type); + static int get_cast_type(const ObExprResType param_type2, ObExprResType &dst_type, ObExprTypeCtx &type_ctx); // check item function and returning type static int check_item_func_with_return(ObJsonPathNodeType path_type, ObObjType dst_type, common::ObCollationType dst_coll_type, int8_t JSON_EXPR_FLAG); static int set_dest_type(ObExprResType &type1, ObExprResType &type, ObExprResType &dst_type, ObExprTypeCtx &type_ctx); - static int get_expr_option_value(const ObExprResType param_type2, int64_t 
&dst_type); + static int get_expr_option_value(const ObExprResType param_type2, int8_t &dst_type); static int calc_asciistr_in_expr(const ObString &src, const ObCollationType src_cs_type, const ObCollationType dst_cs_type, char* buf, const int64_t buf_len, int32_t &pos); static int get_dest_type(const ObExpr &expr, int64_t pos, ObEvalCtx& ctx, - ObObjType &dest_type, int32_t &dst_len); + ObObjType &dest_type, int64_t &dst_len); + + static int get_cast_inttc_len(ObExprResType &type1, + ObExprResType &type2, + common::ObExprTypeCtx &type_ctx, + int32_t &res_len, + int16_t &length_semantics, + common::ObCollationType conn); static int get_cast_string_len(ObExprResType &type1, ObExprResType &type2, common::ObExprTypeCtx &type_ctx, @@ -239,18 +430,90 @@ public: ObExprResType& type1, ObExprResType& res_type, ObExprTypeCtx& type_ctx); - static int pre_default_value_check(ObObjType dst_type, ObString time_str, ObObjType val_type); + static int pre_default_value_check(ObObjType dst_type, ObString val_str, ObObjType val_type, size_t length); static int character2_ascii_string(common::ObIAllocator *allocator, const ObExpr &expr, ObEvalCtx &ctx, ObString& result, int32_t reserve_len = 0); + static int cast_to_res(ObIAllocator &allocator, + ObDatum &src_datum, + const ObExpr &expr, + const ObExpr &default_expr, + ObEvalCtx &ctx, + ObDatum &res, + bool xt_need_acc_check); + static void get_accuracy_from_expr(const ObExpr &expr, ObAccuracy &accuracy); + static int get_clause_opt(ObExpr *expr, + ObEvalCtx &ctx, + int8_t &type); + + static int is_allow_partial_update(const ObExpr &expr, ObEvalCtx &ctx, const ObString &locator_str, bool &allow_partial_update); + static bool is_json_partial_update_mode(const ObExpr &expr); + static bool is_json_partial_update_mode(const uint64_t flag) { return (flag & OB_JSON_PARTIAL_UPDATE_ALLOW) != 0; } + static bool is_json_partial_update_last_expr(const uint64_t flag) { return (flag & OB_JSON_PARTIAL_UPDATE_LAST_EXPR) != 0; } + static bool 
is_json_partial_update_first_expr(const uint64_t flag) { return (flag & OB_JSON_PARTIAL_UPDATE_FIRST_EXPR) != 0; } + + static int pack_json_diff_res( + const ObExpr &expr, + ObEvalCtx &ctx, + ObIAllocator &temp_allocator, + ObIJsonBase *json_doc, + ObDatum &res); + + static int refresh_root_when_bin_rebuild_all(ObIJsonBase *j_base); + + static int init_json_expr_extra_info( + ObIAllocator *allocator, + const ObRawExpr &raw_expr, + const ObExprOperatorType type, + ObExpr &rt_expr); + + static int get_session_query_timeout_ts(ObEvalCtx &ctx, int64_t &timeout_ts); + private: const static uint32_t RESERVE_MIN_BUFF_SIZE = 32; DISALLOW_COPY_AND_ASSIGN(ObJsonExprHelper); }; +class ObJsonDeltaLob : public ObDeltaLob { +public: + ObJsonDeltaLob(): + allocator_(nullptr), + update_ctx_(nullptr), + j_base_(nullptr), + cursor_(nullptr), + partial_data_(nullptr), + query_timeout_ts_(0) + {} + + int init(ObJsonBin *j_bin); + int init(ObIAllocator *allocator, ObLobLocatorV2 locator, int64_t query_timeout_ts); + + int64_t get_partial_data_serialize_size() const; + int64_t get_lob_diff_serialize_size() const; + uint32_t get_lob_diff_cnt() const; + + int serialize_partial_data(char* buf, const int64_t buf_len, int64_t& pos) const; + int deserialize_partial_data(storage::ObLobDiffHeader *diff_header); + int serialize_lob_diffs(char* buf, const int64_t buf_len, storage::ObLobDiffHeader *diff_header) const; + int deserialize_lob_diffs(char* buf, const int64_t buf_len, storage::ObLobDiffHeader *diff_header); + + int check_binary_diff() const; + ObIJsonBase* get_json_bin() { return j_base_; } + storage::ObLobDiff::DiffType get_diff_type() const { return storage::ObLobDiff::DiffType::WRITE_DIFF; } + +protected: + ObIAllocator *allocator_; + ObJsonBinUpdateCtx *update_ctx_; + + ObIJsonBase *j_base_; + storage::ObLobCursor *cursor_; + storage::ObLobPartialData *partial_data_; + int64_t query_timeout_ts_; +}; + struct ObJsonZeroVal { static const int32_t OB_JSON_ZERO_VAL_LENGTH = 
sizeof(ObLobCommon) + 2; diff --git a/src/sql/engine/expr/ob_expr_json_insert.cpp b/src/sql/engine/expr/ob_expr_json_insert.cpp index 0ff828a020..6cf2941ce7 100644 --- a/src/sql/engine/expr/ob_expr_json_insert.cpp +++ b/src/sql/engine/expr/ob_expr_json_insert.cpp @@ -15,6 +15,7 @@ #include "ob_expr_json_insert.h" #include "ob_expr_json_func_helper.h" +#include "share/ob_json_access_utils.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -120,7 +121,7 @@ int ObExprJsonInsert::eval_json_insert(const ObExpr &expr, ObEvalCtx &ctx, ObDat } else if (j_path->path_node_cnt() == 0) { // do nothing } else { - ObJsonBaseVector hit; + ObJsonSeekResult hit; // if target exists continue, don't replace if (OB_FAIL(j_base->seek(*j_path, j_path->path_node_cnt(), true, true, hit))) { LOG_WARN("failed: json seek.", K(j_path_text), K(ret)); @@ -136,7 +137,7 @@ int ObExprJsonInsert::eval_json_insert(const ObExpr &expr, ObEvalCtx &ctx, ObDat ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; LOG_WARN("failed: get_json_val.", K(ret)); } else { - ObIJsonBase *j_pos_node = *hit.last(); + ObIJsonBase *j_pos_node = hit.last(); ObJsonPathBasicNode *path_last = j_path->last_path_node(); if (path_last->get_node_type() == JPN_ARRAY_CELL) { if (j_pos_node->json_type() == ObJsonNodeType::J_ARRAY) { @@ -185,7 +186,7 @@ int ObExprJsonInsert::eval_json_insert(const ObExpr &expr, ObEvalCtx &ctx, ObDat ObString raw_bin; if (is_null) { res.set_null(); - } else if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + } else if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { LOG_WARN("fail to pack json result", K(ret)); diff --git a/src/sql/engine/expr/ob_expr_json_keys.cpp b/src/sql/engine/expr/ob_expr_json_keys.cpp index 4f045406ff..23c3115e02 100644 --- a/src/sql/engine/expr/ob_expr_json_keys.cpp +++ 
b/src/sql/engine/expr/ob_expr_json_keys.cpp @@ -14,6 +14,7 @@ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_json_keys.h" #include "ob_expr_json_func_helper.h" +#include "share/ob_json_access_utils.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -87,7 +88,8 @@ int ObExprJsonKeys::get_keys_from_wrapper(ObIJsonBase *json_doc, iter.next(); } - if (OB_FAIL(res_array.get_raw_binary(str, allocator))) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObJsonWrapper::get_raw_binary(&res_array, str, allocator))) { LOG_WARN("json_keys get result binary failed", K(ret)); } return ret; @@ -120,7 +122,7 @@ int ObExprJsonKeys::eval_json_keys(const ObExpr &expr, ObEvalCtx &ctx, ObDatum & } else if (OB_FAIL(expr.args_[1]->eval(ctx, path_data))) { LOG_WARN("eval json path datum failed", K(ret)); } else { - ObJsonBaseVector sub_json_targets; + ObJsonSeekResult sub_json_targets; ObJsonPath *json_path; ObString path_val = path_data->get_string(); if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(expr.args_[1], ctx, temp_allocator, path_val, is_null_result))) { diff --git a/src/sql/engine/expr/ob_expr_json_length.cpp b/src/sql/engine/expr/ob_expr_json_length.cpp index 9844f61ee8..c0948a119a 100644 --- a/src/sql/engine/expr/ob_expr_json_length.cpp +++ b/src/sql/engine/expr/ob_expr_json_length.cpp @@ -103,7 +103,7 @@ int ObExprJsonLength::calc(ObEvalCtx &ctx, const ObDatum &data1, ObDatumMeta met if (type2 == ObNullType) { // null should display "NULL" is_null = true; } else { // ObLongTextType - ObJsonBaseVector hit; + ObJsonSeekResult hit; ObString j_path_text = data2->get_string(); ObJsonPath *j_path = NULL; if (OB_FAIL(ObTextStringHelper::read_real_string_data(*allocator, *data2, meta2, has_lob_header2, j_path_text))) { diff --git a/src/sql/engine/expr/ob_expr_json_merge_patch.cpp b/src/sql/engine/expr/ob_expr_json_merge_patch.cpp index 350e55a265..16e0aaefd8 100644 --- a/src/sql/engine/expr/ob_expr_json_merge_patch.cpp +++ 
b/src/sql/engine/expr/ob_expr_json_merge_patch.cpp @@ -14,6 +14,7 @@ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_json_merge_patch.h" #include "sql/engine/expr/ob_expr_json_func_helper.h" +#include "share/ob_json_access_utils.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -163,7 +164,7 @@ int ObExprJsonMergePatch::eval_json_merge_patch(const ObExpr &expr, ObEvalCtx &c ObString raw_bin; if (has_null) { res.set_null(); - } else if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + } else if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { LOG_WARN("fail to pack json result", K(ret)); @@ -302,7 +303,7 @@ int ObExprJsonMergePatch::eval_ora_json_merge_patch(const ObExpr &expr, ObEvalCt ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to construct jbuf", K(ret)); } else if (dst_type == ObJsonType) { - if (OB_FAIL(j_base->get_raw_binary(res_string, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, res_string, &temp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } } else { diff --git a/src/sql/engine/expr/ob_expr_json_merge_preserve.cpp b/src/sql/engine/expr/ob_expr_json_merge_preserve.cpp index 4086868744..38c884804a 100644 --- a/src/sql/engine/expr/ob_expr_json_merge_preserve.cpp +++ b/src/sql/engine/expr/ob_expr_json_merge_preserve.cpp @@ -15,6 +15,7 @@ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_json_merge_preserve.h" #include "sql/engine/expr/ob_expr_json_func_helper.h" +#include "share/ob_json_access_utils.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -103,7 +104,7 @@ int ObExprJsonMergePreserve::eval_json_merge_preserve(const ObExpr &expr, ObEval ObString raw_bin; if (has_null) { res.set_null(); - } else if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + } else if 
(OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { LOG_WARN("fail to pack json result", K(ret)); diff --git a/src/sql/engine/expr/ob_expr_json_object.cpp b/src/sql/engine/expr/ob_expr_json_object.cpp index 21be62d4c0..90b50d2fbc 100644 --- a/src/sql/engine/expr/ob_expr_json_object.cpp +++ b/src/sql/engine/expr/ob_expr_json_object.cpp @@ -16,7 +16,7 @@ #include "sql/engine/expr/ob_expr_json_func_helper.h" #include "sql/engine/ob_exec_context.h" #include "sql/engine/expr/ob_expr_cast.h" - +#include "share/ob_json_access_utils.h" #include "lib/hash/ob_hashset.h" using namespace oceanbase::common; @@ -117,7 +117,7 @@ int ObExprJsonObject::calc_result_typeN(ObExprResType& type, // returning type : param_num - 3 ObExprResType dst_type; if (OB_SUCC(ret)) { - if (OB_FAIL(ObJsonExprHelper::get_cast_type(types_stack[param_num - 3], dst_type))) { + if (OB_FAIL(ObJsonExprHelper::get_cast_type(types_stack[param_num - 3], dst_type, type_ctx))) { LOG_WARN("get cast dest type failed", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::set_dest_type(types_stack[param_num - 3], type, dst_type, type_ctx))) { LOG_WARN("set dest type failed", K(ret)); @@ -233,7 +233,7 @@ int ObExprJsonObject::eval_json_object(const ObExpr &expr, ObEvalCtx &ctx, ObDat ObString raw_bin; j_obj.stable_sort(); j_obj.unique(); - if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { LOG_WARN("fail to pack json result", K(ret)); @@ -351,7 +351,7 @@ int ObExprJsonObject::eval_ora_json_object(const ObExpr &expr, ObEvalCtx &ctx, O if (OB_SUCC(ret)) { if (dst_type == ObJsonType) { ObString raw_bin; - if 
(OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &temp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { LOG_WARN("fail to pack json result", K(ret), K(raw_bin)); @@ -509,7 +509,7 @@ int ObExprJsonObjectStar::calc_result_typeN(ObExprResType& type, if (OB_FAIL(ObJsonExprHelper::set_dest_type(types_stack[0], type, dst_type, type_ctx))) { LOG_WARN("set dest type failed", K(ret)); } else { - type.set_calc_collation_type(type.get_collation_type()); + type.set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); } } return ret; diff --git a/src/sql/engine/expr/ob_expr_json_query.cpp b/src/sql/engine/expr/ob_expr_json_query.cpp index 0014ad5c7b..411efaa058 100644 --- a/src/sql/engine/expr/ob_expr_json_query.cpp +++ b/src/sql/engine/expr/ob_expr_json_query.cpp @@ -19,14 +19,18 @@ #include "sql/session/ob_sql_session_info.h" #include "share/object/ob_obj_cast_util.h" #include "share/object/ob_obj_cast.h" +#include "share/ob_json_access_utils.h" #include "sql/engine/expr/ob_expr_cast.h" #include "sql/engine/expr/ob_datum_cast.h" #include "sql/resolver/expr/ob_raw_expr_util.h" #include "lib/oblog/ob_log_module.h" #include "ob_expr_json_func_helper.h" #include "ob_expr_json_value.h" +#include "lib/xml/ob_binary_aggregate.h" +#include "ob_expr_json_utils.h" // from sql_parser_base.h #define DEFAULT_STR_LENGTH -1 +#define VARCHAR2_DEFAULT_LEN 4000 using namespace oceanbase::common; using namespace oceanbase::sql; @@ -36,14 +40,6 @@ namespace oceanbase namespace sql { -#define GET_SESSION() \ - ObBasicSessionInfo *session = ctx.exec_ctx_.get_my_session(); \ - if (OB_ISNULL(session)) { \ - ret = OB_ERR_UNEXPECTED; \ - LOG_WARN("session is NULL", K(ret)); \ - } else - - ObExprJsonQuery::ObExprJsonQuery(ObIAllocator &alloc) : ObFuncExprOperator(alloc, T_FUN_SYS_JSON_QUERY, N_JSON_QUERY, MORE_THAN_TWO, 
VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) { @@ -65,76 +61,28 @@ int ObExprJsonQuery::calc_result_typeN(ObExprResType& type, ret = OB_ERR_PARAM_SIZE; LOG_WARN("invalid param number", K(ret), K(param_num)); } else { - bool input_judge_json_type = false; - ObObjType doc_type = types_stack[0].get_type(); - if (types_stack[0].get_type() == ObNullType) { - } else if (!ObJsonExprHelper::is_convertible_to_json(doc_type)) { - ret = OB_ERR_INVALID_TYPE_FOR_OP; - LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_OP, ob_obj_type_str(types_stack[0].get_type()), "JSON"); - } else if (ob_is_string_type(doc_type)) { - if (types_stack[0].get_collation_type() == CS_TYPE_BINARY) { - // suport string type with binary charset - types_stack[0].set_calc_collation_type(CS_TYPE_BINARY); - } else if (types_stack[0].get_charset_type() != CHARSET_UTF8MB4) { - types_stack[0].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + bool is_json_input = false; + if (OB_FAIL(ObExprJsonValue::calc_input_type(types_stack[JSN_QUE_DOC], is_json_input))) { + LOG_WARN("fail to calc input type", K(ret)); + } else if (types_stack[JSN_QUE_PATH].get_type() == ObNullType) { // json path : 1 + ret = OB_ERR_PATH_EXPRESSION_NOT_LITERAL; + LOG_USER_ERROR(OB_ERR_PATH_EXPRESSION_NOT_LITERAL); + } else if (ob_is_string_type(types_stack[JSN_QUE_PATH].get_type())) { + if (types_stack[JSN_QUE_PATH].get_charset_type() != CHARSET_UTF8MB4) { + types_stack[JSN_QUE_PATH].set_calc_collation_type(types_stack[JSN_QUE_PATH].get_collation_type()); } - } else if (doc_type == ObJsonType) { - input_judge_json_type = true; - // do nothing - // types_stack[0].set_calc_type(ObJsonType); - // types_stack[0].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); } else { - types_stack[0].set_calc_type(ObLongTextType); - types_stack[0].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + types_stack[JSN_QUE_PATH].set_calc_type(ObLongTextType); + types_stack[JSN_QUE_PATH].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); } - - // json path : 1 - if (OB_SUCC(ret)) { - if 
(types_stack[1].get_type() == ObNullType) { - ret = OB_ERR_PATH_EXPRESSION_NOT_LITERAL; - LOG_USER_ERROR(OB_ERR_PATH_EXPRESSION_NOT_LITERAL); - } else if (ob_is_string_type(types_stack[1].get_type())) { - if (types_stack[1].get_charset_type() != CHARSET_UTF8MB4) { - types_stack[1].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); - } - } else { - types_stack[1].set_calc_type(ObLongTextType); - types_stack[1].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); - } - } - // returning type : 2 判断default + // returning type : 2 ObExprResType dst_type; - if (OB_SUCC(ret)) { - if (types_stack[2].get_type() == ObNullType) { - ObString j_path_text(types_stack[1].get_param().get_string().length(), types_stack[1].get_param().get_string().ptr()); - ObJsonPath j_path(j_path_text, &allocator); - if (j_path_text.length() == 0) { - } else if (OB_FAIL(j_path.parse_path())) { - ret = OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR; - LOG_USER_ERROR(OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR, j_path_text.length(), j_path_text.ptr()); - } - if (OB_FAIL(ret)) { - } else if (input_judge_json_type && !j_path.is_last_func()) { - dst_type.set_type(ObObjType::ObJsonType); - dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); - } else { - dst_type.set_type(ObObjType::ObVarcharType); - dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); - dst_type.set_full_length(4000, 1); - } - } else if (OB_FAIL(ObJsonExprHelper::get_cast_type(types_stack[2], dst_type))) { - LOG_WARN("get cast dest type failed", K(ret)); - } - if (OB_SUCC(ret)) { - if (OB_FAIL(ObJsonExprHelper::set_dest_type(types_stack[0], type, dst_type, type_ctx))) { - LOG_WARN("set dest type failed", K(ret)); - } else { - type.set_calc_collation_type(type.get_collation_type()); - } - } + if (OB_SUCC(ret) && OB_FAIL(calc_returning_type(type, types_stack, type_ctx, + dst_type, &allocator, is_json_input))) { + LOG_WARN("fail to calc returning type", K(ret)); } // truncate 3 , scalars 4, pretty 5, ascii 6, wrapper 7, error 8, empty 9, mismatch 10 - for (int64_t 
i = 3; i < param_num && OB_SUCC(ret); ++i) { + for (int64_t i = JSN_QUE_TRUNC; i < param_num && OB_SUCC(ret); ++i) { if (types_stack[i].get_type() == ObNullType) { ret = OB_ERR_UNEXPECTED; LOG_WARN("param type is unexpected", K(types_stack[i].get_type()), K(i)); @@ -142,10 +90,9 @@ int ObExprJsonQuery::calc_result_typeN(ObExprResType& type, types_stack[i].set_calc_type(ObIntType); } } - // ASCII clause if (OB_SUCC(ret)) { - if (OB_FAIL(ObJsonExprHelper::parse_asc_option(types_stack[6], types_stack[0], type, type_ctx))) { + if (OB_FAIL(ObJsonExprHelper::parse_asc_option(types_stack[JSN_QUE_ASCII], types_stack[JSN_QUE_DOC], type, type_ctx))) { LOG_WARN("fail to parse asc option.", K(ret)); } } @@ -153,529 +100,577 @@ int ObExprJsonQuery::calc_result_typeN(ObExprResType& type, return ret; } +int ObExprJsonQuery::calc_returning_type(ObExprResType& type, + ObExprResType* types_stack, + ObExprTypeCtx& type_ctx, + ObExprResType& dst_type, + common::ObIAllocator *allocator, + bool is_json_input) +{ + INIT_SUCC(ret); + if (types_stack[JSN_QUE_RET].get_type() == ObNullType) { + ObString j_path_text(types_stack[JSN_QUE_PATH].get_param().get_string().length(), types_stack[JSN_QUE_PATH].get_param().get_string().ptr()); + ObJsonPath j_path(j_path_text, allocator); + + if (j_path_text.length() == 0) { + dst_type.set_type(ObObjType::ObVarcharType); + dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); + dst_type.set_full_length(VARCHAR2_DEFAULT_LEN, 1); + } else if (OB_FAIL(ObJsonExprHelper::convert_string_collation_type( + types_stack[JSN_QUE_PATH].get_collation_type(), + CS_TYPE_UTF8MB4_BIN, + allocator, + j_path_text, + j_path_text))) { + LOG_WARN("convert string memory failed", K(ret), K(j_path_text)); + } else if (OB_FAIL(j_path.parse_path())) { + ret = OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR; + LOG_USER_ERROR(OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR, j_path_text.length(), j_path_text.ptr()); + } else if (is_json_input && !j_path.is_last_func()) { + 
dst_type.set_type(ObObjType::ObJsonType); + dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); + } else { + dst_type.set_type(ObObjType::ObVarcharType); + dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); + dst_type.set_full_length(VARCHAR2_DEFAULT_LEN, 1); + } + } else if (OB_FAIL(ObJsonExprHelper::get_cast_type(types_stack[JSN_QUE_RET], dst_type, type_ctx))) { + LOG_WARN("get cast dest type failed", K(ret)); + } else if (dst_type.get_type() != ObVarcharType + && dst_type.get_type() != ObLongTextType + && dst_type.get_type() != ObJsonType) { + ret = OB_ERR_INVALID_DATA_TYPE_RETURNING; + LOG_USER_ERROR(OB_ERR_INVALID_DATA_TYPE_RETURNING); + } + if (OB_SUCC(ret)) { + if (OB_FAIL(ObJsonExprHelper::set_dest_type(types_stack[JSN_QUE_DOC], type, dst_type, type_ctx))) { + LOG_WARN("set dest type failed", K(ret)); + } else { + type.set_calc_collation_type(type.get_collation_type()); + } + } + return ret; +} + +int ObExprJsonQuery::extract_plan_cache_param(const ObExprJsonQueryParamInfo *info, ObJsonExprParam& json_param) +{ + INIT_SUCC(ret); + json_param.truncate_ = info->truncate_; + json_param.empty_type_ = info->empty_type_; + json_param.error_type_ = info->error_type_; + json_param.ascii_type_ = info->ascii_type_; + json_param.json_path_ = info->j_path_; + json_param.is_init_from_cache_ = true; + json_param.scalars_type_ = info->scalars_type_; + json_param.pretty_type_ = info->pretty_type_; + json_param.wrapper_ = info->wrapper_; + + if (OB_FAIL(json_param.on_mismatch_.push_back(info->on_mismatch_.at(0)))) { + LOG_WARN("fail to push node to mismatch type", K(ret)); + } + return ret; +} + int ObExprJsonQuery::eval_json_query(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { INIT_SUCC(ret); ObDatum *json_datum = NULL; - ObExpr *json_arg = expr.args_[1]; + ObExpr *json_arg = expr.args_[JSN_QUE_PATH]; ObObjType type = json_arg->datum_meta_.type_; bool is_cover_by_error = true; bool is_null_result = false; - bool is_null_json_obj = false; - bool is_null_json_array = 
false; - uint8_t is_type_cast = 0; - int8_t JSON_QUERY_EXPR = 1; + uint8_t is_type_mismatch = 0; ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); - ObIJsonBase *j_base = NULL; + ObJsonBin st_json(&temp_allocator); + ObIJsonBase *j_base = &st_json; + ObIJsonBase *jb_empty = NULL; + int64_t dst_len = OB_MAX_TEXT_LENGTH; + int8_t use_wrapper = 0; + bool is_json_arr = false; + bool is_json_obj = false; + ObJsonSeekResult hits; + ObJsonBin res_json(&temp_allocator); + hits.res_point_ = &res_json; - // parse json path - ObJsonPath *j_path = NULL; - if (OB_FAIL(get_ora_json_path(expr, ctx, temp_allocator, j_path, 1, is_null_result, is_cover_by_error, json_datum))) { + // get context first + ObJsonParamCacheCtx ctx_cache(&temp_allocator); + ObJsonParamCacheCtx* param_ctx = ObJsonExprHelper::get_param_cache_ctx(expr.expr_ctx_id_, &ctx.exec_ctx_); + if (OB_ISNULL(param_ctx)) { + param_ctx = &ctx_cache; + } + if (param_ctx->is_first_exec_ && OB_FAIL(init_ctx_var(param_ctx, expr))) { + is_cover_by_error = false; + LOG_WARN("fail to init param ctx", K(ret)); + } else if (OB_ISNULL(param_ctx->json_param_.json_path_) + && OB_FAIL(ObJsonUtil::get_json_path(expr.args_[JSN_QUE_PATH], ctx, // parse json path + is_null_result, param_ctx, + temp_allocator, is_cover_by_error))) { // ctx_cache->path_cache_ LOG_WARN("get_json_path failed", K(ret)); - } - - // parse pretty ascii scalars - uint8_t pretty_type = OB_JSON_PRE_ASC_EMPTY; - uint8_t ascii_type = OB_JSON_PRE_ASC_EMPTY; - uint8_t scalars_type = JSN_QUERY_SCALARS_IMPLICIT; - if (OB_SUCC(ret) && !is_null_result) { - ret = get_clause_pre_asc_sca_opt(expr, ctx, is_cover_by_error, pretty_type, ascii_type, scalars_type); - } - - // parse return node acc - ObAccuracy accuracy; - ObObjType dst_type; - json_arg = expr.args_[0]; - type = json_arg->datum_meta_.type_; - ObExpr *json_arg_ret = expr.args_[2]; - ObObjType val_type = json_arg_ret->datum_meta_.type_; - int32_t 
dst_len = OB_MAX_TEXT_LENGTH; - if (OB_SUCC(ret) && val_type == ObNullType) { - if (expr.args_[0]->datum_meta_.type_ != ObJsonType && j_path->is_last_func() - && OB_FAIL(ObJsonExprHelper::check_item_func_with_return(j_path->get_last_node_type(), - ObVarcharType, expr.datum_meta_.cs_type_, JSON_QUERY_EXPR))) { - is_cover_by_error = false; - LOG_WARN("check item func with return type fail", K(ret)); - } - } else if (OB_SUCC(ret) && !is_null_result) { - ret = get_dest_type(expr, dst_len, ctx, dst_type, is_cover_by_error); - } else if (is_cover_by_error) { // when need error option, should do get accuracy - get_dest_type(expr, dst_len, ctx, dst_type, is_cover_by_error); - } - - if (OB_SUCC(ret) && val_type != ObNullType && j_path->is_last_func() - && OB_FAIL( ObJsonExprHelper::check_item_func_with_return(j_path->get_last_node_type(), - dst_type, expr.datum_meta_.cs_type_, JSON_QUERY_EXPR))) { - is_cover_by_error = false; - LOG_WARN("check item func with return type fail", K(ret)); - } - - if (OB_SUCC(ret) && val_type == ObNullType) { - if (ob_is_string_type(type) || j_path->is_last_func()) { - dst_type = ObVarcharType; - accuracy.set_full_length(4000, 1, lib::is_oracle_mode()); - } else { - dst_type = ObJsonType; - accuracy.set_length(0); - } - } - - if (OB_SUCC(ret) && dst_type != ObVarcharType && dst_type != ObLongTextType && dst_type != ObJsonType) { - is_cover_by_error = false; - ret = OB_ERR_INVALID_DATA_TYPE_RETURNING; - LOG_USER_ERROR(OB_ERR_INVALID_DATA_TYPE_RETURNING); - } - - if ((expr.datum_meta_.cs_type_ == CS_TYPE_BINARY || dst_type == ObJsonType) && (pretty_type > 0 || ascii_type > 0)) { - is_cover_by_error = false; - ret = OB_ERR_NON_TEXT_RET_NOTSUPPORT; - LOG_WARN("ASCII or PRETTY not supported for non-textual return data type", K(ret)); - } - - // parse json doc - if ((OB_SUCC(ret) || is_cover_by_error) && OB_FAIL(get_ora_json_doc(expr, ctx, temp_allocator, 0, j_base, dst_type, is_null_result, is_cover_by_error))) { + } else if 
(param_ctx->is_first_exec_ + && OB_FAIL(get_clause_param_value(expr, ctx, ¶m_ctx->json_param_, dst_len, + is_cover_by_error))) { + // get clause param value, set into param_ctx + LOG_WARN("fail to parse clause value", K(ret)); + } else if (OB_FAIL(ObJsonUtil::get_json_doc(expr.args_[JSN_QUE_DOC], ctx, temp_allocator, + j_base, is_null_result, + is_cover_by_error, true))) { // parse json doc LOG_WARN("get_json_doc failed", K(ret)); - } - - // parse error option - uint8_t error_type = JSN_QUERY_IMPLICIT; - ObDatum *error_val = NULL; - if (OB_SUCC(ret) && !is_null_result) { - ret = get_clause_opt(expr, ctx, 8, is_cover_by_error, error_type, JSN_QUERY_RESPONSE_COUNT); - } else if (is_cover_by_error) { // always get error option on error - int temp_ret = get_clause_opt(expr, ctx, 8, is_cover_by_error, error_type, JSN_QUERY_RESPONSE_COUNT); - if (temp_ret != OB_SUCCESS) { - ret = temp_ret; - LOG_WARN("failed to get error option.", K(temp_ret)); - } - } - - // parse wrapper - uint8_t wrapper_type = JSN_QUERY_WRAPPER_IMPLICIT; - if (OB_SUCC(ret)) { - ret = get_clause_opt(expr, ctx, 7, is_cover_by_error, wrapper_type, JSN_QUERY_WRAPPER_COUNT); - } - - if (OB_SUCC(ret) && j_path->get_last_node_type() > JPN_BEGIN_FUNC_FLAG - && j_path->get_last_node_type() < JPN_END_FUNC_FLAG - && (j_path->get_last_node_type() == JPN_NUMBER - || j_path->get_last_node_type() == JPN_NUM_ONLY - || j_path->get_last_node_type() == JPN_LENGTH - || j_path->get_last_node_type() == JPN_TYPE - || j_path->get_last_node_type() == JPN_SIZE ) - && (wrapper_type == JSN_QUERY_WITHOUT_WRAPPER || wrapper_type == JSN_QUERY_WITHOUT_ARRAY_WRAPPER - || wrapper_type == JSN_QUERY_WRAPPER_IMPLICIT)) { + } else if (param_ctx->json_param_.json_path_ == nullptr) {// do seek is_cover_by_error = false; - ret = OB_ERR_WITHOUT_ARR_WRAPPER; // result cannot be returned without array wrapper - LOG_WARN("result cannot be returned without array wrapper.", K(ret), K(j_path->get_last_node_type()), K(wrapper_type)); - } - - // 
mismatch // if mismatch_type == 3 from dot notation - uint8_t mismatch_type = JSN_QUERY_MISMATCH_IMPLICIT; - uint8_t mismatch_val = 7; - if (OB_SUCC(ret) && !is_null_result) { - if (OB_FAIL(get_clause_opt(expr, ctx, 10, is_cover_by_error, mismatch_type, JSN_QUERY_MISMATCH_COUNT))) { - LOG_WARN("failed to get mismatch option.", K(ret), K(mismatch_type)); - } - } - - uint8_t is_truncate = 0; - if (OB_SUCC(ret) && !is_null_result) { - if (OB_FAIL(get_clause_opt(expr, ctx, 3, is_cover_by_error, is_truncate, 2))) { - LOG_WARN("failed to get mismatch option.", K(ret), K(mismatch_type)); - } - } - - // do seek - // chose wrapper - int use_wrapper = 0; - ObJsonBaseVector hits; - if (json_datum == nullptr) { - ret = ret = OB_ERR_UNEXPECTED;; + ret = OB_ERR_UNEXPECTED; LOG_WARN("json path parse fail", K(ret)); - } else if (OB_SUCC(ret) && !is_null_result) { - - if (OB_FAIL(j_base->seek(*j_path, j_path->path_node_cnt(), true, false, hits))) { - if (ret == OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR) { - is_cover_by_error = false; - } else if (ret == OB_ERR_DOUBLE_TRUNCATED) { - ret = OB_ERR_CONVERSION_FAIL; - } - LOG_WARN("json seek failed", K(json_datum->get_string()), K(ret)); - } else if (hits.size() == 1) { - if (mismatch_type == JSN_QUERY_MISMATCH_DOT) { - if (hits[0]->json_type() == ObJsonNodeType::J_NULL && hits[0]->is_real_json_null(hits[0]) && dst_type != ObJsonType) { - is_null_result = true; - } - - } else { - if (OB_FAIL(get_single_obj_wrapper(wrapper_type, use_wrapper, hits[0]->json_type(), scalars_type))) { - is_cover_by_error = true; - LOG_WARN("error occur in wrapper type"); - } else if (use_wrapper == 0 && hits[0]->json_type() == ObJsonNodeType::J_NULL && !hits[0]->is_real_json_null(hits[0])) { - is_null_result = true; - } else if (use_wrapper == 0 && j_path->is_last_func() && j_path->path_node_cnt() == 1) { - // do nothing - } else if (use_wrapper == 0 && j_path->get_last_node_type() == JPN_BOOLEAN - && (hits[0]->is_json_number(hits[0]->json_type()) || 
hits[0]->json_type() == ObJsonNodeType::J_NULL)) { - is_null_result = true; - } else if (use_wrapper == 0 && (j_path->get_last_node_type() == JPN_DATE || j_path->get_last_node_type() == JPN_TIMESTAMP) - && !hits[0]->is_json_date(hits[0]->json_type())) { - is_null_result = true; - } else if (use_wrapper == 0 && j_path->get_last_node_type() == JPN_DOUBLE - && !hits[0]->is_json_number(hits[0]->json_type()) && hits[0]->json_type() != ObJsonNodeType::J_NULL) { - is_null_result = true; - } else if (use_wrapper == 0 && (j_path->get_last_node_type() == JPN_STRING || j_path->get_last_node_type() == JPN_STR_ONLY) - && (hits[0]->json_type() == ObJsonNodeType::J_OBJECT || hits[0]->json_type() == ObJsonNodeType::J_ARRAY)) { - is_null_result = true; - } else if (use_wrapper == 0 && (j_path->get_last_node_type() == JPN_UPPER || j_path->get_last_node_type() == JPN_LOWER) - && (hits[0]->json_type() == ObJsonNodeType::J_OBJECT || hits[0]->json_type() == ObJsonNodeType::J_ARRAY)) { - is_null_result = true; - } else if (use_wrapper == 0 && (j_path->get_last_node_type() == JPN_NUMBER || j_path->get_last_node_type() == JPN_NUM_ONLY - || j_path->get_last_node_type() == JPN_DOUBLE) - && (!hits[0]->is_json_number(hits[0]->json_type()) && hits[0]->json_type() != ObJsonNodeType::J_NULL)) { - is_null_result = true; - } else if (use_wrapper == 0 && j_path->get_last_node_type() == JPN_LENGTH && !(hits[0]->json_type() == ObJsonNodeType::J_UINT - && ((ObJsonUint *)hits[0])->get_is_string_length())) { - is_null_result = true; - } else if (use_wrapper == 0 && (j_path->get_last_node_type() == JPN_DATE || j_path->get_last_node_type() == JPN_TIMESTAMP) - && !hits[0]->is_json_date(hits[0]->json_type())) { - is_null_result = true; - } - } - } else if (hits.size() == 0) { - // parse empty option - uint8_t empty_type = JSN_QUERY_IMPLICIT; - if (OB_SUCC(ret) && !is_null_result) { - ret = get_clause_opt(expr, ctx, 9, is_cover_by_error, empty_type, JSN_QUERY_RESPONSE_COUNT); - } - if (OB_SUCC(ret) && 
OB_FAIL(get_empty_option(hits, is_cover_by_error, empty_type, is_null_result, is_null_json_obj, is_null_json_array))) { - LOG_WARN("get empty type", K(ret)); - } - } else if (hits.size() > 1) { - // return val decide by wrapper option - if (OB_FAIL(get_multi_scalars_wrapper_type(wrapper_type, use_wrapper, hits, scalars_type))) { - is_cover_by_error = true; - LOG_WARN("error occur in wrapper type"); - } - } + } else if (!is_null_result + && OB_FAIL(ObExprJsonQuery::doc_do_seek(j_base, ¶m_ctx->json_param_, hits, use_wrapper, + is_cover_by_error, is_null_result, + is_json_arr, is_json_obj))) { + LOG_WARN("fail to seek result", K(ret)); } // fill output - if (OB_UNLIKELY(OB_FAIL(ret))) { + if (OB_FAIL(ret)) { if (is_cover_by_error) { - if (!try_set_error_val(&temp_allocator, ctx, expr, res, ret, error_type, mismatch_type, dst_type)) { + if (!try_set_error_val(&temp_allocator, ctx, ¶m_ctx->json_param_, expr, res, ret)) { LOG_WARN("set error val fail", K(ret)); } } LOG_WARN("json_query failed", K(ret)); } else if (is_null_result) { res.set_null(); - } else if (mismatch_type == JSN_QUERY_MISMATCH_DOT && hits.size() == 1 && dst_type != ObJsonType) { - ObVector mismatch_val_tmp; - ObVector mismatch_type_tmp; //OB_JSON_TYPE_IMPLICIT - ObCollationType in_coll_type = expr.args_[0]->datum_meta_.cs_type_; + } else if (param_ctx->json_param_.on_mismatch_[0] == JSN_QUERY_MISMATCH_DOT + && hits.size() == 1 + && param_ctx->json_param_.dst_type_ != ObJsonType) { // dot notation + ObCollationType in_coll_type = expr.args_[JSN_QUE_DOC]->datum_meta_.cs_type_; ObCollationType dst_coll_type = expr.datum_meta_.cs_type_; - ret = ObExprJsonValue::cast_to_res(&temp_allocator, expr, ctx, hits[0], JSN_QUERY_NULL, error_val, - accuracy, dst_type, in_coll_type, dst_coll_type, res, mismatch_val_tmp, mismatch_type_tmp, is_type_cast, ascii_type, is_truncate); + param_ctx->json_param_.error_type_ = JSN_QUERY_NULL; + ObJsonCastParam cast_param(param_ctx->json_param_.dst_type_, in_coll_type, 
dst_coll_type, 0); + ret = ObJsonUtil::cast_to_res(&temp_allocator, ctx, hits[0], + param_ctx->json_param_.accuracy_, cast_param, res, is_type_mismatch); + if (OB_FAIL(ret)) { + try_set_error_val(&temp_allocator, ctx, ¶m_ctx->json_param_, expr, res, ret); + } else if (OB_FAIL(ObJsonUtil::set_lob_datum(&temp_allocator, expr, ctx, param_ctx->json_param_.dst_type_, 0, res))) { + LOG_WARN("fail to set lob datum from string val", K(ret)); + } + } else if (use_wrapper == 1) { + size_t hit_size = hits.size(); + ObJsonArray j_arr_res(&temp_allocator); + ObIJsonBase *jb_res = NULL; + jb_res = &j_arr_res; + // adaptive json binary append + if (OB_NOT_NULL(param_ctx->json_param_.json_path_) && param_ctx->json_param_.json_path_->is_last_func()) { + if (OB_FAIL(append_node_into_res(jb_res, param_ctx->json_param_.json_path_, + hits, &temp_allocator))) { + LOG_WARN("fail to tree apeend node", K(ret)); + } + } else if (OB_FAIL(append_binary_node_into_res(jb_res, param_ctx->json_param_.json_path_, + hits, &temp_allocator))) { + LOG_WARN("fail to apeend binary node", K(ret)); + } + + if (try_set_error_val(&temp_allocator, ctx, ¶m_ctx->json_param_, expr, res, ret)) { + } else if (OB_FAIL(set_result(¶m_ctx->json_param_, jb_res, &temp_allocator, + ctx, expr, res))) { + LOG_WARN("result set fail", K(ret)); + } + } else if (is_json_arr) { + ObJsonArray j_arr_var(&temp_allocator); + jb_empty = &j_arr_var; + ret = set_result(¶m_ctx->json_param_, jb_empty, &temp_allocator, ctx, expr, res); + } else if (is_json_obj) { + ObJsonObject j_obj_var(&temp_allocator); + jb_empty = &j_obj_var; + ret = set_result(¶m_ctx->json_param_, jb_empty, &temp_allocator, ctx, expr, res); } else { - if (is_null_json_obj) { - ObJsonObject j_node_null(&temp_allocator); - ObIJsonBase *jb_res = NULL; - jb_res = &j_node_null; - if (OB_FAIL(set_result(dst_type, dst_len, jb_res, &temp_allocator, ctx, expr, res, error_type, ascii_type, pretty_type, is_truncate))) { - LOG_WARN("result set fail", K(ret)); - } - } else if 
(use_wrapper == 1 || is_null_json_array) { - int32_t hit_size = hits.size(); - ObJsonArray j_arr_res(&temp_allocator); - ObIJsonBase *jb_res = NULL; - ObJsonNode *j_node = NULL; - ObIJsonBase *jb_node = NULL; - jb_res = &j_arr_res; - if (is_null_json_array) { + ret = set_result(¶m_ctx->json_param_, hits[0], &temp_allocator, ctx, expr, res); + } + if (OB_SUCC(ret)) { + param_ctx->is_first_exec_ = false; + } + return ret; +} + +int ObExprJsonQuery::init_ctx_var(ObJsonParamCacheCtx*& param_ctx, const ObExpr &expr) +{ + INIT_SUCC(ret); + // init json path flag + param_ctx->is_json_path_const_ = expr.args_[JSN_QUE_PATH]->is_const_expr(); + const ObExprJsonQueryParamInfo *info + = static_cast(expr.extra_info_); + if (OB_NOT_NULL(info) + && OB_FAIL(extract_plan_cache_param(info, param_ctx->json_param_))) { + LOG_WARN("fail to extract param from plan cache", K(ret)); + } + return ret; +} + +int ObExprJsonQuery::append_node_into_res(ObIJsonBase*& jb_res, + ObJsonPath* j_path, + ObJsonSeekResult &hits, + common::ObIAllocator *allocator) +{ + INIT_SUCC(ret); + size_t hit_size = hits.size(); + ObJsonNode *j_node = NULL; + ObIJsonBase *jb_node = NULL; + for (size_t i = 0; OB_SUCC(ret) && i < hit_size; i++) { + bool is_null_res = false; + if (OB_FAIL(deal_item_method_special_case(j_path, hits, is_null_res, i, true))) { + LOG_WARN("fail to deal item method special case", K(ret)); + } else if (is_null_res) { + void* buf = NULL; + buf = allocator->alloc(sizeof(ObJsonNull)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; } else { - for (int32_t i = 0; OB_SUCC(ret) && i < hit_size; i++) { - bool is_null_res = false; - if (hits[i]->json_type() == ObJsonNodeType::J_NULL - && !(hits[i]->is_real_json_null(hits[i]))) { - is_null_res = true; - } else if (j_path->get_last_node_type() == JPN_BOOLEAN - && (hits[i]->is_json_number(hits[i]->json_type()) || hits[i]->json_type() == ObJsonNodeType::J_NULL)) { - is_null_res = true; - } else if (j_path->get_last_node_type() == JPN_LENGTH 
&& !(hits[i]->json_type() == ObJsonNodeType::J_UINT - && ((ObJsonUint *)hits[i])->get_is_string_length())) { - is_null_res = true; - } else if ((j_path->get_last_node_type() == JPN_STRING || j_path->get_last_node_type() == JPN_STR_ONLY) - && (hits[i]->json_type() == ObJsonNodeType::J_OBJECT || hits[i]->json_type() == ObJsonNodeType::J_ARRAY)) { - is_null_res = true; - } else if ((j_path->get_last_node_type() == JPN_UPPER || j_path->get_last_node_type() == JPN_LOWER) - && (hits[i]->json_type() == ObJsonNodeType::J_OBJECT || hits[i]->json_type() == ObJsonNodeType::J_ARRAY)) { - is_null_res = true; - } else if ((j_path->get_last_node_type() == JPN_DATE || j_path->get_last_node_type() == JPN_TIMESTAMP) - && !hits[i]->is_json_date(hits[i]->json_type())) { - is_null_res = true; - } else if ((j_path->get_last_node_type() == JPN_NUMBER || j_path->get_last_node_type() == JPN_NUM_ONLY - || j_path->get_last_node_type() == JPN_DOUBLE ) - && !hits[i]->is_json_number(hits[i]->json_type()) && hits[i]->json_type() != ObJsonNodeType::J_NULL) { - is_null_res = true; - } else if ((hits[i]->json_type() == ObJsonNodeType::J_OBJECT || hits[i]->json_type() == ObJsonNodeType::J_ARRAY) - && j_path->is_last_func() && j_path->path_node_cnt() == 1) { - // do nothing - } - if (is_null_res) { - void* buf = NULL; - buf = temp_allocator.alloc(sizeof(ObJsonNull)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - } else { - jb_node = (ObJsonNull*)new(buf)ObJsonNull(true); - } - } else if (OB_FAIL(ObJsonBaseFactory::transform(&temp_allocator, hits[i], ObJsonInType::JSON_TREE, jb_node))) { // to tree - LOG_WARN("fail to transform to tree", K(ret), K(i), K(*(hits[i]))); - } - if (OB_SUCC(ret)) { - j_node = static_cast(jb_node); - if (OB_ISNULL(j_node)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("json node input is null", K(ret), K(i), K(is_null_res), K(hits[i])); - } else if (OB_FAIL(jb_res->array_append(j_node->clone(&temp_allocator)))) { - LOG_WARN("result array append failed", K(ret), 
K(i), K(*j_node)); - } - } - } + jb_node = (ObJsonNull*)new(buf)ObJsonNull(true); } - if (!is_null_json_array && try_set_error_val(&temp_allocator, ctx, expr, res, ret, error_type, mismatch_type, dst_type)) { - } else if (OB_FAIL(set_result(dst_type,dst_len, jb_res, &temp_allocator, ctx, expr, res, error_type, ascii_type, pretty_type, is_truncate))) { - LOG_WARN("result set fail", K(ret)); + } else if (OB_FAIL(ObJsonBaseFactory::transform(allocator, hits[i], ObJsonInType::JSON_TREE, jb_node))) { // to tree + LOG_WARN("fail to transform to tree", K(ret), K(i), K(*(hits[i]))); + } + if (OB_SUCC(ret)) { + j_node = static_cast(jb_node); + if (OB_ISNULL(j_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("json node input is null", K(ret), K(i), K(is_null_res), K(hits[i])); + } else if (OB_FAIL(jb_res->array_append(j_node->clone(allocator)))) { + LOG_WARN("result array append failed", K(ret), K(i), K(*j_node)); } - } else { - ret = set_result(dst_type, dst_len, hits[0], &temp_allocator, ctx, expr, res, error_type, ascii_type, pretty_type, is_truncate); } } return ret; } -int ObExprJsonQuery::set_result(ObObjType dst_type, - int32_t dst_len, +int ObExprJsonQuery::append_binary_node_into_res(ObIJsonBase*& jb_res, + ObJsonPath* j_path, + ObJsonSeekResult &hits, + common::ObIAllocator *allocator) +{ + INIT_SUCC(ret); + size_t hit_size = hits.size(); + ObJsonBin *j_node = NULL; + ObIJsonBase *jb_node = NULL; + ObStringBuffer value(allocator); + ObBinAggSerializer bin_agg(allocator, AGG_JSON, static_cast(ObJsonNodeType::J_ARRAY)); + for (size_t i = 0; OB_SUCC(ret) && i < hit_size; i++) { + bool is_null_res = false; + if (OB_FAIL(deal_item_method_special_case(j_path, hits, is_null_res, i, true))) { + LOG_WARN("fail to deal item method special case", K(ret)); + } else if (is_null_res) { + void* buf = NULL; + buf = allocator->alloc(sizeof(ObJsonNull)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + } else { + jb_node = (ObJsonNull*)new(buf)ObJsonNull(true); + } + } + 
if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObJsonBaseFactory::transform(allocator, is_null_res ? jb_node : hits[i], ObJsonInType::JSON_BIN, jb_node))) { // to binary + LOG_WARN("fail to transform to tree", K(ret), K(i), K(*(hits[i]))); + } else { + j_node = static_cast(jb_node); + ObString key; + if (OB_ISNULL(j_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("json node input is null", K(ret), K(i), K(is_null_res), K(hits[i])); + } else if (OB_FAIL(bin_agg.append_key_and_value(key, value, j_node))) { + LOG_WARN("failed to append key and value", K(ret)); + } + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(bin_agg.serialize())) { + LOG_WARN("failed to serialize bin agg.", K(ret)); + } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(allocator, bin_agg.get_buffer()->string(), ObJsonInType::JSON_BIN, ObJsonInType::JSON_BIN, jb_res, ObJsonParser::JSN_RELAXED_FLAG))) { + LOG_WARN("failed to get json base.", K(ret)); + } + return ret; +} + +int ObExprJsonQuery::check_params_valid(const ObExpr &expr, + ObJsonExprParam* json_param, + bool &is_cover_by_error) +{ + INIT_SUCC(ret); + int8_t JSON_QUERY_EXPR = 1; + ObExpr* json_arg = expr.args_[JSN_QUE_DOC]; + ObObjType val_type = json_arg->datum_meta_.type_; + ObExpr *json_arg_ret = expr.args_[JSN_QUE_RET]; + ObObjType ret_type = json_arg_ret->datum_meta_.type_; + // check conflict between item method and returning type. 
+ if (!(val_type == ObJsonType && ret_type == ObNullType) + && json_param->json_path_->is_last_func() + && OB_FAIL( ObJsonExprHelper::check_item_func_with_return(json_param->json_path_->get_last_node_type(), + json_param->dst_type_, expr.datum_meta_.cs_type_, JSON_QUERY_EXPR))) { + is_cover_by_error = false; + LOG_WARN("check item func with return type fail", K(ret)); + } else if (json_param->dst_type_ != ObVarcharType + && json_param->dst_type_ != ObLongTextType + && json_param->dst_type_ != ObJsonType) { + is_cover_by_error = false; + ret = OB_ERR_INVALID_DATA_TYPE_RETURNING; + LOG_USER_ERROR(OB_ERR_INVALID_DATA_TYPE_RETURNING); + } else if (OB_FAIL(check_item_method_valid_with_wrapper(json_param->json_path_, json_param->wrapper_))) { + is_cover_by_error = false; + LOG_WARN("fail to check item method with wrapper", K(ret)); + } else if ((expr.datum_meta_.cs_type_ == CS_TYPE_BINARY || json_param->dst_type_ == ObJsonType) && (json_param->pretty_type_ > 0 || json_param->ascii_type_ > 0)) { + is_cover_by_error = false; + ret = OB_ERR_NON_TEXT_RET_NOTSUPPORT; + LOG_WARN("ASCII or PRETTY not supported for non-textual return data type", K(ret)); + } + return ret; +} + +int ObExprJsonQuery::get_clause_param_value(const ObExpr &expr, + ObEvalCtx &ctx, + ObJsonExprParam* json_param, + int64_t &dst_len, + bool &is_cover_by_error) +{ + INIT_SUCC(ret); + ObArray param_vec; + int8_t val = 0; + // returning type + ObExpr* json_arg = expr.args_[JSN_QUE_DOC]; + ObObjType type = json_arg->datum_meta_.type_; + ObExpr *json_arg_ret = expr.args_[JSN_QUE_RET]; + ObObjType val_type = json_arg_ret->datum_meta_.type_; + if (val_type == ObNullType) { + if (ob_is_string_type(type) || json_param->json_path_->is_last_func()) { + json_param->dst_type_ = ObVarcharType; + json_param->accuracy_.set_full_length(VARCHAR2_DEFAULT_LEN, 1, lib::is_oracle_mode()); + } else { + json_param->dst_type_ = ObJsonType; + json_param->accuracy_.set_length(0); + } + } else { + ret = 
ObJsonUtil::get_accuracy(expr, ctx, json_param->accuracy_, json_param->dst_type_, is_cover_by_error); + } + // truncate 3, scalars 4, pretty 5, ascii 6, wrapper 7, error 8, empty 9, mismatch 10 + for (size_t i = JSN_QUE_TRUNC; OB_SUCC(ret) && i <= JSN_QUE_MISMATCH; i ++) { + if (OB_FAIL(ObJsonExprHelper::get_clause_opt(expr.args_[i], ctx, val))) { + LOG_WARN("fail to get clause option", K(ret)); + } else if (OB_FAIL(param_vec.push_back(val))) { + LOG_WARN("fail to push val into array", K(ret)); + } + } + if (OB_FAIL(ret) && is_cover_by_error) { + is_cover_by_error = false; + ret = ObJsonExprHelper::get_clause_opt(expr.args_[JSN_QUE_ERROR], ctx, json_param->error_type_); + } else if (OB_FAIL(ret)) { + } else if (param_vec.size() == 8) { + json_param->truncate_ = param_vec[JSN_QUE_TRUNC_OPT]; + json_param->scalars_type_ = param_vec[JSN_QUE_SCALAR_OPT]; + json_param->pretty_type_ = param_vec[JSN_QUE_PRETTY_OPT]; + json_param->ascii_type_ = param_vec[JSN_QUE_ASCII_OPT]; + json_param->wrapper_ = param_vec[JSN_QUE_WRAPPER_OPT]; + json_param->error_type_ = param_vec[JSN_QUE_ERROR_OPT]; + json_param->empty_type_ = param_vec[JSN_QUE_EMPTY_OPT]; + json_param->on_mismatch_.push_back(param_vec[JSN_QUE_MISMATCH_OPT]); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get param value", K(ret)); + } + // mismatch // if mismatch_type == 3 from dot notation + if (OB_FAIL(ret)) { + } else if (OB_FAIL(json_param->on_mismatch_type_.push_back(JsnValueMisMatch::OB_JSON_TYPE_IMPLICIT))) { + LOG_WARN("push back failed", K(ret)); + } else if (OB_FAIL(check_params_valid(expr, json_param, is_cover_by_error))) { + LOG_WARN("fail to check clause", K(ret)); + } + return ret; +} + +int ObExprJsonQuery::doc_do_seek(ObIJsonBase* j_base, + ObJsonExprParam *json_param, + ObJsonSeekResult &hits, + int8_t &use_wrapper, + bool &is_cover_by_error, + bool &is_null_result, + bool& is_json_arr, + bool& is_json_obj) +{ + INIT_SUCC(ret); + if (OB_FAIL(j_base->seek(*json_param->json_path_, 
json_param->json_path_->path_node_cnt(), true, false, hits))) { + if (ret == OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR) { + is_cover_by_error = false; + } else if (ret == OB_ERR_DOUBLE_TRUNCATED) { + ret = OB_ERR_CONVERSION_FAIL; + } + LOG_WARN("json seek failed", K(ret)); + } else if (hits.size() == 1) { + if (json_param->on_mismatch_[0] == JSN_QUERY_MISMATCH_DOT) { + if (hits[0]->json_type() == ObJsonNodeType::J_NULL && hits[0]->is_real_json_null(hits[0]) && json_param->dst_type_ != ObJsonType) { + is_null_result = true; + } + } else { + if (OB_FAIL(get_single_obj_wrapper(json_param->wrapper_, use_wrapper, hits[0]->json_type(), json_param->scalars_type_))) { + is_cover_by_error = true; + LOG_WARN("error occur in wrapper type"); + } else if (use_wrapper == 1) { // do nothing + } else if (OB_FAIL(deal_item_method_special_case(json_param->json_path_, hits, is_null_result, + 0, false))) { + LOG_WARN("fail to deal special case", K(ret)); + } + } + } else if (hits.size() == 0) { + if (OB_SUCC(ret) && OB_FAIL(get_empty_option(is_cover_by_error, + json_param->empty_type_, + is_null_result, is_json_arr, is_json_obj))) { + LOG_WARN("get empty type", K(ret)); + } else if (is_json_arr || is_json_obj) { + use_wrapper = 0; + } + } else if (hits.size() > 1) { + // return val decide by wrapper option + if (OB_FAIL(get_multi_scalars_wrapper_type(json_param->wrapper_, use_wrapper))) { + is_cover_by_error = true; + LOG_WARN("error occur in wrapper type", K(ret), K(hits.size())); + } + } + return ret; +} + +int ObExprJsonQuery::deal_item_method_special_case(ObJsonPath* j_path, + ObJsonSeekResult &hits, + bool &is_null_result, + size_t pos, + bool use_wrapper) +{ + INIT_SUCC(ret); + if (hits[pos]->json_type() == ObJsonNodeType::J_NULL && !hits[pos]->is_real_json_null(hits[pos])) { + is_null_result = true; + } else if (!use_wrapper && j_path->is_last_func() && j_path->path_node_cnt() == 1) { // do nothing + } else if (j_path->get_last_node_type() == JPN_LENGTH && 
!(hits[pos]->json_type() == ObJsonNodeType::J_UINT + && ((ObJsonUint *)hits[pos])->get_is_string_length())) { // distinct uint and length() + is_null_result = true; + } else if (ObJsonUtil::get_query_item_method_null_option(j_path, hits[pos]) == 1) { + is_null_result = true; + } + return ret; +} + +int ObExprJsonQuery::check_item_method_valid_with_wrapper(ObJsonPath *j_path, int8_t wrapper_type) +{ + INIT_SUCC(ret); + if (OB_SUCC(ret) && j_path->is_last_func() + && ObJsonUtil::is_number_item_method(j_path) + && (wrapper_type == JSN_QUERY_WITHOUT_WRAPPER + || wrapper_type == JSN_QUERY_WITHOUT_ARRAY_WRAPPER + || wrapper_type == JSN_QUERY_WRAPPER_IMPLICIT)) { + ret = OB_ERR_WITHOUT_ARR_WRAPPER; // result cannot be returned without array wrapper + LOG_WARN("result cannot be returned without array wrapper.", K(ret), K(j_path->get_last_node_type()), K(wrapper_type)); + } + return ret; +} + +int ObExprJsonQuery::set_result(ObJsonExprParam* json_param, ObIJsonBase *jb_res, common::ObIAllocator *allocator, ObEvalCtx &ctx, const ObExpr &expr, - ObDatum &res, - uint8_t error_type, - uint8_t ascii_type, - uint8_t pretty_type, - uint8_t is_truncate) { + ObDatum &res) { INIT_SUCC(ret); - if (dst_type == ObVarcharType || dst_type == ObLongTextType) { - ObJsonBuffer jbuf(allocator); - ObString res_string; - if (OB_FAIL(jb_res->print(jbuf, true, pretty_type > 0))) { - LOG_WARN("json binary to string failed", K(ret)); - } else if (jbuf.empty()) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory for result failed", K(ret)); - } else { - res_string.assign_ptr(jbuf.ptr(), jbuf.length()); - } - if (OB_SUCC(ret)) { - uint64_t length = res_string.length(); - if (dst_type == ObVarcharType && length > dst_len) { - if (is_truncate) { - res_string.assign_ptr(res_string.ptr(), dst_len); - if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, res_string))) { - LOG_WARN("fail to pack json result", K(ret)); - } - } else { - char res_ptr[OB_MAX_DECIMAL_PRECISION] = {0}; - 
if (OB_ISNULL(ObCharset::lltostr(dst_len, res_ptr, 10, 1))) { - LOG_WARN("dst_len fail to string.", K(ret)); - } - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, res_ptr, "json_query"); - if (!try_set_error_val(allocator, ctx, expr, res, ret, error_type, JSN_QUERY_MISMATCH_IMPLICIT, dst_type)) { - LOG_WARN("set error val fail", K(ret)); - } - } - } else { - ObTextStringDatumResult text_result(expr.datum_meta_.type_, &expr, &ctx, &res); - if (ascii_type == 0) { - if (OB_FAIL(text_result.init(res_string.length()))) { - LOG_WARN("init lob result failed"); - } else if (OB_FAIL(text_result.append(res_string))) { - LOG_WARN("failed to append realdata", K(ret), K(res_string), K(text_result)); - } - } else { - char *buf = NULL; - int64_t buf_len = res_string.length() * ObCharset::MAX_MB_LEN * 2; - int32_t length = 0; - int64_t reserve_len = 0; - - if (OB_FAIL(text_result.init(buf_len))) { - LOG_WARN("init lob result failed"); - } else if (OB_FAIL(text_result.get_reserved_buffer(buf, reserve_len))) { - LOG_WARN("fail to get reserved buffer", K(ret)); - } else if (reserve_len != buf_len) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get reserve len is invalid", K(ret), K(reserve_len), K(buf_len)); - } else if (OB_FAIL(ObJsonExprHelper::calc_asciistr_in_expr(res_string, expr.args_[0]->datum_meta_.cs_type_, - expr.datum_meta_.cs_type_, - buf, buf_len, length))) { - LOG_WARN("fail to calc unistr", K(ret)); - } else if (OB_FAIL(text_result.lseek(length, 0))) { - LOG_WARN("text_result lseek failed", K(ret), K(text_result), K(length)); - } - } - if (OB_SUCC(ret)) { - text_result.set_result(); - } + uint8_t is_type_mismatch = 0; + ObCollationType in_coll_type = expr.args_[0]->datum_meta_.cs_type_; + ObCollationType dst_coll_type = expr.datum_meta_.cs_type_; + ObJsonCastParam cast_param(json_param->dst_type_, in_coll_type, dst_coll_type, json_param->ascii_type_); + cast_param.is_quote_ = true; + cast_param.is_trunc_ = json_param->truncate_; + cast_param.is_pretty_ = 
json_param->pretty_type_; + cast_param.rt_expr_ = &expr; + if (OB_FAIL(ObJsonUtil::cast_to_res(allocator, ctx, jb_res, + json_param->accuracy_, cast_param, res, is_type_mismatch))) { + if (ret == OB_OPERATE_OVERFLOW) { + if (!try_set_error_val(allocator, ctx, json_param, expr, res, ret)) { + LOG_WARN("set error val fail", K(ret)); } } - } else if (ob_is_json(dst_type)) { - ObString raw_str; - ObIJsonBase *jb_res_bin = NULL; - if (OB_FAIL(ret)) { - LOG_WARN("json extarct get results failed", K(ret)); - } else if (OB_FAIL(ObJsonBaseFactory::transform(allocator, jb_res, ObJsonInType::JSON_BIN, jb_res_bin))) { // to BIN - LOG_WARN("fail to transform to tree", K(ret)); - } else if (OB_FAIL(jb_res_bin->get_raw_binary(raw_str, allocator))) { - LOG_WARN("json extarct get result binary failed", K(ret)); - } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_str))) { - LOG_WARN("fail to pack json result", K(ret)); - } - } else { - ret = OB_ERR_INVALID_DATA_TYPE_RETURNING; - LOG_USER_ERROR(OB_ERR_INVALID_DATA_TYPE_RETURNING); + } + if (OB_SUCC(ret) && OB_FAIL(ObJsonUtil::set_lob_datum(allocator, expr, ctx, json_param->dst_type_, json_param->ascii_type_, res))) { + LOG_WARN("fail to set lob datum", K(ret)); } return ret; } int ObExprJsonQuery::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const -{ - UNUSED(expr_cg_ctx); - UNUSED(raw_expr); - rt_expr.eval_func_ = eval_json_query; - return OB_SUCCESS; -} - -int ObExprJsonQuery::get_clause_pre_asc_sca_opt(const ObExpr &expr, ObEvalCtx &ctx, bool &is_cover_by_error, uint8_t &pretty_type, uint8_t &ascii_type, uint8_t &scalars_type) { INIT_SUCC(ret); - // parse pretty - if (OB_SUCC(ret)) { - ret = get_clause_opt(expr, ctx, 5, is_cover_by_error, pretty_type, OB_JSON_PRE_ASC_COUNT); - } - // parse ascii - if (OB_SUCC(ret)) { - ret = get_clause_opt(expr, ctx, 6, is_cover_by_error, ascii_type, OB_JSON_PRE_ASC_COUNT); - } - // parse scalars - if (OB_SUCC(ret)) { - ret = 
get_clause_opt(expr, ctx, 4, is_cover_by_error, scalars_type, JSN_QUERY_SCALARS_COUNT); - } - return ret; -} - -int ObExprJsonQuery::get_ora_json_path(const ObExpr &expr, ObEvalCtx &ctx, - common::ObArenaAllocator &allocator, ObJsonPath*& j_path, - uint16_t index, bool &is_null, bool &is_cover_by_error, - ObDatum*& json_datum) -{ - INIT_SUCC(ret); - ObExpr *json_arg = expr.args_[index]; - ObObjType type = json_arg->datum_meta_.type_; - if (OB_SUCC(ret) && !is_null) { - if (OB_FAIL(json_arg->eval(ctx, json_datum))) { - is_cover_by_error = false; - LOG_WARN("eval json arg failed", K(ret)); - } else if (type == ObNullType || json_datum->is_null()) { - is_null = true; - } else if (!ob_is_string_type(type)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input type error", K(type)); - } - ObString j_path_text = json_datum->get_string(); - - if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(json_arg, ctx, allocator, j_path_text, is_null))) { - LOG_WARN("fail to get real data.", K(ret), K(j_path_text)); - } else if (j_path_text.length() == 0) { - is_null = true; - } - ObJsonPathCache ctx_cache(&allocator); - ObJsonPathCache* path_cache = ObJsonExprHelper::get_path_cache_ctx(expr.expr_ctx_id_, &ctx.exec_ctx_); - path_cache = ((path_cache != NULL) ? 
path_cache : &ctx_cache); - - if (OB_FAIL(ObJsonExprHelper::find_and_add_cache(path_cache, j_path, j_path_text, 1, true))) { - is_cover_by_error = false; - ret = OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR; - LOG_USER_ERROR(OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR, j_path_text.length(), j_path_text.ptr()); - } - } - return ret; -} - -int ObExprJsonQuery::get_ora_json_doc(const ObExpr &expr, ObEvalCtx &ctx, - common::ObArenaAllocator &allocator, - uint16_t index, ObIJsonBase*& j_base, - ObObjType dst_type, - bool &is_null, bool &is_cover_by_error) -{ - INIT_SUCC(ret); - ObDatum *json_datum = NULL; - ObExpr *json_arg = expr.args_[index]; - ObObjType type = json_arg->datum_meta_.type_; - ObCollationType cs_type = json_arg->datum_meta_.cs_type_; - ObJsonInType j_in_type; - if (OB_SUCC(ret) && OB_FAIL(json_arg->eval(ctx, json_datum))) { - LOG_WARN("eval json arg failed", K(ret)); - is_cover_by_error = false; - } else if (type == ObNullType || json_datum->is_null()) { - is_null = true; - } else if (type != ObJsonType && !ob_is_string_type(type)) { - ret = OB_ERR_INVALID_TYPE_FOR_OP; - LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_OP, ob_obj_type_str(dst_type), ob_obj_type_str(type)); + ObIAllocator &alloc = *expr_cg_ctx.allocator_; + ObExprJsonQueryParamInfo* info + = OB_NEWx(ObExprJsonQueryParamInfo, (&alloc), alloc, T_FUN_SYS_JSON_QUERY); + if (OB_ISNULL(info)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else if (OB_FAIL(info->init_jsn_query_expr_param(alloc, expr_cg_ctx, &raw_expr))) { + ret = OB_SUCCESS; // not use plan cache } else { - ObString j_str = json_datum->get_string(); - j_in_type = ObJsonExprHelper::get_json_internal_type(type); - uint32_t parse_flag = ObJsonParser::JSN_RELAXED_FLAG; - if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(json_arg, ctx, allocator, j_str, is_null))) { - LOG_WARN("fail to get real data.", K(ret), K(j_str)); - } else if (j_str.length() == 0) { // maybe input json doc is null type - is_null = 
true; - } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&allocator, j_str, j_in_type, - j_in_type, j_base, parse_flag))) { - LOG_WARN("fail to get json base.", K(ret), K(type), K(j_str), K(j_in_type)); - if (ret == OB_ERR_JSON_OUT_OF_DEPTH) { - is_cover_by_error = false; - } - ret = OB_ERR_JSON_SYNTAX_ERROR; + rt_expr.extra_info_ = info; + } + rt_expr.eval_func_ = eval_json_query; + return ret; +} + +int ObExprJsonQueryParamInfo::init_jsn_query_expr_param(ObIAllocator &alloc, ObExprCGCtx &op_cg_ctx, const ObRawExpr* raw_expr) +{ + INIT_SUCC(ret); + ObExecContext *exec_ctx = op_cg_ctx.session_->get_cur_exec_ctx(); + const ObRawExpr *path = raw_expr->get_param_expr(JSN_QUE_PATH); + ObObj const_data; + ObArray param_vec; + uint32_t pos = -1; + // parse clause node + // truncate 3, scalars 4, pretty 5, ascii 6, wrapper 7, error 8, empty 9, mismatch 10 + for (int64_t i = JSN_QUE_TRUNC; OB_SUCC(ret) && i <= JSN_QUE_MISMATCH; i ++) { + if (OB_FAIL(get_int_val_from_raw(alloc, exec_ctx, raw_expr->get_param_expr(i), const_data))) { + LOG_WARN("failed to calc offset expr", K(ret)); + } else if (OB_FAIL(param_vec.push_back(const_data.get_tinyint()))) { + LOG_WARN("fail to push val into array", K(ret)); } } + if (OB_SUCC(ret)) { + truncate_ = param_vec[JSN_QUE_TRUNC_OPT]; + scalars_type_ = param_vec[JSN_QUE_SCALAR_OPT]; + pretty_type_ = param_vec[JSN_QUE_PRETTY_OPT]; + ascii_type_ = param_vec[JSN_QUE_ASCII_OPT]; + wrapper_ = param_vec[JSN_QUE_WRAPPER_OPT]; + error_type_ = param_vec[JSN_QUE_ERROR_OPT]; + empty_type_ = param_vec[JSN_QUE_EMPTY_OPT]; + } + // parse mismatch 1. init array 2. 
push_back node + if (OB_FAIL(ret)) { + } else if (OB_FAIL(on_mismatch_.init(1))) { // mismatch size == 1 + LOG_WARN("fail to init mismatch array", K(ret)); + } else if (OB_FAIL(on_mismatch_.push_back(param_vec[JSN_QUE_MISMATCH_OPT]))) { + LOG_WARN("fail to push node into mismatch array", K(ret)); + } else if (OB_FAIL(ObJsonUtil::init_json_path(alloc, op_cg_ctx, path, *this))) { // init json path + LOG_WARN("fail to init path from str", K(ret)); + } return ret; } -int ObExprJsonQuery::get_empty_option(ObJsonBaseVector &hits, bool &is_cover_by_error, int8_t empty_type, - bool &is_null_result, bool &is_null_json_obj, bool &is_null_json_array) +int ObExprJsonQuery::get_empty_option(bool &is_cover_by_error, int8_t empty_type, + bool &is_null_result, bool &is_json_arr, + bool &is_json_obj) { INIT_SUCC(ret); switch (empty_type) { case JSN_QUERY_IMPLICIT: { + is_cover_by_error = true; ret = OB_ERR_JSON_VALUE_NO_VALUE; LOG_USER_ERROR(OB_ERR_JSON_VALUE_NO_VALUE); - LOG_WARN("json value seek result empty.", K(hits.size())); + LOG_WARN("json value seek result empty.", K(ret)); break; } case JSN_QUERY_ERROR: { is_cover_by_error = false; ret = OB_ERR_JSON_VALUE_NO_VALUE; LOG_USER_ERROR(OB_ERR_JSON_VALUE_NO_VALUE); - LOG_WARN("json value seek result empty.", K(hits.size())); + LOG_WARN("json value seek result empty.", K(ret)); break; } case JSN_QUERY_EMPTY_OBJECT: { - is_null_json_obj = true; + is_json_obj = true; break; } case JSN_QUERY_NULL: { @@ -684,7 +679,7 @@ int ObExprJsonQuery::get_empty_option(ObJsonBaseVector &hits, bool &is_cover_by_ } case JSN_QUERY_EMPTY: case JSN_QUERY_EMPTY_ARRAY: { - is_null_json_array = true; // set_json_array + is_json_arr = true; break; } default: // empty_type from get_on_empty_or_error has done range check, do nothing for default @@ -693,7 +688,7 @@ int ObExprJsonQuery::get_empty_option(ObJsonBaseVector &hits, bool &is_cover_by_ return ret; } -int ObExprJsonQuery::get_single_obj_wrapper(uint8_t wrapper_type, int &use_wrapper, ObJsonNodeType 
in_type, uint8_t scalars_type) +int ObExprJsonQuery::get_single_obj_wrapper(int8_t wrapper_type, int8_t &use_wrapper, ObJsonNodeType in_type, int8_t scalars_type) { INIT_SUCC(ret); switch (wrapper_type) { @@ -728,7 +723,7 @@ int ObExprJsonQuery::get_single_obj_wrapper(uint8_t wrapper_type, int &use_wrapp return ret; } -int ObExprJsonQuery::get_multi_scalars_wrapper_type(uint8_t wrapper_type, int &use_wrapper, ObJsonBaseVector &hits, uint8_t scalars_type) +int ObExprJsonQuery::get_multi_scalars_wrapper_type(int8_t wrapper_type, int8_t &use_wrapper) { INIT_SUCC(ret); switch (wrapper_type) { @@ -737,7 +732,7 @@ int ObExprJsonQuery::get_multi_scalars_wrapper_type(uint8_t wrapper_type, int &u case JSN_QUERY_WRAPPER_IMPLICIT: { ret = OB_ERR_WITHOUT_ARR_WRAPPER; // result cannot be returned without array wrapper LOG_USER_ERROR(OB_ERR_WITHOUT_ARR_WRAPPER); - LOG_WARN("result cannot be returned without array wrapper.", K(ret), K(hits.size())); + LOG_WARN("result cannot be returned without array wrapper.", K(ret), K(wrapper_type)); break; } case JSN_QUERY_WITH_WRAPPER: @@ -758,100 +753,60 @@ int ObExprJsonQuery::get_multi_scalars_wrapper_type(uint8_t wrapper_type, int &u return ret; } +int ObExprJsonQuery::get_error_option(int8_t &error_type, ObIJsonBase *&error_val, ObIJsonBase *jb_arr, ObIJsonBase *jb_obj, bool &is_null) { + INIT_SUCC(ret); + if (error_type == JSN_QUERY_EMPTY || error_type == JSN_QUERY_EMPTY_ARRAY) { + error_val = jb_arr; + is_null = false; + } else if (error_type == JSN_QUERY_EMPTY_OBJECT) { + error_val = jb_obj; + is_null = false; + } else if (error_type == JSN_QUERY_NULL || error_type == JSN_QUERY_IMPLICIT) { + is_null = true; + } + return ret; +} + +int ObExprJsonQuery::get_mismatch_option(int8_t &mismatch_type, int &ret) { + int t_ret = OB_SUCCESS; + if (mismatch_type == JSN_QUERY_MISMATCH_ERROR) { + t_ret = ret; + } + return t_ret; +} + bool ObExprJsonQuery::try_set_error_val(common::ObIAllocator *allocator, ObEvalCtx &ctx, + ObJsonExprParam* 
json_param, const ObExpr &expr, ObDatum &res, - int &ret, - uint8_t error_type, - uint8_t mismatch_type, - ObObjType dst_type) + int &ret) { bool has_set_res = true; bool mismatch_error = true; + bool is_null = false; + ObIJsonBase* j_base = NULL; + ObJsonArray j_arr_res(allocator); + ObIJsonBase *jb_arr = NULL; + jb_arr = &j_arr_res; + ObJsonObject j_obj_res(allocator); + ObIJsonBase *jb_obj = NULL; + jb_obj = &j_obj_res; if (OB_FAIL(ret)) { - if (error_type == JSN_QUERY_EMPTY_ARRAY || error_type == JSN_QUERY_EMPTY) { - ret = OB_SUCCESS; - ObJsonArray j_arr_res(allocator); - ObIJsonBase *jb_res = NULL; - jb_res = &j_arr_res; - if (OB_FAIL(set_result(dst_type, OB_MAX_TEXT_LENGTH, jb_res, allocator, ctx, expr, res, error_type, 0, 0))) { - LOG_WARN("result set fail", K(ret)); - } - } else if (error_type == JSN_QUERY_EMPTY_OBJECT) { - ret = OB_SUCCESS; - ObJsonObject j_node_null(allocator); - ObIJsonBase *jb_res = NULL; - jb_res = &j_node_null; - if (OB_FAIL(set_result(dst_type, OB_MAX_TEXT_LENGTH, jb_res, allocator, ctx, expr, res, error_type, 0, 0))) { - LOG_WARN("result set fail", K(ret)); - } - } else if (error_type == JSN_QUERY_NULL || error_type == JSN_QUERY_IMPLICIT) { + if (json_param->error_type_ == JSN_QUERY_ERROR) { + } else if (OB_FAIL(get_error_option(json_param->error_type_, j_base, jb_arr, jb_obj, is_null))) { + LOG_WARN("fail to get error clause", K(ret)); + } else if (is_null) { res.set_null(); - ret = OB_SUCCESS; + } else if (OB_FAIL(set_result(json_param, j_base, allocator, ctx, expr, res))) { + LOG_WARN("result set fail", K(ret)); } } else { has_set_res = false; } - return has_set_res; } -int ObExprJsonQuery::get_clause_opt(const ObExpr &expr, - ObEvalCtx &ctx, - uint8_t index, - bool &is_cover_by_error, - uint8_t &type, - uint8_t size_para) -{ - INIT_SUCC(ret); - ObExpr *json_arg = expr.args_[index]; - ObObjType val_type = json_arg->datum_meta_.type_; - ObDatum *json_datum = NULL; - if (OB_FAIL(json_arg->eval(ctx, json_datum))) { - 
is_cover_by_error = false; - LOG_WARN("eval json arg failed", K(ret)); - } else if (val_type != ObIntType) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input type error", K(val_type)); - } else { - int64_t option_type = json_datum->get_int(); - if (option_type < 0 || - option_type >= size_para) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input option type error", K(option_type)); - } else { - type = static_cast(option_type); - } - } - return ret; -} - -int ObExprJsonQuery::get_dest_type(const ObExpr &expr, - int32_t &dst_len, - ObEvalCtx& ctx, - ObObjType &dest_type, - bool &is_cover_by_error) -{ - INIT_SUCC(ret); - ParseNode node; - ObDatum *dst_type_dat = NULL; - if (OB_ISNULL(expr.args_) || OB_ISNULL(expr.args_[2])) { - ret = OB_ERR_UNEXPECTED; - is_cover_by_error = false; - LOG_WARN("unexpected expr", K(ret), K(expr.arg_cnt_), KP(expr.args_)); - } else if (OB_FAIL(expr.args_[2]->eval(ctx, dst_type_dat))) { - is_cover_by_error = false; - LOG_WARN("eval dst type datum failed", K(ret)); - } else { - node.value_ = dst_type_dat->get_int(); - dest_type = static_cast(node.int16_values_[0]); - dst_len = node.int32_values_[OB_NODE_CAST_C_LEN_IDX]; - } - return ret; -} - - } // sql } // oceanbase \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_json_query.h b/src/sql/engine/expr/ob_expr_json_query.h index a8fe9987e6..8a60a0eb2f 100644 --- a/src/sql/engine/expr/ob_expr_json_query.h +++ b/src/sql/engine/expr/ob_expr_json_query.h @@ -18,6 +18,9 @@ #include "lib/json_type/ob_json_tree.h" #include "lib/json_type/ob_json_base.h" #include "ob_json_param_type.h" +#include "ob_expr_json_utils.h" +#include "ob_expr_json_func_helper.h" + using namespace oceanbase::common; @@ -26,8 +29,6 @@ namespace oceanbase namespace sql { - - class ObExprJsonQuery : public ObFuncExprOperator { @@ -43,71 +44,57 @@ public: virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override; virtual common::ObCastMode get_cast_mode() const { return 
CM_ERROR_ON_SCALE_OVER;} - + virtual bool need_rt_ctx() const override { return true; } private: - static int get_dest_type(const ObExpr &expr, - int32_t &dst_len, - ObEvalCtx& ctx, - ObObjType &dest_type, - bool &is_cover_by_error); + static int calc_returning_type(ObExprResType& type, + ObExprResType* types_stack, + ObExprTypeCtx& type_ctx, + ObExprResType& dst_type, + common::ObIAllocator *allocator, + bool is_json_input); static bool try_set_error_val(common::ObIAllocator *allocator, ObEvalCtx &ctx, - const ObExpr &expr, ObDatum &res, int &ret, - uint8_t error_type, uint8_t mismatch_type, - ObObjType dst_type); - static int set_result(ObObjType dst_type, int32_t dst_len, ObIJsonBase *jb_res, - common::ObIAllocator *allocator, ObEvalCtx &ctx, - const ObExpr &expr, ObDatum &res, uint8_t error_type, uint8_t ascii_type, uint8_t pretty_type = 0, uint8_t is_truncate = 0); + ObJsonExprParam* json_param, + const ObExpr &expr, ObDatum &res, int &ret); + static int set_result(ObJsonExprParam* json_param, + ObIJsonBase *jb_res, + common::ObIAllocator *allocator, + ObEvalCtx &ctx, + const ObExpr &expr, + ObDatum &res); +public: + static int get_empty_option(bool &is_cover_by_error, + int8_t empty_type, bool &is_null_result, + bool &is_json_arr, bool &is_json_obj); + static int get_single_obj_wrapper(int8_t wrapper_type, int8_t &use_wrapper, ObJsonNodeType in_type, int8_t scalars_type); + static int get_multi_scalars_wrapper_type(int8_t wrapper_type, int8_t &use_wrapper); + static int get_clause_param_value(const ObExpr &expr, ObEvalCtx &ctx, + ObJsonExprParam* json_param, int64_t &dst_len, + bool &is_cover_by_error); + static int check_params_valid(const ObExpr &expr, ObJsonExprParam* json_param, + bool &is_cover_by_error); + static int check_item_method_valid_with_wrapper(ObJsonPath *j_path, int8_t wrapper_type); + static int append_node_into_res(ObIJsonBase*& jb_res, ObJsonPath* j_path, + ObJsonSeekResult &hits, common::ObIAllocator *allocator); + static int 
append_binary_node_into_res(ObIJsonBase*& jb_res, + ObJsonPath* j_path, + ObJsonSeekResult &hits, + common::ObIAllocator *allocator); + static int doc_do_seek(ObIJsonBase* j_base, ObJsonExprParam *json_param, + ObJsonSeekResult &hits, int8_t &use_wrapper, + bool &is_cover_by_error, + bool &is_null_result, + bool& is_json_arr, + bool& is_json_obj); + static int deal_item_method_special_case(ObJsonPath* j_path, + ObJsonSeekResult &hits, + bool &is_null_result, + size_t pos, + bool use_wrapper); + static int get_error_option(int8_t &error_type, ObIJsonBase *&error_val, ObIJsonBase *jb_arr, ObIJsonBase *jb_obj, bool &is_null); + static int get_mismatch_option(int8_t &mismatch_type, int &ret); + static int init_ctx_var(ObJsonParamCacheCtx*& param_ctx, const ObExpr &expr); - - - static int get_clause_opt(const ObExpr &expr, - ObEvalCtx &ctx, - uint8_t index, - bool &is_cover_by_error, - uint8_t &type, - uint8_t size_para); - /* - oracle mode get json path to JsonBase in static_typing_engine - @param[in] expr the input arguments - @param[in] ctx the eval context - @param[in] allocator the Allocator in context - @param[in] index the input arguments index - @param[out] j_path the pointer to JsonPath - @param[out] is_null the flag for null situation - @param[out] is_cover_by_error the flag for whether need cover by error clause - @return Returns OB_SUCCESS on success, error code otherwise. 
- */ - static int get_ora_json_path(const ObExpr &expr, ObEvalCtx &ctx, - common::ObArenaAllocator &allocator, ObJsonPath*& j_path, - uint16_t index, bool &is_null, bool &is_cover_by_error, - ObDatum*& json_datum); - - /* - oracle mode get json doc to JsonBase in static_typing_engine - @param[in] expr the input arguments - @param[in] ctx the eval context - @param[in] allocator the Allocator in context - @param[in] index the input arguments index - @param[out] j_base the pointer to JsonBase - @param[out] j_in_type the pointer to input type - @param[out] is_null the flag for null situation - @param[out] is_cover_by_error the flag for whether need cover by error clause - @return Returns OB_SUCCESS on success, error code otherwise. - */ - static int get_ora_json_doc(const ObExpr &expr, ObEvalCtx &ctx, - common::ObArenaAllocator &allocator, - uint16_t index, ObIJsonBase*& j_base, - ObObjType dst_type, - bool &is_null, bool &is_cover_by_error); - - static int get_empty_option(ObJsonBaseVector &hits, bool &is_cover_by_error, int8_t empty_type, - bool &is_null_result, bool &is_null_json_obj, bool &is_null_json_array); - static int get_clause_pre_asc_sca_opt(const ObExpr &expr, ObEvalCtx &ctx, - bool &is_cover_by_error, uint8_t &pretty_type, - uint8_t &ascii_type, uint8_t &scalars_type); - static int get_single_obj_wrapper(uint8_t wrapper_type, int &use_wrapper, ObJsonNodeType in_type, uint8_t scalars_type); - static int get_multi_scalars_wrapper_type(uint8_t wrapper_type, int &use_wrapper, - ObJsonBaseVector &hits, uint8_t scalars_type); + static int extract_plan_cache_param(const ObExprJsonQueryParamInfo *info, ObJsonExprParam& json_param); /* code from ob_expr_cast for cal_result_type */ const static int32_t OB_LITERAL_MAX_INT_LEN = 21; diff --git a/src/sql/engine/expr/ob_expr_json_remove.cpp b/src/sql/engine/expr/ob_expr_json_remove.cpp index 252c7bc6f4..de71ae0459 100644 --- a/src/sql/engine/expr/ob_expr_json_remove.cpp +++ b/src/sql/engine/expr/ob_expr_json_remove.cpp 
@@ -106,7 +106,7 @@ int ObExprJsonRemove::eval_json_remove(const ObExpr &expr, ObEvalCtx &ctx, ObDat path_cache = ((path_cache != NULL) ? path_cache : &ctx_cache); } - ObJsonBaseVector hits; + ObJsonSeekResult hits; for (int64_t i = 1; OB_SUCC(ret) && !is_null_result && i < expr.arg_cnt_; i++) { hits.clear(); ObDatum *path_data = NULL; @@ -135,6 +135,8 @@ int ObExprJsonRemove::eval_json_remove(const ObExpr &expr, ObEvalCtx &ctx, ObDat } else { if (OB_FAIL(remove_from_json(json_path, hits[0]))) { LOG_WARN("remove_from_json failed", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::refresh_root_when_bin_rebuild_all(json_doc))) { + LOG_WARN("refresh_root_when_bin_rebuild_all fail", K(ret)); } } } @@ -145,25 +147,25 @@ int ObExprJsonRemove::eval_json_remove(const ObExpr &expr, ObEvalCtx &ctx, ObDat LOG_WARN("json_remove failed", K(ret)); } else if (is_null_result) { res.set_null(); - } else { - ObString str; - if (OB_FAIL(json_doc->get_raw_binary(str, &temp_allocator))) { - LOG_WARN("json_remove get result binary failed", K(ret)); - } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, str))) { - LOG_WARN("fail to pack json result", K(ret)); - } + } else if (OB_FAIL(ObJsonExprHelper::pack_json_res(expr, ctx, temp_allocator, json_doc, res))) { + LOG_WARN("pack fail", K(ret)); + } + if (OB_NOT_NULL(json_doc)) { + json_doc->reset(); } - return ret; } int ObExprJsonRemove::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { - UNUSED(expr_cg_ctx); - UNUSED(raw_expr); - rt_expr.eval_func_ = eval_json_remove; - return OB_SUCCESS; + INIT_SUCC(ret); + if (OB_FAIL(ObJsonExprHelper::init_json_expr_extra_info(expr_cg_ctx.allocator_, raw_expr, type_, rt_expr))) { + LOG_WARN("init_json_partial_update_extra_info fail", K(ret)); + } else { + rt_expr.eval_func_ = eval_json_remove; + } + return ret; } } diff --git a/src/sql/engine/expr/ob_expr_json_replace.cpp b/src/sql/engine/expr/ob_expr_json_replace.cpp index e41a7dea25..f63e850610 
100644 --- a/src/sql/engine/expr/ob_expr_json_replace.cpp +++ b/src/sql/engine/expr/ob_expr_json_replace.cpp @@ -83,7 +83,7 @@ int ObExprJsonReplace::eval_json_replace(const ObExpr &expr, ObEvalCtx &ctx, ObD } for (int64_t i = 1; OB_SUCC(ret) && !is_null_result && i < expr.arg_cnt_; i+=2) { - ObJsonBaseVector hit; + ObJsonSeekResult hit; ObDatum *path_data = NULL; if (expr.args_[i]->datum_meta_.type_ == ObNullType) { is_null_result = true; @@ -114,7 +114,8 @@ int ObExprJsonReplace::eval_json_replace(const ObExpr &expr, ObEvalCtx &ctx, ObD // replace int32_t hits = hit.size(); - if (hits == 0) { + if(OB_FAIL(ret)) { + } else if (hits == 0) { // do nothing } else if (hits != 1) { ret = OB_ERR_UNEXPECTED; @@ -132,13 +133,11 @@ int ObExprJsonReplace::eval_json_replace(const ObExpr &expr, ObEvalCtx &ctx, ObD LOG_WARN("Json parse and seek failed", K(ret)); } else if (is_null_result) { res.set_null(); - } else { - ObString str; - if (OB_FAIL(json_doc->get_raw_binary(str, &temp_allocator))) { - LOG_WARN("json_replace result to binary failed", K(ret)); - } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, str))) { - LOG_WARN("fail to pack json result", K(ret)); - } + } else if (OB_FAIL(ObJsonExprHelper::pack_json_res(expr, ctx, temp_allocator, json_doc, res))) { + LOG_WARN("pack fail", K(ret)); + } + if (OB_NOT_NULL(json_doc)) { + json_doc->reset(); } return ret; } @@ -147,10 +146,13 @@ int ObExprJsonReplace::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { - UNUSED(expr_cg_ctx); - UNUSED(raw_expr); - rt_expr.eval_func_ = eval_json_replace; - return OB_SUCCESS; + INIT_SUCC(ret); + if (OB_FAIL(ObJsonExprHelper::init_json_expr_extra_info(expr_cg_ctx.allocator_, raw_expr, type_, rt_expr))) { + LOG_WARN("init_json_expr_extra_info fail", K(ret)); + } else { + rt_expr.eval_func_ = eval_json_replace; + } + return ret; } } diff --git a/src/sql/engine/expr/ob_expr_json_schema_valid.cpp 
b/src/sql/engine/expr/ob_expr_json_schema_valid.cpp new file mode 100644 index 0000000000..14cdec685b --- /dev/null +++ b/src/sql/engine/expr/ob_expr_json_schema_valid.cpp @@ -0,0 +1,203 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_ENG + +#include "ob_expr_json_schema_valid.h" +#include "ob_expr_json_func_helper.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ + +OB_SERIALIZE_MEMBER((ObExprJsonSchemaValid, ObFuncExprOperator), json_schema_); + +ObExprJsonSchemaValid::ObExprJsonSchemaValid(common::ObIAllocator &alloc) + : ObFuncExprOperator(alloc, T_FUN_SYS_JSON_SCHEMA_VALID, N_JSON_SCHEMA_VALID, OB_JSON_SCHEMA_EXPR_ARG_NUM, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION), + json_schema_(ObString::make_empty_string()) {} + +ObExprJsonSchemaValid::~ObExprJsonSchemaValid() +{ +} + +int ObExprJsonSchemaValid::calc_result_type2(ObExprResType &type, + ObExprResType &type1, + ObExprResType &type2, + ObExprTypeCtx &type_ctx) const +{ + INIT_SUCC(ret); + UNUSED(type_ctx); + + // set the result type to bool + type.set_int32(); + type.set_precision(DEFAULT_PRECISION_FOR_BOOL); + type.set_scale(ObAccuracy::DDL_DEFAULT_ACCURACY[ObIntType].scale_); + + // 1st param (type1) is the json schema, which must itself be a valid json doc + // 2nd param (type2) is the json doc to be validated against that schema + if (OB_FAIL(ObJsonExprHelper::is_valid_for_json(type1, 1, N_JSON_SCHEMA_VALID))) { + LOG_WARN("wrong type for json doc.", K(ret), K(type1.get_type())); + } else if
(OB_FAIL(ObJsonExprHelper::is_valid_for_json(type2, 2, N_JSON_SCHEMA_VALID))) { + LOG_WARN("wrong type for json doc.", K(ret), K(type2.get_type())); + } + + return ret; +} + +int ObExprJsonSchemaValid::cg_expr(ObExprCGCtx &op_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const +{ + INIT_SUCC(ret); + const ObRawExpr *schema = raw_expr.get_param_expr(0); + if (lib::is_mysql_mode() && OB_JSON_SCHEMA_EXPR_ARG_NUM == rt_expr.arg_cnt_ + && OB_NOT_NULL(schema) && (schema->is_const_expr() || schema->is_static_scalar_const_expr()) + && schema->get_expr_type() != T_OP_GET_USER_VAR) { + ObIAllocator &alloc = *op_cg_ctx.allocator_; + ObExprJsonSchemaValidInfo *info + = OB_NEWx(ObExprJsonSchemaValidInfo, (&alloc), alloc, T_FUN_SYS_JSON_SCHEMA_VALID); + bool got_data = false; + if (OB_ISNULL(info)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else if (OB_FAIL(info->init_json_schema_extra_info(alloc, op_cg_ctx, schema, got_data))) { + LOG_WARN("init json schema extra info failed", K(ret)); + } else if (got_data) { + rt_expr.extra_info_ = info; + } + } + + if (OB_SUCC(ret)) { + rt_expr.eval_func_ = eval_json_schema_valid; + } + return ret; +} + +int ObExprJsonSchemaValid::eval_json_schema_valid(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) +{ + INIT_SUCC(ret); + const ObExprJsonSchemaValidInfo *info + = static_cast<const ObExprJsonSchemaValidInfo *>(expr.extra_info_); + ObIJsonBase* j_schema = nullptr; + ObIJsonBase* j_doc = nullptr; + bool is_null_result = false; + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); + ObJsonBin j_schema_bin; + if (OB_ISNULL(info)) { + // schema is not const: no pre-computed extra info, evaluate argument 0 at run time + if (OB_FAIL(ObJsonExprHelper::get_json_schema(expr, ctx, temp_allocator, 0, + j_schema, is_null_result))) { + LOG_WARN("get_json_schema failed", K(ret)); + } + } else { + // schema is const: reuse the binary stored in extra info by cg_expr + new (&j_schema_bin) ObJsonBin(info->json_schema_.ptr(), info->json_schema_.length(), &temp_allocator); + if
(OB_FAIL(j_schema_bin.reset_iter())) { + LOG_WARN("fail to reset iter for new json bin", K(ret)); + } else { + // schema validation only seek, do not need reserve parent stack + j_schema_bin.set_seek_flag(true); + j_schema = &j_schema_bin; + } + } + + if (OB_FAIL(ret)) { + } else if (!is_null_result && OB_FAIL(ObJsonExprHelper::get_json_doc(expr, ctx, temp_allocator, 1, + j_doc, is_null_result, false, false, true))) { + LOG_WARN("get_json_doc failed", K(ret)); + } else if (is_null_result) { + res.set_null(); + } else { + ObJsonSchemaValidator validator(&temp_allocator, j_schema); + bool is_valid = false; + if (OB_FAIL(validator.schema_validator(j_doc, is_valid))) { + LOG_WARN("failed in validator", K(ret)); + } else { + res.set_int(static_cast(is_valid)); + } + } + return ret; +} + +OB_DEF_SERIALIZE(ObExprJsonSchemaValidInfo) +{ + INIT_SUCC(ret); + LST_DO_CODE(OB_UNIS_ENCODE, json_schema_); + return ret; +} + +OB_DEF_DESERIALIZE(ObExprJsonSchemaValidInfo) +{ + INIT_SUCC(ret); + LST_DO_CODE(OB_UNIS_DECODE, json_schema_); + return ret; +} + +OB_DEF_SERIALIZE_SIZE(ObExprJsonSchemaValidInfo) +{ + int64_t len = 0; + LST_DO_CODE(OB_UNIS_ADD_LEN, json_schema_); + return len; +} + +int ObExprJsonSchemaValidInfo::init_json_schema_extra_info(ObIAllocator &alloc, + ObExprCGCtx &op_cg_ctx, + const ObRawExpr* schema, + bool& got_data) +{ + INIT_SUCC(ret); + ObExecContext *exec_ctx = op_cg_ctx.session_->get_cur_exec_ctx(); + got_data = false; + ObObj const_data; + ObIJsonBase* j_schema = nullptr; + if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(exec_ctx, + schema, + const_data, + got_data, + alloc))) { + LOG_WARN("failed to calc offset expr", K(ret)); + } else if (!got_data || const_data.is_null()) { + got_data = false; + } else if (OB_FAIL(ObJsonExprHelper::get_const_json_schema(const_data, N_JSON_SCHEMA_VALID, &alloc, j_schema))) { + LOG_WARN("parse json schema failed", K(ret)); + } else if (OB_FAIL(j_schema->get_raw_binary(json_schema_, &alloc))){ + LOG_WARN("fail to 
get binary string", K(ret)); + } else { + got_data = true; + } + return ret; +} + +int ObExprJsonSchemaValidInfo::deep_copy(common::ObIAllocator &allocator, + const ObExprOperatorType type, + ObIExprExtraInfo *&copied_info) const +{ + INIT_SUCC(ret); + if (OB_FAIL(ObExprExtraInfoFactory::alloc(allocator, type, copied_info))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else { + ObExprJsonSchemaValidInfo &other = *static_cast(copied_info); + if (OB_FAIL(ob_write_string(allocator, json_schema_, other.json_schema_, true))) { + LOG_WARN("fail to copy string", K(ret)); + } + } + return ret; +} + + + +} /* sql */ +} /* oceanbase */ diff --git a/src/sql/engine/expr/ob_expr_json_schema_valid.h b/src/sql/engine/expr/ob_expr_json_schema_valid.h new file mode 100644 index 0000000000..955b1ff780 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_json_schema_valid.h @@ -0,0 +1,69 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation for json_schema_valid. 
+ */ + +#ifndef OCEANBASE_SQL_OB_EXPR_JSON_SCHEMA_VALID_H_ +#define OCEANBASE_SQL_OB_EXPR_JSON_SCHEMA_VALID_H_ + +#include "lib/json_type/ob_json_tree.h" +#include "lib/json_type/ob_json_bin.h" +#include "lib/json_type/ob_json_schema.h" +#include "sql/engine/expr/ob_expr_operator.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace sql +{ + +struct ObExprJsonSchemaValidInfo : public ObIExprExtraInfo +{ + OB_UNIS_VERSION(1); +public: + ObExprJsonSchemaValidInfo(common::ObIAllocator &alloc, ObExprOperatorType type) + : ObIExprExtraInfo(alloc, type) + { + } + + virtual int deep_copy(common::ObIAllocator &allocator, + const ObExprOperatorType type, + ObIExprExtraInfo *&copied_info) const override; + int init_json_schema_extra_info(ObIAllocator &alloc, ObExprCGCtx &op_cg_ctx, const ObRawExpr* raw_expr, bool& got_data); + ObString json_schema_; +}; + +class ObExprJsonSchemaValid : public ObFuncExprOperator +{ + OB_UNIS_VERSION(1); +public: + explicit ObExprJsonSchemaValid(common::ObIAllocator &alloc); + virtual ~ObExprJsonSchemaValid(); + virtual int calc_result_type2(ObExprResType &type, + ObExprResType &type1, + ObExprResType &type2, + common::ObExprTypeCtx &type_ctx) + const override; + static int eval_json_schema_valid(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; + virtual bool need_rt_ctx() const override { return true; } +private: + DISALLOW_COPY_AND_ASSIGN(ObExprJsonSchemaValid); + const static uint8_t OB_JSON_SCHEMA_EXPR_ARG_NUM = 2; +private: + ObString json_schema_; +}; + +} // sql +} // oceanbase +#endif // OCEANBASE_SQL_OB_EXPR_JSON_SCHEMA_VALID_H_ \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_json_schema_validation_report.cpp b/src/sql/engine/expr/ob_expr_json_schema_validation_report.cpp new file mode 100644 index 0000000000..2c94cae0db --- /dev/null +++ 
b/src/sql/engine/expr/ob_expr_json_schema_validation_report.cpp @@ -0,0 +1,211 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_ENG + +#include "ob_expr_json_schema_valid.h" +#include "ob_expr_json_schema_validation_report.h" +#include "ob_expr_json_func_helper.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ +class ObJsonSchemaReportItem +{ +public: + static constexpr char* RESULT = const_cast<char*>("valid"); + static constexpr char* REASON = const_cast<char*>("reason"); + static constexpr char* REASON_BEGIN = const_cast<char*>("The JSON document location '"); + static constexpr char* REASON_MID = const_cast<char*>("' failed requirement '"); + static constexpr char* REASON_END = const_cast<char*>("' at JSON Schema location '"); + static constexpr char* SCHEMA_LOCATION = const_cast<char*>("schema-location"); + static constexpr char* DOC_LOCATION = const_cast<char*>("document-location"); + static constexpr char* FAILED_KEYWORD = const_cast<char*>("schema-failed-keyword"); +}; + +OB_SERIALIZE_MEMBER((ObExprJsonSchemaValidationReport, ObFuncExprOperator), json_schema_); + +ObExprJsonSchemaValidationReport::ObExprJsonSchemaValidationReport(common::ObIAllocator &alloc) + : ObFuncExprOperator(alloc, T_FUN_SYS_JSON_SCHEMA_VALIDATION_REPORT, N_JSON_SCHEMA_VALIDATION_REPORT, OB_JSON_SCHEMA_EXPR_ARG_NUM, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION), + json_schema_(ObString::make_empty_string()) {} + +ObExprJsonSchemaValidationReport::~ObExprJsonSchemaValidationReport() +{ +} + +int
ObExprJsonSchemaValidationReport::calc_result_type2(ObExprResType &type, + ObExprResType &type1, + ObExprResType &type2, + ObExprTypeCtx &type_ctx) const +{ + INIT_SUCC(ret); + UNUSED(type_ctx); + + // set the result type: json + type.set_json(); + type.set_length((ObAccuracy::DDL_DEFAULT_ACCURACY[ObJsonType]).get_length()); + + // 1st param (type1) is the json schema, which must itself be a valid json doc + // 2nd param (type2) is the json doc to be validated; errors must name this function, not json_schema_valid + if (OB_FAIL(ObJsonExprHelper::is_valid_for_json(type1, 1, N_JSON_SCHEMA_VALIDATION_REPORT))) { + LOG_WARN("wrong type for json doc.", K(ret), K(type1.get_type())); + } else if (OB_FAIL(ObJsonExprHelper::is_valid_for_json(type2, 2, N_JSON_SCHEMA_VALIDATION_REPORT))) { + LOG_WARN("wrong type for json doc.", K(ret), K(type2.get_type())); + } + + return ret; +} + +int ObExprJsonSchemaValidationReport::cg_expr(ObExprCGCtx &op_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const +{ + INIT_SUCC(ret); + const ObRawExpr *schema = raw_expr.get_param_expr(0); + if (lib::is_mysql_mode() && OB_JSON_SCHEMA_EXPR_ARG_NUM == rt_expr.arg_cnt_ + && OB_NOT_NULL(schema) && (schema->is_const_expr() || schema->is_static_scalar_const_expr()) + && schema->get_expr_type() != T_OP_GET_USER_VAR) { + ObIAllocator &alloc = *op_cg_ctx.allocator_; + ObExprJsonSchemaValidInfo *info + = OB_NEWx(ObExprJsonSchemaValidInfo, (&alloc), alloc, T_FUN_SYS_JSON_SCHEMA_VALIDATION_REPORT); + bool got_data = false; + if (OB_ISNULL(info)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else if (OB_FAIL(info->init_json_schema_extra_info(alloc, op_cg_ctx, schema, got_data))) { + LOG_WARN("init json schema extra info failed", K(ret)); + } else if (got_data) { + rt_expr.extra_info_ = info; + } + } + + if (OB_SUCC(ret)) { + rt_expr.eval_func_ = eval_json_schema_validation_report; + } + return ret; +} + +int ObExprJsonSchemaValidationReport::eval_json_schema_validation_report(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) +{ + INIT_SUCC(ret); + const ObExprJsonSchemaValidInfo *info + =
static_cast(expr.extra_info_); + ObIJsonBase* j_schema = nullptr; + ObIJsonBase* j_doc = nullptr; + bool is_null_result = false; + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); + ObJsonBin j_schema_bin; + if (OB_ISNULL(info)) { + // schema is not const + if (OB_FAIL(ObJsonExprHelper::get_json_schema(expr, ctx, temp_allocator, 0, + j_schema, is_null_result))) { + LOG_WARN("get_json_doc failed", K(ret)); + } + } else { + // schema is const + new (&j_schema_bin) ObJsonBin(info->json_schema_.ptr(), info->json_schema_.length(), &temp_allocator); + if (OB_FAIL(j_schema_bin.reset_iter())) { + LOG_WARN("fail to reset iter for new json bin", K(ret)); + } else { + // schema validation only seek, do not need reserve parent stack + j_schema_bin.set_seek_flag(true); + j_schema = &j_schema_bin; + } + } + + if (OB_FAIL(ret)) { + } else if (!is_null_result && OB_FAIL(ObJsonExprHelper::get_json_doc(expr, ctx, temp_allocator, 1, + j_doc, is_null_result, false, false, true))) { + LOG_WARN("get_json_doc failed", K(ret)); + } else if (is_null_result) { + res.set_null(); + } else { + ObJsonSchemaValidator validator(&temp_allocator, j_schema); + ObIJsonBase* validation_report = nullptr; + bool is_valid = false; + if (OB_FAIL(validator.schema_validator(j_doc, is_valid))) { + LOG_WARN("failed in validator", K(ret)); + } else if (OB_FAIL(raise_validation_report(temp_allocator, validator, is_valid, validation_report))){ + LOG_WARN("failed to raise validation report", K(ret)); + } else { + ObString raw_bin; + if (OB_FAIL(validation_report->get_raw_binary(raw_bin, &temp_allocator))) { + LOG_WARN("failed: json get binary", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { + LOG_WARN("fail to pack json result", K(ret)); + } + } + } + return ret; +} + +int ObExprJsonSchemaValidationReport::raise_validation_report(ObIAllocator &allocator, + ObJsonSchemaValidator& validator, + const 
bool& is_valid, + ObIJsonBase*& validation_report) +{ + INIT_SUCC(ret); + ObJsonObject* report_obj = nullptr; + ObJsonBoolean* schema_result = nullptr; + if (OB_ISNULL(report_obj = OB_NEWx(ObJsonObject, &allocator, &allocator))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to init schema report.", K(ret)); + } else if (OB_FALSE_IT(validation_report = report_obj)) { + } else if (OB_ISNULL(schema_result = OB_NEWx(ObJsonBoolean, &allocator, is_valid))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to init schema report result.", K(ret)); + } else if (OB_FAIL(report_obj->add(ObJsonSchemaReportItem::RESULT, schema_result, false, true, false))) { + LOG_WARN("fail to add schema result.", K(ret)); + } else if (!is_valid) { // if not valid, need to describe the reason in detail + // reason + ObJsonBuffer reason(&allocator); + ObJsonBuffer json_pointer(&allocator); + ObJsonBuffer schema_pointer(&allocator); + ObJsonString* reason_str = nullptr; + ObJsonString* schema_loc_str = nullptr; + ObJsonString* doc_loc_str = nullptr; + ObJsonString* failed_keyword_str = nullptr; + if (OB_FAIL(validator.get_json_or_schema_point(json_pointer, false))) { + LOG_WARN("fail to get json pointer.", K(ret)); + } else if (OB_FAIL(validator.get_json_or_schema_point(schema_pointer, true))) { + LOG_WARN("fail to get schema pointer.", K(ret)); + } else if (OB_FAIL(reason.append(ObJsonSchemaReportItem::REASON_BEGIN)) + || OB_FAIL(reason.append(json_pointer.ptr(), json_pointer.length())) + || OB_FAIL(reason.append(ObJsonSchemaReportItem::REASON_MID)) + || OB_FAIL(reason.append(validator.get_failed_keyword())) + || OB_FAIL(reason.append(ObJsonSchemaReportItem::REASON_END)) + || OB_FAIL(reason.append(schema_pointer.ptr(), schema_pointer.length()))) { + LOG_WARN("fail to get reason.", K(ret)); + } else if (OB_ISNULL(reason_str = OB_NEWx(ObJsonString, &allocator, reason.ptr(), reason.length())) + || OB_ISNULL(schema_loc_str = OB_NEWx(ObJsonString, &allocator, schema_pointer.ptr(), 
schema_pointer.length())) + || OB_ISNULL(doc_loc_str = OB_NEWx(ObJsonString, &allocator, json_pointer.ptr(), json_pointer.length())) + || OB_ISNULL(failed_keyword_str = OB_NEWx(ObJsonString, &allocator, validator.get_failed_keyword()))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to init schema report value.", K(ret)); + } else if (OB_FAIL(report_obj->add(ObJsonSchemaReportItem::REASON, reason_str, false, true, false))) { + LOG_WARN("fail to add reason.", K(ret)); + } else if (OB_FAIL(report_obj->add(ObJsonSchemaReportItem::SCHEMA_LOCATION, schema_loc_str, false, true, false))) { + LOG_WARN("fail to add schema location.", K(ret)); + } else if (OB_FAIL(report_obj->add(ObJsonSchemaReportItem::DOC_LOCATION, doc_loc_str, false, true, false))) { + LOG_WARN("fail to add document location.", K(ret)); + } else if (OB_FAIL(report_obj->add(ObJsonSchemaReportItem::FAILED_KEYWORD, failed_keyword_str, false, true, false))) { + LOG_WARN("fail to add failed keyword.", K(ret)); + } + } + return ret; +} + +} /* sql */ +} /* oceanbase */ diff --git a/src/sql/engine/expr/ob_expr_json_schema_validation_report.h b/src/sql/engine/expr/ob_expr_json_schema_validation_report.h new file mode 100644 index 0000000000..c443f3e523 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_json_schema_validation_report.h @@ -0,0 +1,55 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation for json_schema_validation_report.
+ */ + +#ifndef OCEANBASE_SQL_OB_EXPR_JSON_SCHEMA_VALIDATION_REPORT_H_ +#define OCEANBASE_SQL_OB_EXPR_JSON_SCHEMA_VALIDATION_REPORT_H_ + +#include "lib/json_type/ob_json_tree.h" +#include "lib/json_type/ob_json_bin.h" +#include "lib/json_type/ob_json_schema.h" +#include "sql/engine/expr/ob_expr_operator.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace sql +{ + +class ObExprJsonSchemaValidationReport : public ObFuncExprOperator +{ + OB_UNIS_VERSION(1); +public: + explicit ObExprJsonSchemaValidationReport(common::ObIAllocator &alloc); + virtual ~ObExprJsonSchemaValidationReport(); + virtual int calc_result_type2(ObExprResType &type, + ObExprResType &type1, + ObExprResType &type2, + common::ObExprTypeCtx &type_ctx) + const override; + static int eval_json_schema_validation_report(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; + virtual bool need_rt_ctx() const override { return true; } + static int raise_validation_report(ObIAllocator &allocator, ObJsonSchemaValidator& validator, + const bool& is_valid, ObIJsonBase*& validation_report); +private: + DISALLOW_COPY_AND_ASSIGN(ObExprJsonSchemaValidationReport); + const static uint8_t OB_JSON_SCHEMA_EXPR_ARG_NUM = 2; +private: + ObString json_schema_; +}; + +} // sql +} // oceanbase +#endif // OCEANBASE_SQL_OB_EXPR_JSON_SCHEMA_VALIDATION_REPORT_H_ \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_json_search.cpp b/src/sql/engine/expr/ob_expr_json_search.cpp index 2abeb9b4b0..b8a2fa0116 100644 --- a/src/sql/engine/expr/ob_expr_json_search.cpp +++ b/src/sql/engine/expr/ob_expr_json_search.cpp @@ -15,6 +15,7 @@ #include "util/easy_string.h" #include "sql/engine/expr/ob_expr_util.h" #include "share/object/ob_obj_cast.h" +#include "share/ob_json_access_utils.h" #include "sql/session/ob_sql_session_info.h" #include "ob_expr_json_func_helper.h" using namespace 
oceanbase::common; @@ -411,7 +412,7 @@ int ObExprJsonSearch::eval_json_search(const ObExpr &expr, ObEvalCtx &ctx, ObDat bool is_finish = false; for (uint64_t i = 4; OB_SUCC(ret) && !is_null && i < expr.arg_cnt_ && !is_finish; i++) { ObJsonPath *j_path = json_paths[i - 4]; - ObJsonBaseVector hit; + ObJsonSeekResult hit; if (one_flag && hits.size() > 0) { is_finish = true; } else if (j_path->can_match_many()) { @@ -502,7 +503,7 @@ int ObExprJsonSearch::eval_json_search(const ObExpr &expr, ObEvalCtx &ctx, ObDat res.set_null(); } else { ObString raw_bin; - if (OB_FAIL(j_res->get_raw_binary(raw_bin, &temp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_res, raw_bin, &temp_allocator))) { LOG_WARN("json_keys get result binary failed", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { LOG_WARN("fail to pack json result", K(ret)); diff --git a/src/sql/engine/expr/ob_expr_json_set.cpp b/src/sql/engine/expr/ob_expr_json_set.cpp index 0f2710977b..2ad1df6b8b 100644 --- a/src/sql/engine/expr/ob_expr_json_set.cpp +++ b/src/sql/engine/expr/ob_expr_json_set.cpp @@ -61,7 +61,7 @@ int ObExprJsonSet::calc_result_typeN(ObExprResType& type, return ret; } -int ObExprJsonSet::set_value(ObJsonBaseVector &hit, ObIJsonBase *&json_doc, ObIJsonBase* json_val, +int ObExprJsonSet::set_value(ObJsonSeekResult &hit, ObIJsonBase *&json_doc, ObIJsonBase* json_val, ObJsonPath *json_path, ObIAllocator *allocator) { INIT_SUCC(ret); @@ -76,7 +76,7 @@ int ObExprJsonSet::set_value(ObJsonBaseVector &hit, ObIJsonBase *&json_doc, ObIJ if (OB_FAIL(json_doc->seek(*json_path, json_path->path_node_cnt() - 1, true, true, hit))) { LOG_WARN("json seek failed", K(ret)); } else if (hit.size() != 0) { - ObIJsonBase* pos_node = *hit.last(); + ObIJsonBase* pos_node = hit.last(); ObJsonPathBasicNode* path_last = json_path->last_path_node(); if (path_last->get_node_type() == JPN_ARRAY_CELL) { if (pos_node->json_type() == ObJsonNodeType::J_ARRAY) { @@ -84,6 +84,8 @@ 
int ObExprJsonSet::set_value(ObJsonBaseVector &hit, ObIJsonBase *&json_doc, ObIJ ObJsonArrayIndex array_index; if (OB_FAIL(path_last->get_first_array_index(arr_len, array_index))) { LOG_WARN("error, get array index failed", K(ret), K(arr_len)); + } else if (json_doc->is_bin() && ! json_val->is_bin() && OB_FAIL(ObJsonBaseFactory::transform(allocator, json_val, ObJsonInType::JSON_BIN, json_val))) { + LOG_WARN("json tree to bin fail", K(ret)); } else if (OB_FAIL(pos_node->array_insert(array_index.get_array_index(), json_val))) { LOG_WARN("error, insert array node failed", K(ret), K(array_index.get_array_index())); } @@ -94,21 +96,32 @@ int ObExprJsonSet::set_value(ObJsonBaseVector &hit, ObIJsonBase *&json_doc, ObIJ LOG_WARN("error, alloc jsonarray node failed", K(ret)); } else { ObJsonArray* json_array = (ObJsonArray*)new(array_buf)ObJsonArray(allocator); - ObJsonNode *j_parent = static_cast(pos_node)->get_parent(); + ObIJsonBase *j_parent = nullptr; + ObIJsonBase *j_array = json_array; + ObIJsonBase *j_pos_node = pos_node; bool is_idx_from_end = path_last->node_content_.array_cell_.is_index_from_end_; - if (!is_idx_from_end && (OB_FAIL(json_array->array_append(pos_node)) + if (OB_FAIL(pos_node->get_parent(j_parent))) { + LOG_WARN("get_parent fail", K(ret), KPC(pos_node)); + } else if (! 
pos_node->is_tree() && OB_FAIL(ObJsonBaseFactory::transform(allocator, pos_node, ObJsonInType::JSON_TREE, j_pos_node))) { + LOG_WARN("json tree to bin fail", K(ret)); + } else if (!is_idx_from_end && (OB_FAIL(json_array->array_append(j_pos_node)) || OB_FAIL(json_array->array_append(json_val)))) { LOG_WARN("error, array append node failed", K(ret)); } else if (is_idx_from_end && (OB_FAIL(json_array->array_append(json_val)) - || OB_FAIL(json_array->array_append(pos_node)))) { + || OB_FAIL(json_array->array_append(j_pos_node)))) { LOG_WARN("error, array append node failed", K(ret)); } else if (OB_ISNULL(j_parent)){ + json_doc->reset(); json_doc = json_array; - } else { - j_parent->replace(pos_node, json_array); + } else if (j_parent->is_bin() && OB_FAIL(ObJsonBaseFactory::transform(allocator, j_array, ObJsonInType::JSON_BIN, j_array))) { + LOG_WARN("json tree to bin fail", K(ret)); + } else if (OB_FAIL(j_parent->replace(pos_node, j_array))) { + LOG_WARN("replace fail", K(ret), KPC(pos_node), KPC(j_array)); } } } + } else if (json_doc->is_bin() && ! 
json_val->is_bin() && OB_FAIL(ObJsonBaseFactory::transform(allocator, json_val, ObJsonInType::JSON_BIN, json_val))) { + LOG_WARN("json tree to bin fail", K(ret)); } else if (path_last->get_node_type() == JPN_MEMBER && pos_node->json_type() == ObJsonNodeType::J_OBJECT) { ObString key_name; @@ -117,6 +130,9 @@ int ObExprJsonSet::set_value(ObJsonBaseVector &hit, ObIJsonBase *&json_doc, ObIJ LOG_WARN("error, json object add kv pair failed", K(ret)); } } else {} + if (OB_SUCC(ret) && OB_FAIL(ObJsonExprHelper::refresh_root_when_bin_rebuild_all(json_doc))) { + LOG_WARN("refresh_root_when_bin_rebuild_all fail", K(ret)); + } } } else { ret = OB_ERR_UNEXPECTED; @@ -149,7 +165,7 @@ int ObExprJsonSet::eval_json_set(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &re } for (int64_t i = 1; OB_SUCC(ret) && !is_null_result && i < expr.arg_cnt_; i+=2) { - ObJsonBaseVector hit; + ObJsonSeekResult hit; ObDatum *path_data = NULL; ObJsonPath *json_path = NULL; if (expr.args_[i]->datum_meta_.type_ == ObNullType) { @@ -183,13 +199,12 @@ int ObExprJsonSet::eval_json_set(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &re LOG_WARN("Json parse and seek failed", K(ret)); } else if (is_null_result) { res.set_null(); - } else { - ObString str; - if (OB_FAIL(json_doc->get_raw_binary(str, &temp_allocator))) { - LOG_WARN("json_set result to binary failed", K(ret)); - } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, str))) { - LOG_WARN("fail to pack json result", K(ret)); - } + } else if (OB_FAIL(ObJsonExprHelper::pack_json_res(expr, ctx, temp_allocator, json_doc, res))) { + LOG_WARN("pack fail", K(ret)); + } + + if (OB_NOT_NULL(json_doc)) { + json_doc->reset(); } return ret; } @@ -197,10 +212,13 @@ int ObExprJsonSet::eval_json_set(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &re int ObExprJsonSet::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { - UNUSED(expr_cg_ctx); - UNUSED(raw_expr); - rt_expr.eval_func_ = eval_json_set; - return OB_SUCCESS; + 
INIT_SUCC(ret); + if (OB_FAIL(ObJsonExprHelper::init_json_expr_extra_info(expr_cg_ctx.allocator_, raw_expr, type_, rt_expr))) { + LOG_WARN("init_json_expr_extra_info fail", K(ret)); + } else { + rt_expr.eval_func_ = eval_json_set; + } + return ret; } } diff --git a/src/sql/engine/expr/ob_expr_json_set.h b/src/sql/engine/expr/ob_expr_json_set.h index e1d7732201..9da27f72c5 100644 --- a/src/sql/engine/expr/ob_expr_json_set.h +++ b/src/sql/engine/expr/ob_expr_json_set.h @@ -33,7 +33,7 @@ public: int64_t param_num, common::ObExprTypeCtx& type_ctx) const override; - static int set_value(ObJsonBaseVector &hit, ObIJsonBase *&json_doc, ObIJsonBase* json_val, + static int set_value(ObJsonSeekResult &hit, ObIJsonBase *&json_doc, ObIJsonBase* json_val, ObJsonPath *json_path, ObIAllocator *allocator); static int eval_json_set(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, diff --git a/src/sql/engine/expr/ob_expr_json_utils.cpp b/src/sql/engine/expr/ob_expr_json_utils.cpp new file mode 100644 index 0000000000..18ae6caf7f --- /dev/null +++ b/src/sql/engine/expr/ob_expr_json_utils.cpp @@ -0,0 +1,2915 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ * This file is for implement of func json expr helper + */ + +#define USING_LOG_PREFIX SQL_ENG +#include "lib/ob_errno.h" +#include "sql/engine/expr/ob_expr_cast.h" +#include "sql/engine/ob_exec_context.h" +#include "sql/engine/expr/ob_datum_cast.h" +#include "ob_expr_json_func_helper.h" +#include "lib/encode/ob_base64_encode.h" // for ObBase64Encoder +#include "lib/utility/ob_fast_convert.h" // ObFastFormatInt::format_unsigned +#include "lib/charset/ob_dtoa.h" // ob_gcvt_opt +#include "rpc/obmysql/ob_mysql_global.h" // DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE +#include "sql/ob_result_set.h" +#include "sql/ob_spi.h" +#include "ob_expr_json_utils.h" +#include "share/object/ob_obj_cast_util.h" +#include "share/object/ob_obj_cast.h" + +using namespace oceanbase::common; +using namespace oceanbase::sql; + +namespace oceanbase +{ +namespace sql +{ + +#define CAST_FAIL(stmt) \ + (OB_UNLIKELY((OB_SUCCESS != (ret = get_cast_ret((stmt)))))) + +#define GET_SESSION() \ + ObBasicSessionInfo *session = ctx.exec_ctx_.get_my_session(); \ + if (OB_ISNULL(session)) { \ + ret = OB_ERR_UNEXPECTED; \ + LOG_WARN("session is NULL", K(ret)); \ + } else + +int ObExprJsonQueryParamInfo::deep_copy(common::ObIAllocator &allocator, + const ObExprOperatorType type, + ObIExprExtraInfo *&copied_info) const +{ + INIT_SUCC(ret); + if (OB_FAIL(ObExprExtraInfoFactory::alloc(allocator, type, copied_info))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else { + ObExprJsonQueryParamInfo& other = *static_cast(copied_info); + other.truncate_ = truncate_; + other.format_json_ = format_json_; + other.wrapper_ = wrapper_; + other.empty_type_ = empty_type_; + other.error_type_ = error_type_; + other.pretty_type_ = pretty_type_; + other.ascii_type_ = ascii_type_; + other.scalars_type_ = scalars_type_; + other.j_path_ = NULL; + if (OB_FAIL(ob_write_string(allocator, path_str_, other.path_str_, true))) { + LOG_WARN("fail to deep copy path str", K(ret)); + } else if 
(OB_FAIL(other.on_mismatch_.assign(on_mismatch_))) { + LOG_WARN("fail to assign mismatch array", K(ret)); + } else if (OB_FAIL(other.on_mismatch_type_.assign(on_mismatch_type_))) { + LOG_WARN("fail to assgin mismatch type", K(ret)); + } else if (OB_FAIL(other.parse_json_path(path_str_, other.j_path_))) { + LOG_WARN("fail to resolve json path", K(ret)); + } + } + return ret; +} + +int ObExprJsonQueryParamInfo::parse_json_path(ObString path_str, ObJsonPath*& j_path_) +{ + INIT_SUCC(ret); + j_path_ = NULL; + void* buf = allocator_.alloc(sizeof(ObJsonPath)); + if (path_str.empty()) { + allocator_.free(buf); + } else if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc path.", K(ret)); + } else { + j_path_ = new (buf) ObJsonPath(path_str, &allocator_); + if (OB_FAIL(j_path_->parse_path())) { + LOG_WARN("wrong path expression, parse path failed or with wildcards", K(ret), K(path_str)); + } + } + return ret; +} + +OB_DEF_SERIALIZE_SIZE(ObExprJsonQueryParamInfo) +{ + int64_t len = 0; + LST_DO_CODE(OB_UNIS_ADD_LEN, + truncate_, + format_json_, + wrapper_, + empty_type_, + error_type_, + pretty_type_, + ascii_type_, + scalars_type_, + path_str_, + on_mismatch_, + on_mismatch_type_); + return len; +} + +OB_DEF_SERIALIZE(ObExprJsonQueryParamInfo) +{ + INIT_SUCC(ret); + LST_DO_CODE(OB_UNIS_ENCODE, + truncate_, + format_json_, + wrapper_, + empty_type_, + error_type_, + pretty_type_, + ascii_type_, + scalars_type_, + path_str_, + on_mismatch_, + on_mismatch_type_); + return ret; +} + +OB_DEF_DESERIALIZE(ObExprJsonQueryParamInfo) +{ + INIT_SUCC(ret); + LST_DO_CODE(OB_UNIS_DECODE, + truncate_, + format_json_, + wrapper_, + empty_type_, + error_type_, + pretty_type_, + ascii_type_, + scalars_type_, + path_str_, + on_mismatch_, + on_mismatch_type_); + OZ(parse_json_path(path_str_, j_path_)); + return ret; +} + +int ObJsonUtil::set_mismatch_val(ObIArray& val, ObIArray& type, int64_t& opt_val, uint32_t& pos) +{ + INIT_SUCC(ret); + if (opt_val >= 
OB_JSON_ON_MISMATCH_ERROR && + opt_val <= OB_JSON_ON_MISMATCH_IMPLICIT) { + pos ++; + if (OB_FAIL(val.push_back(static_cast(opt_val)))) { + LOG_WARN("mismtach add fail", K(ret)); + } else if (OB_FAIL(type.push_back(0))) { + LOG_WARN("mismatch option add fail", K(ret)); + } + } else if (opt_val >= OB_JSON_TYPE_MISSING_DATA && + opt_val <= OB_JSON_TYPE_DOT) { + + /* one mismatch val has multi mismatch type*/ + uint8_t t_value = type.at(pos); + type.pop_back(); + switch(opt_val) { + case OB_JSON_TYPE_MISSING_DATA :{ + t_value |= 1; + break; + } + case OB_JSON_TYPE_EXTRA_DATA :{ + t_value |= 2; + break; + } + case OB_JSON_TYPE_TYPE_ERROR :{ + t_value |= 4; + break; + } + default :{ + break; + } + } + type.push_back(t_value); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("option type error", K(opt_val), K(ret)); + } + return ret; +} + +int ObJsonUtil::init_json_path(ObIAllocator &alloc, ObExprCGCtx &op_cg_ctx, + const ObRawExpr* path, + ObExprJsonQueryParamInfo& res) +{ + INIT_SUCC(ret); + ObObj const_data; + bool got_data = false; + ObExecContext *exec_ctx = op_cg_ctx.session_->get_cur_exec_ctx(); + if (OB_NOT_NULL(path) + && (path->is_const_expr() || path->is_static_scalar_const_expr()) + && path->get_expr_type() != T_OP_GET_USER_VAR) { + void* buf = alloc.alloc(sizeof(ObJsonPath)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc path.", K(ret)); + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(exec_ctx, + path, + const_data, + got_data, + alloc))) { + LOG_WARN("failed to calc offset expr", K(ret)); + } else if (!got_data || const_data.is_null() + || !ob_is_string_type(const_data.get_type())) { + ret = OB_ERR_INVALID_INPUT_ARGUMENT; + LOG_WARN("fail to get int value", K(ret)); + } else { + ObString path_str = const_data.get_string(); + res.j_path_ = new (buf) ObJsonPath(path_str, &alloc); + if (OB_FAIL(res.j_path_->parse_path())) { + if (lib::is_oracle_mode()) { + ret = OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR; + 
LOG_USER_ERROR(OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR, path_str.length(), path_str.ptr()); + } else { + ret = OB_ERR_INVALID_JSON_PATH; + LOG_USER_ERROR(OB_ERR_INVALID_JSON_PATH); + } + LOG_WARN("wrong path expression, parse path failed or with wildcards", K(ret), K(path_str)); + } else if (OB_FAIL(ob_write_string(alloc, path_str, res.path_str_, true))) { + LOG_WARN("fail to deep copy path str", K(ret), K(path_str)); + } + } + } + return ret; +} + +int ObJsonUtil::datetime_scale_check(const ObAccuracy &accuracy, + int64_t &value, + bool strict) +{ + INIT_SUCC(ret); + ObScale scale = accuracy.get_scale(); + + if (OB_UNLIKELY(scale > MAX_SCALE_FOR_TEMPORAL)) { + ret = OB_ERR_TOO_BIG_PRECISION; + LOG_USER_ERROR(OB_ERR_TOO_BIG_PRECISION, scale, "CAST", + static_cast(MAX_SCALE_FOR_TEMPORAL)); + } else if (OB_UNLIKELY(0 <= scale && scale < MAX_SCALE_FOR_TEMPORAL)) { + // first check zero + if (strict && + (value == ObTimeConverter::ZERO_DATE || + value == ObTimeConverter::ZERO_DATETIME)) { + ret = OB_INVALID_DATE_VALUE; + LOG_WARN("Zero datetime is invalid in json_value.", K(value)); + } else { + int64_t temp_value = value; + ObTimeConverter::round_datetime(scale, temp_value); + if (strict && temp_value != value) { + ret = OB_OPERATE_OVERFLOW; + LOG_WARN("Invalid input value.", K(value), K(scale)); + } else if (ObTimeConverter::is_valid_datetime(temp_value)) { + value = temp_value; + } else { + ret = OB_ERR_NULL_VALUE; // set null for res + LOG_DEBUG("Invalid datetime val, return set_null", K(temp_value)); + } + } + } + + return ret; +} + +int ObJsonUtil::get_accuracy(const ObExpr &expr, + ObEvalCtx &ctx, + ObAccuracy &accuracy, + ObObjType &dest_type, + bool &is_cover_by_error) +{ + INIT_SUCC(ret); + ObDatum *dst_type_dat = NULL; + + if (OB_ISNULL(expr.args_) || OB_ISNULL(expr.args_[2])) { + ret = OB_ERR_UNEXPECTED; + is_cover_by_error = false; + LOG_WARN("unexpected expr", K(ret), K(expr.arg_cnt_), KP(expr.args_)); + } else if (OB_FAIL(expr.args_[2]->eval(ctx, 
dst_type_dat))) { + is_cover_by_error = false; + LOG_WARN("eval dst type datum failed", K(ret)); + } else { + ret = ObJsonUtil::get_accuracy_internal(accuracy, + ctx, + dest_type, + dst_type_dat->get_int(), + expr.datum_meta_.length_semantics_); + } + return ret; +} + +/*json cast to sql scalar*/ +int ObJsonUtil::get_accuracy_internal(ObAccuracy &accuracy, + ObEvalCtx& ctx, + ObObjType &dest_type, + const int64_t value, + const ObLengthSemantics &length_semantics) +{ + INIT_SUCC(ret); + ParseNode node; + node.value_ = value; + dest_type = static_cast(node.int16_values_[0]); + + if (ObFloatType == dest_type) { + // boundaries already checked in calc result type + if (node.int16_values_[OB_NODE_CAST_N_PREC_IDX] > OB_MAX_FLOAT_PRECISION) { + dest_type = ObDoubleType; + } + } + ObObjTypeClass dest_tc = ob_obj_type_class(dest_type); + if (ObStringTC == dest_tc) { + // parser will abort all negative number + // if length < 0 means DEFAULT_STR_LENGTH or OUT_OF_STR_LEN. + accuracy.set_full_length(node.int32_values_[1], length_semantics, + lib::is_oracle_mode()); + } else if (ObRawTC == dest_tc) { + accuracy.set_length(node.int32_values_[1]); + } else if(ObTextTC == dest_tc || ObJsonTC == dest_tc) { + accuracy.set_length(node.int32_values_[1] < 0 ? + ObAccuracy::DDL_DEFAULT_ACCURACY[dest_type].get_length() : node.int32_values_[1]); + } else if (ObIntervalTC == dest_tc) { + if (OB_UNLIKELY(!ObIntervalScaleUtil::scale_check(node.int16_values_[3]) || + !ObIntervalScaleUtil::scale_check(node.int16_values_[2]))) { + ret = OB_ERR_DATETIME_INTERVAL_PRECISION_OUT_OF_RANGE; + LOG_WARN("Invalid scale.", K(ret), K(node.int16_values_[3]), K(node.int16_values_[2])); + } else { + ObScale scale = (dest_type == ObIntervalYMType) ? 
+ ObIntervalScaleUtil::interval_ym_scale_to_ob_scale( + static_cast(node.int16_values_[3])) + : ObIntervalScaleUtil::interval_ds_scale_to_ob_scale( + static_cast(node.int16_values_[2]), + static_cast(node.int16_values_[3])); + accuracy.set_scale(scale); + } + } else { + const ObAccuracy &def_acc = + ObAccuracy::DDL_DEFAULT_ACCURACY2[lib::is_oracle_mode()][dest_type]; + if (ObNumberType == dest_type && 0 == node.int16_values_[2]) { + accuracy.set_precision(def_acc.get_precision()); + } else { + accuracy.set_precision(node.int16_values_[2]); + } + accuracy.set_scale(node.int16_values_[3]); + if (lib::is_oracle_mode() && ObDoubleType == dest_type) { + accuracy.set_accuracy(def_acc.get_precision()); + } + if (ObNumberType == dest_type + && is_decimal_int_accuracy_valid(accuracy.get_precision(), accuracy.get_scale())) { + bool enable_decimalint = false; + if (OB_ISNULL(ctx.exec_ctx_.get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("type_ctx.get_session() is null", K(ret)); + } else if (OB_FAIL(ObSQLUtils::check_enable_decimalint(ctx.exec_ctx_.get_my_session(), + enable_decimalint))) { + LOG_WARN("fail to check_enable_decimalint_type", + K(ret), K(ctx.exec_ctx_.get_my_session()->get_effective_tenant_id())); + } else if (enable_decimalint) { + dest_type = ObDecimalIntType; + } + } + } + + return ret; +} + +int ObJsonUtil::time_scale_check(const ObAccuracy &accuracy, int64_t &value, bool strict) +{ + INIT_SUCC(ret); + ObScale scale = accuracy.get_scale(); + + if (0 <= scale && scale < MAX_SCALE_FOR_TEMPORAL) { + int64_t temp_value = value; + ObTimeConverter::round_datetime(scale, temp_value); + if (strict && temp_value != value) { // round success + ret = OB_OPERATE_OVERFLOW; + LOG_WARN("Invalid input value.", K(value), K(scale)); + } else { + value = temp_value; + } + } + + return ret; +} + +static OB_INLINE int get_cast_ret(int ret) +{ + // compatibility for old ob + if (OB_ERR_UNEXPECTED_TZ_TRANSITION == ret || + OB_ERR_UNKNOWN_TIME_ZONE == ret) { + ret = 
OB_INVALID_DATE_VALUE; + } + + return ret; +} + +int ObJsonUtil::number_range_check(const ObAccuracy &accuracy, + ObIAllocator *allocator, + number::ObNumber &val, + bool strict) +{ + INIT_SUCC(ret); + ObPrecision precision = accuracy.get_precision(); + ObScale scale = accuracy.get_scale(); + const number::ObNumber *min_check_num = NULL; + const number::ObNumber *max_check_num = NULL; + const number::ObNumber *min_num_mysql = NULL; + const number::ObNumber *max_num_mysql = NULL; + bool is_finish = false; + if (lib::is_oracle_mode()) { + if (OB_MAX_NUMBER_PRECISION >= precision + && precision >= OB_MIN_NUMBER_PRECISION + && number::ObNumber::MAX_SCALE >= scale + && scale >= number::ObNumber::MIN_SCALE) { + min_check_num = &(ObNumberConstValue::ORACLE_CHECK_MIN[precision][scale + ObNumberConstValue::MAX_ORACLE_SCALE_DELTA]); + max_check_num = &(ObNumberConstValue::ORACLE_CHECK_MAX[precision][scale + ObNumberConstValue::MAX_ORACLE_SCALE_DELTA]); + } else if (ORA_NUMBER_SCALE_UNKNOWN_YET == scale + && PRECISION_UNKNOWN_YET == precision) { + is_finish = true; + } else if (PRECISION_UNKNOWN_YET == precision + && number::ObNumber::MAX_SCALE >= scale + && scale >= number::ObNumber::MIN_SCALE) { + number::ObNumber num; + if (OB_FAIL(num.from(val, *allocator))) { + } else if (OB_FAIL(num.round(scale))) { + } else if (val.compare(num) != 0) { + ret = OB_OPERATE_OVERFLOW; + LOG_WARN("input value is out of range.", K(scale), K(val)); + } else { + is_finish = true; + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(precision), K(scale)); + } + } else { + if (OB_UNLIKELY(precision < scale)) { + ret = OB_ERR_M_BIGGER_THAN_D; + LOG_WARN("Invalid accuracy.", K(ret), K(scale), K(precision)); + } else if (number::ObNumber::MAX_PRECISION >= precision + && precision >= OB_MIN_DECIMAL_PRECISION + && number::ObNumber::MAX_SCALE >= scale + && scale >= 0) { + min_check_num = &(ObNumberConstValue::MYSQL_CHECK_MIN[precision][scale]); + max_check_num = 
&(ObNumberConstValue::MYSQL_CHECK_MAX[precision][scale]); + min_num_mysql = &(ObNumberConstValue::MYSQL_MIN[precision][scale]); + max_num_mysql = &(ObNumberConstValue::MYSQL_MAX[precision][scale]); + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(precision), K(scale)); + } + } + if (OB_SUCC(ret) && !is_finish) { + if (OB_ISNULL(min_check_num) || OB_ISNULL(max_check_num) + || (!lib::is_oracle_mode() + && (OB_ISNULL(min_num_mysql) || OB_ISNULL(max_num_mysql)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("min_num or max_num is null", K(ret), KPC(min_check_num), KPC(max_check_num)); + } else if (val <= *min_check_num) { + if (lib::is_oracle_mode()) { + ret = OB_ERR_VALUE_LARGER_THAN_ALLOWED; + } else { + ret = OB_DATA_OUT_OF_RANGE; + } + LOG_WARN("val is out of min range check.", K(val), K(*min_check_num)); + is_finish = true; + } else if (val >= *max_check_num) { + if (lib::is_oracle_mode()) { + ret = OB_ERR_VALUE_LARGER_THAN_ALLOWED; + } else { + ret = OB_DATA_OUT_OF_RANGE; + } + LOG_WARN("val is out of max range check.", K(val), K(*max_check_num)); + is_finish = true; + } else { + ObNumStackOnceAlloc tmp_alloc; + number::ObNumber num; + if (OB_FAIL(num.from(val, tmp_alloc))) { + } else if (OB_FAIL(num.round(scale))) { + LOG_WARN("num.round failed", K(ret), K(scale)); + } else { + if (strict) { + if (num.compare(val) != 0) { + ret = OB_OPERATE_OVERFLOW; + LOG_WARN("input value is out of range.", K(scale), K(val)); + } else { + is_finish = true; + } + } else { + if (OB_ISNULL(allocator)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("allocator is null", K(ret)); + } else if (OB_FAIL(val.deep_copy_v3(num, *allocator))) { + LOG_WARN("val.deep_copy_v3 failed", K(ret), K(num)); + } else { + is_finish = true; + } + } + } + } + } + if (OB_SUCC(ret) && !is_finish) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected situation, res is not set", K(ret)); + } + LOG_DEBUG("number_range_check_v2 done", K(ret), K(is_finish), K(accuracy), K(val), + 
KPC(min_check_num), KPC(max_check_num)); + + return ret; +} + +int ObJsonUtil::set_lob_datum(common::ObIAllocator *allocator, + const ObExpr &expr, + ObEvalCtx &ctx, + ObObjType dst_type, + uint8_t ascii_type, + ObDatum &res) +{ + INIT_SUCC(ret); + if (res.is_null()) { // null value jump this process + } else { + switch (dst_type) { + case ObVarcharType: + case ObRawType: + case ObNVarchar2Type: + case ObNCharType: + case ObCharType: + case ObTinyTextType: + case ObTextType : + case ObMediumTextType: + case ObHexStringType: + case ObLongTextType: { + ObString val; + val = res.get_string(); + ObTextStringDatumResult text_result(expr.datum_meta_.type_, &expr, &ctx, &res); + if (OB_FAIL(ret)) { + } else if (ascii_type == 0) { + if (OB_FAIL(text_result.init(val.length()))) { + LOG_WARN("init lob result failed"); + } else if (OB_FAIL(text_result.append(val))) { + LOG_WARN("failed to append realdata", K(ret), K(val), K(text_result)); + } + } else { + char *buf = NULL; + int64_t buf_len = val.length() * ObCharset::MAX_MB_LEN * 2; + int64_t reserve_len = 0; + int32_t length = 0; + + if (OB_FAIL(text_result.init(buf_len))) { + LOG_WARN("init lob result failed"); + } else if (OB_FAIL(text_result.get_reserved_buffer(buf, reserve_len))) { + LOG_WARN("fail to get reserved buffer", K(ret)); + } else if (reserve_len != buf_len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get reserve len is invalid", K(ret), K(reserve_len), K(buf_len)); + } else if (OB_FAIL(ObJsonExprHelper::calc_asciistr_in_expr(val, expr.args_[0]->datum_meta_.cs_type_, + expr.datum_meta_.cs_type_, + buf, reserve_len, length))) { + LOG_WARN("fail to calc unistr", K(ret)); + } else if (OB_FAIL(text_result.lseek(length, 0))) { + LOG_WARN("text_result lseek failed", K(ret), K(text_result), K(length)); + } + } + if (OB_SUCC(ret)) { + // old engine set same alloctor for wrapper, so we can use val without copy + text_result.set_result(); + } + break; + } + case ObJsonType: { + ObString out_val; + out_val = 
res.get_string(); + ObTextStringDatumResult text_result(expr.datum_meta_.type_, &expr, &ctx, &res); + if (OB_SUCC(ret)) { + if (OB_FAIL(text_result.init(out_val.length()))) { + LOG_WARN("init lob result failed"); + } else if (OB_FAIL(text_result.append(out_val))) { + LOG_WARN("failed to append realdata", K(ret), K(out_val), K(text_result)); + } else { + text_result.set_result(); + } + } + break; + } + default: { + break; + } + } + } + return ret; +} + +int ObJsonUtil::bit_length_check(const ObAccuracy &accuracy, + uint64_t &value) +{ + int ret = OB_SUCCESS; + int32_t bit_len = 0; + int32_t dst_bit_len = accuracy.get_precision(); + bit_len = ObJsonBaseUtil::get_bit_len(value); + if(OB_UNLIKELY(bit_len <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("bit length is negative", K(ret), K(value), K(bit_len)); + } else { + if (OB_UNLIKELY(bit_len > dst_bit_len)) { + ret = OB_ERR_DATA_TOO_LONG; + LOG_WARN("bit type length is too long", K(ret), K(bit_len), + K(dst_bit_len), K(value)); + } + } + return ret; +} + +// padding %padding_cnt character, we also need to convert collation type here. +// eg: select cast('abc' as nchar(100)) from dual; +// the space must be in utf16, because dst_type is nchar +int ObJsonUtil::padding_char_for_cast(int64_t padding_cnt, + const ObCollationType &padding_cs_type, + ObIAllocator &alloc, + ObString &padding_res) +{ + int ret = OB_SUCCESS; + padding_res.reset(); + const ObCharsetType &cs = ObCharset::charset_type_by_coll(padding_cs_type); + char padding_char = (CHARSET_BINARY == cs) ? 
OB_PADDING_BINARY : OB_PADDING_CHAR; + int64_t padding_str_size = sizeof(padding_char) * padding_cnt; + char *padding_str_ptr = reinterpret_cast(alloc.alloc(padding_str_size)); + if (OB_ISNULL(padding_str_ptr)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret)); + } else if (CHARSET_BINARY == cs) { + MEMSET(padding_str_ptr, padding_char, padding_str_size); + padding_res.assign_ptr(padding_str_ptr, padding_str_size); + } else { + MEMSET(padding_str_ptr, padding_char, padding_str_size); + ObString padding_str(padding_str_size, padding_str_ptr); + if (OB_FAIL(ObExprUtil::convert_string_collation(padding_str, + ObCharset::get_system_collation(), + padding_res, + padding_cs_type, + alloc))) { + LOG_WARN("convert padding str collation faield", K(ret), K(padding_str), + K(padding_cs_type)); + } + } + LOG_DEBUG("pad char done", K(ret), K(padding_cnt), K(padding_cs_type), K(padding_res)); + return ret; +} + +int cast_to_null(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + UNUSED(ctx); + UNUSED(j_base); + UNUSED(accuracy); + UNUSED(cast_param); + UNUSED(is_type_mismatch); + if (!cast_param.is_only_check_) { + res.set_null(); + } + return ret; +} + +int cast_to_int(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + UNUSED(ctx); + UNUSED(accuracy); + UNUSED(is_type_mismatch); + int64_t val = 0; + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (CAST_FAIL(j_base->to_int(val, true))) { + ret = OB_OPERATE_OVERFLOW; + LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "SIGNED", "json_value"); + LOG_WARN("cast to int failed", K(ret), K(*j_base)); + } else if 
(cast_param.dst_type_ < ObIntType && + CAST_FAIL(int_range_check(cast_param.dst_type_, val, val))) { + ret = OB_OPERATE_OVERFLOW; + LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "SIGNED", "json_value"); + } else if (!cast_param.is_only_check_) { + if (cast_param.dst_type_ == ObIntType) { + res.set_int(val); + } else { + res.set_int32(static_cast(val)); + } + } + return ret; +} + +int cast_to_uint(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + UNUSED(ctx); + UNUSED(accuracy); + UNUSED(is_type_mismatch); + uint64_t val = 0; + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (CAST_FAIL(j_base->to_uint(val, true, true))) { + LOG_WARN("cast to uint failed", K(ret), K(*j_base)); + if (ret == OB_OPERATE_OVERFLOW) { + LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "UNSIGNED", "json_value"); + } + } else if (cast_param.dst_type_ < ObUInt64Type && + CAST_FAIL(uint_upper_check(cast_param.dst_type_, val))) { + LOG_WARN("uint_upper_check failed", K(ret)); + } else if (!cast_param.is_only_check_) { + if (cast_param.dst_type_ == ObUInt64Type) { + res.set_uint(val); + } else { + res.set_uint32(static_cast(val)); + } + } + + return ret; +} + +int cast_to_string(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(ctx); + ObString val; + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (OB_ISNULL(allocator)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("allocator is null", K(ret)); + } else { + ObJsonBuffer j_buf(allocator); + if (CAST_FAIL(j_base->print(j_buf, cast_param.is_quote_, cast_param.is_pretty_))) { + is_type_mismatch = 1; + LOG_WARN("fail to_string as 
json", K(ret)); + } else { + ObObjType in_type = ObLongTextType; + ObString temp_str_val(j_buf.length(), j_buf.ptr()); + bool is_need_string_string_convert = ((CS_TYPE_BINARY == cast_param.dst_coll_type_) + || (ObCharset::charset_type_by_coll(cast_param.in_coll_type_) != + ObCharset::charset_type_by_coll(cast_param.dst_coll_type_))) + && !(lib::is_mysql_mode() && temp_str_val.length() == 0); + if (is_need_string_string_convert) { + if (CS_TYPE_BINARY != cast_param.in_coll_type_ + && CS_TYPE_BINARY != cast_param.dst_coll_type_ + && (ObCharset::charset_type_by_coll(cast_param.in_coll_type_) != + ObCharset::charset_type_by_coll(cast_param.dst_coll_type_))) { + char *buf = NULL; + int64_t buf_len = (temp_str_val.length() == 0 ? 1 : temp_str_val.length()) * ObCharset::CharConvertFactorNum; + uint32_t result_len = 0; + buf = reinterpret_cast(allocator->alloc(buf_len)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret)); + } else if (OB_FAIL(ObCharset::charset_convert(cast_param.in_coll_type_, temp_str_val.ptr(), + temp_str_val.length(), cast_param.dst_coll_type_, buf, + buf_len, result_len))) { + LOG_WARN("charset convert failed", K(ret)); + } else { + val.assign_ptr(buf, result_len); + } + } else { + if (CS_TYPE_BINARY == cast_param.in_coll_type_ || CS_TYPE_BINARY == cast_param.dst_coll_type_) { + // just copy string when in_cs_type or out_cs_type is binary + const ObCharsetInfo *cs = NULL; + int64_t align_offset = 0; + if (CS_TYPE_BINARY == cast_param.in_coll_type_ && lib::is_mysql_mode() + && (NULL != (cs = ObCharset::get_charset(cast_param.dst_coll_type_)))) { + if (cs->mbminlen > 0 && temp_str_val.length() % cs->mbminlen != 0) { + align_offset = cs->mbminlen - temp_str_val.length() % cs->mbminlen; + } + } + int64_t len = align_offset + temp_str_val.length(); + char *buf = reinterpret_cast(allocator->alloc(len)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + 
} else { + MEMMOVE(buf + align_offset, temp_str_val.ptr(), len - align_offset); + MEMSET(buf, 0, align_offset); + val.assign_ptr(buf, len); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("same charset should not be here, just use cast_eval_arg", K(ret), + K(in_type), K(cast_param.dst_type_), K(cast_param.in_coll_type_), K(cast_param.dst_coll_type_)); + } + } + } else { + val.assign_ptr(temp_str_val.ptr(), temp_str_val.length()); + } + + ObLengthSemantics senmactics = accuracy.get_length_semantics(); + // do str length check + const int32_t str_len_char = static_cast(ObCharset::strlen_char( + senmactics == LS_BYTE ? CS_TYPE_BINARY : cast_param.dst_coll_type_, val.ptr(), val.length())); + ObLength max_accuracy_len; + if (lib::is_oracle_mode()) { + max_accuracy_len = (cast_param.dst_type_ == ObLongTextType) ? OB_MAX_LONGTEXT_LENGTH : accuracy.get_length(); + } else { // mysql mode + max_accuracy_len = (ob_obj_type_class(cast_param.dst_type_) == ObTextTC) + ? ObAccuracy::DDL_DEFAULT_ACCURACY[cast_param.dst_type_].get_length() + : accuracy.get_length(); + } + if (max_accuracy_len > 0 && lib::is_oracle_mode()) { + max_accuracy_len *= (senmactics == LS_BYTE ? 1 : 2); + } + + uint32_t byte_len = 0; + byte_len = ObCharset::charpos(senmactics == LS_BYTE ? 
CS_TYPE_BINARY : cast_param.dst_coll_type_, val.ptr(), str_len_char, max_accuracy_len); + + if (OB_SUCC(ret)) { + if (max_accuracy_len == DEFAULT_STR_LENGTH) { // default string len + } else if (cast_param.is_trunc_ && max_accuracy_len < str_len_char) { + if (!cast_param.is_const_ && (j_base->json_type() == ObJsonNodeType::J_INT + || j_base->json_type() == ObJsonNodeType::J_UINT + || j_base->json_type() == ObJsonNodeType::J_BOOLEAN + || j_base->json_type() == ObJsonNodeType::J_DOUBLE + || j_base->json_type() == ObJsonNodeType::J_DECIMAL)) { + ret = OB_ERR_VALUE_EXCEEDED_MAX; + } else { + // bugfix: + // Q1:SELECT c1 ,jt.ww b_c1 FROM t1, json_table ( c2 columns( ww varchar2(2 char) truncate path '$.a')) jt ; + // Q2:SELECT c1 ,jt.ww b_c1 FROM t1, json_table ( c2 columns( ww varchar2(2 byte) truncate path '$.a')) jt; + // should not split in the middle of char + if (byte_len == 0) { // value has zero length + val.assign_ptr("", 0); + } else if (senmactics == LS_BYTE && cast_param.dst_coll_type_ != CS_TYPE_BINARY) { + int64_t char_len; // not used + // zero max_accuracy_len not allowed + byte_len = ObCharset::max_bytes_charpos(cast_param.dst_coll_type_, val.ptr(), str_len_char, max_accuracy_len, char_len); + if (byte_len == 0) { // buffer not enough for one bytes + ret = OB_OPERATE_OVERFLOW; + } else { + val.assign_ptr(val.ptr(), byte_len); + } + } else { + val.assign_ptr(val.ptr(), byte_len); + } + } + } else if (max_accuracy_len <= 0 || str_len_char > max_accuracy_len) { + ret = OB_OPERATE_OVERFLOW; + LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "STRING", "json expr"); + } + } + if (OB_SUCC(ret) && ObCharType == cast_param.dst_type_ && CS_TYPE_BINARY == cast_param.dst_coll_type_) { // binary need padding + int64_t text_length = val.length(); + if (max_accuracy_len > text_length) { + int64_t padding_cnt = max_accuracy_len - text_length; + ObString padding_res; + if (OB_FAIL(ObJsonUtil::padding_char_for_cast(padding_cnt, cast_param.dst_coll_type_, *allocator, + padding_res))) 
{ + LOG_WARN("padding char failed", K(ret), K(padding_cnt), K(cast_param.dst_coll_type_)); + } else { + int64_t padding_size = padding_res.length() + val.length(); + char *buf = reinterpret_cast(allocator->alloc(padding_size)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else { + MEMMOVE(buf, val.ptr(), val.length()); + MEMMOVE(buf + val.length(), padding_res.ptr(), padding_res.length()); + val.assign_ptr(buf, padding_size); + } + } + } + } + if (OB_SUCC(ret) && !cast_param.is_only_check_) { + res.set_string(val); + } + } + } + + return ret; +} + +bool ObJsonUtil::type_cast_to_string(ObString &json_string, + common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy) { + int res = 0; + ObDatum t_res; + uint8_t is_type_mismatch = false; + ObJsonCastParam cast_param(ObLongTextType, CS_TYPE_BINARY, CS_TYPE_BINARY, false); + res = cast_to_string(allocator, ctx, j_base, accuracy, cast_param, t_res, is_type_mismatch); + if (res == 0) { + json_string = t_res.get_string(); + } + return res == 0 ? 
true : false; +} + +int cast_to_datetime(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(cast_param); + ObString json_string; + int64_t val; + GET_SESSION() + { + oceanbase::common::ObTimeConvertCtx cvrt_ctx(session->get_timezone_info(), false); + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (lib::is_oracle_mode()) { + if (OB_FAIL(common_get_nls_format(session, ctx, cast_param.rt_expr_, ObDateTimeType, + true, + cvrt_ctx.oracle_nls_format_))) { + LOG_WARN("common_get_nls_format failed", K(ret)); + } else if (!j_base->is_json_date(j_base->json_type()) + && ObJsonUtil::type_cast_to_string(json_string, allocator, ctx, j_base, accuracy) && json_string.length() > 0) { + ObJsonString json_str(json_string.ptr(),json_string.length()); + if (CAST_FAIL(json_str.to_datetime(val, &cvrt_ctx))) { + is_type_mismatch = 1; + LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); + } + } else if (CAST_FAIL(j_base->to_datetime(val, &cvrt_ctx))) { + is_type_mismatch = 1; + LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); + } + if (OB_SUCC(ret) && CAST_FAIL(ObJsonUtil::datetime_scale_check(accuracy, val))) { + LOG_WARN("datetime_scale_check failed.", K(ret)); + } + } else { + if (CAST_FAIL(j_base->to_datetime(val, &cvrt_ctx))) { + LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); + } else if (CAST_FAIL(ObJsonUtil::datetime_scale_check(accuracy, val))) { + LOG_WARN("datetime_scale_check failed.", K(ret)); + } + } + } + if (cast_param.is_only_check_) { + } else if (ret == OB_ERR_NULL_VALUE) { + res.set_null(); + } else if (OB_SUCC(ret)) { + res.set_datetime(val); + } + return ret; +} + +int cast_to_timstamp(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam 
&cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + ObOTimestampData out_val; + int64_t val; + oceanbase::common::ObTimeConvertCtx cvrt_ctx(NULL, cast_param.dst_type_ == ObTimestampType); + GET_SESSION() + { + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else { + cvrt_ctx.tz_info_ = session->get_timezone_info(); + if (lib::is_oracle_mode()) { + if (OB_FAIL(common_get_nls_format(session, ctx, cast_param.rt_expr_, ObDateTimeType, + true, + cvrt_ctx.oracle_nls_format_))) { + LOG_WARN("common_get_nls_format failed", K(ret)); + } + } + } + } + if (OB_SUCC(ret)) { + if (CAST_FAIL(j_base->to_datetime(val, &cvrt_ctx))) { + is_type_mismatch = 1; + LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); + } else if (cast_param.dst_type_ == ObTimestampType) { + out_val.time_us_ = val; + out_val.time_ctx_.tail_nsec_ = 0; + } else if (OB_FAIL(ObTimeConverter::odate_to_otimestamp(val, cvrt_ctx.tz_info_, cast_param.dst_type_, out_val))) { + is_type_mismatch = 1; + LOG_WARN("fail to timestamp_to_timestamp_tz", K(ret), K(val), K(cast_param.dst_type_)); + } + if (OB_SUCC(ret)) { + ObScale scale = accuracy.get_scale(); + if (OB_UNLIKELY(0 <= scale && scale < MAX_SCALE_FOR_ORACLE_TEMPORAL)) { + ObOTimestampData ot_data = ObTimeConverter::round_otimestamp(scale, out_val); + if (ObTimeConverter::is_valid_otimestamp(ot_data.time_us_, + static_cast(ot_data.time_ctx_.tail_nsec_))) { + out_val = ot_data; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid otimestamp, set it null ", K(ot_data), K(scale), "orig_date", out_val); + } + } + } + if (OB_SUCC(ret) && (!cast_param.is_only_check_)) { + if (cast_param.dst_type_ == ObTimestampTZType) { + res.set_otimestamp_tz(out_val); + } else if (cast_param.dst_type_ == ObTimestampType) { + res.set_datetime(out_val.time_us_); + } else { + res.set_otimestamp_tiny(out_val); + } + } + } + return ret; +} + +int 
cast_to_date_time(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + if (cast_param.dst_type_ == oceanbase::common::ObTimestampType) { + ret = cast_to_timstamp(allocator, ctx, j_base, accuracy, cast_param, res, is_type_mismatch); + } else { + ret = cast_to_datetime(allocator, ctx, j_base, accuracy, cast_param, res, is_type_mismatch); + } + return ret; +} + +int cast_to_date(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + UNUSED(cast_param); + UNUSED(ctx); + UNUSED(accuracy); + int32_t val; + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (j_base->json_type() == ObJsonNodeType::J_NULL) { + res.set_null(); + } else if (CAST_FAIL(j_base->to_date(val))) { + is_type_mismatch = 1; + LOG_WARN("wrapper to date failed.", K(ret), K(*j_base)); + ret = OB_OPERATE_OVERFLOW; + LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "DATE", "json_value"); + } else if (!cast_param.is_only_check_) { + res.set_date(val); + } + + return ret; +} + +int cast_to_time(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + UNUSED(ctx); + UNUSED(is_type_mismatch); + int64_t val = 0; + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (CAST_FAIL(j_base->to_time(val))) { + if (ret == OB_ERR_UNEXPECTED + && j_base->json_type() == ObJsonNodeType::J_INT + && cast_param.is_json_table_) { + ret = OB_SUCCESS; + int64_t in_val = j_base->get_int(); + if (OB_FAIL(ObTimeConverter::int_to_time(in_val, val))) { + 
LOG_WARN("int_to_time failed", K(ret), K(in_val), K(val)); + } + } else { + LOG_WARN("wrapper to time failed.", K(ret), K(*j_base)); + ret = OB_OPERATE_OVERFLOW; + LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "TIME", "json_value"); + } + } + if (OB_FAIL(ret)) { + } else if (CAST_FAIL(ObJsonUtil::time_scale_check(accuracy, val))) { + LOG_WARN("time_scale_check failed.", K(ret)); + } else if (!cast_param.is_only_check_) { + res.set_time(val); + } + + return ret; +} + +int cast_to_year(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + UNUSED(cast_param); + UNUSED(ctx); + UNUSED(accuracy); + UNUSED(is_type_mismatch); + // Compatible with mysql. + // There is no year type in json binary, it is store as a full int. + // For example, 1901 is stored as 1901, not 01. + // in mysql 8.0, json is converted to int first, then converted to year. + // However, json value returning as different behavior to cast expr. 
+ int64_t int_val; + uint8_t val = 0; + const uint16 min_year = 1901; + const uint16 max_year = 2155; + + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (CAST_FAIL(j_base->to_int(int_val))) { + LOG_WARN("wrapper to year failed.", K(ret), K(*j_base)); + } else if ((lib::is_oracle_mode() || !cast_param.is_json_table_) + && (0 != int_val && (int_val < min_year || int_val > max_year))) { + // different with cast, if 0 < int val < 100, do not add base year + LOG_DEBUG("int out of year range", K(int_val)); + ret = OB_DATA_OUT_OF_RANGE; + } else if(CAST_FAIL(ObTimeConverter::int_to_year(int_val, val))) { + LOG_WARN("int to year failed.", K(ret), K(int_val)); + } else if (!cast_param.is_only_check_) { + res.set_year(val); + } + + return ret; +} + +int cast_to_float(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + UNUSED(ctx); + UNUSED(accuracy); + UNUSED(is_type_mismatch); + double tmp_val; + float val = 0; + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (CAST_FAIL(j_base->to_double(tmp_val))) { + LOG_WARN("wrapper to date failed.", K(ret), K(*j_base)); + } else { + val = static_cast(tmp_val); + if (lib::is_mysql_mode() && CAST_FAIL(real_range_check(cast_param.dst_type_, tmp_val, val))) { + LOG_WARN("real_range_check failed", K(ret), K(tmp_val)); + } else if (!cast_param.is_only_check_) { + res.set_float(val); + } + } + + return ret; +} + +int cast_to_double(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + UNUSED(ctx); + UNUSED(accuracy); + UNUSED(is_type_mismatch); + double val = 0; + if (OB_ISNULL(j_base)) { 
+ ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (CAST_FAIL(j_base->to_double(val))) { + LOG_WARN("wrapper to date failed.", K(ret), K(*j_base)); + } else if (ObUDoubleType == cast_param.dst_type_ && CAST_FAIL(numeric_negative_check(val))) { + LOG_WARN("numeric_negative_check failed", K(ret), K(val)); + } else if (!cast_param.is_only_check_) { + res.set_double(val); + } + + return ret; +} + +int ObJsonUtil::cast_to_number_type(common::ObIAllocator *allocator, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + uint8_t &is_type_mismatch, + number::ObNumber &val) +{ + INIT_SUCC(ret); + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (CAST_FAIL(j_base->to_number(allocator, val))) { + is_type_mismatch = 1; + LOG_WARN("fail to cast json as decimal", K(ret)); + } else if (ObUNumberType == cast_param.dst_type_ && CAST_FAIL(numeric_negative_check(val))) { + LOG_WARN("numeric_negative_check failed", K(ret), K(val)); + } else if (CAST_FAIL(ObJsonUtil::number_range_check(accuracy, allocator, val))) { + LOG_WARN("number_range_check failed", K(ret), K(val)); + } + return ret; +} + +int cast_to_number(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(ctx); + number::ObNumber val; + if (OB_FAIL(ObJsonUtil::cast_to_number_type(allocator, j_base, accuracy, cast_param, is_type_mismatch, val))) { + LOG_WARN("failed to cast to number type failed.", K(ret)); + } else if (!cast_param.is_only_check_) { + res.set_number(val); + } + return ret; +} + +int cast_to_decimalint(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + // TODO:@xiaofeng.lby, modify this 
after support cast json into decimalint directly in json_decimalint + number::ObNumber temp_num; + ObDecimalInt *decint = nullptr; + int32_t int_bytes; + number::ObNumber val; + if (OB_FAIL(ObJsonUtil::cast_to_number_type(allocator, j_base, accuracy, cast_param, is_type_mismatch, val))) { + LOG_WARN("cast to number failed", K(ret)); + } else if (OB_FAIL(wide::from_number(val, *allocator, accuracy.scale_, decint, int_bytes))) { + LOG_WARN("cast number to decimal int failed", K(ret)); + } + if (OB_SUCC(ret)) { + const int len = wide::ObDecimalIntConstValue::get_int_bytes_by_precision(accuracy.precision_); + if (len < int_bytes) { + res.set_null(); + } else if (len > int_bytes) { + ObDecimalIntBuilder res_builder; + res_builder.from(decint, int_bytes); + res_builder.extend(len); + res.set_decimal_int(res_builder.get_decimal_int(), res_builder.get_int_bytes()); + } else { + res.set_decimal_int(decint, int_bytes); + } + } + return ret; +} + +int cast_to_bit(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + UNUSED(ctx); + UNUSED(accuracy); + UNUSED(cast_param); + UNUSED(is_type_mismatch); + uint64_t val = 0; + int64_t int_val; + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (CAST_FAIL(j_base->to_int(int_val))) { + ret = OB_ERR_INVALID_JSON_VALUE_FOR_CAST; + LOG_WARN("fail get int from json", K(ret)); + } else { + val = static_cast(int_val); + if (OB_FAIL(ObJsonUtil::bit_length_check(accuracy, val))) { + LOG_WARN("fail to check bit range", K(ret)); + } else if (!cast_param.is_only_check_) { + res.set_uint(val); + } + } + + return ret; +} + +int cast_to_json(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + 
INIT_SUCC(ret); + UNUSED(ctx); + UNUSED(cast_param); + ObString val; + if (OB_ISNULL(j_base)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("json base is null", K(ret)); + } else if (CAST_FAIL(j_base->get_raw_binary(val, allocator))) { + is_type_mismatch = 1; + LOG_WARN("failed to get raw binary", K(ret)); + } else { + char *buf = static_cast(allocator->alloc(val.length())); + if (OB_UNLIKELY(buf == NULL)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory for json array result", K(ret), K(val.length())); + } else { + MEMCPY(buf, val.ptr(), val.length()); + val.assign_ptr(buf, val.length()); + if (!cast_param.is_only_check_) { + res.set_string(val); + } + } + } + + return ret; +} + +int cast_not_expected(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + int ret = OB_ERR_UNEXPECTED; + UNUSED(allocator); + UNUSED(ctx); + UNUSED(j_base); + UNUSED(accuracy); + UNUSED(res); + UNUSED(is_type_mismatch); + LOG_WARN("unexpected dst_type", K(cast_param.dst_type_)); + return ret; +} + +int ObJsonUtil::cast_to_res(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + ObJsonUtil::ObJsonCastSqlScalar cast_func_ = get_json_cast_func(cast_param.dst_type_); + if (OB_ISNULL(j_base)) { + res.set_null(); + } else if (OB_ISNULL(cast_func_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("eval func can not be null", K(ret)); + } else if (OB_FAIL(((ObJsonUtil::ObJsonCastSqlScalar)(cast_func_))(allocator, + ctx, j_base, accuracy, cast_param, + res, is_type_mismatch))) { + LOG_WARN("fail to deal json cast to sql scalar", K(ret)); + } + + LOG_DEBUG("finish cast_to_res.", K(ret), K(cast_param.dst_type_)); + + return ret; +} + +ObJsonUtil::ObJsonCastSqlScalar OB_JSON_CAST_SQL_EXPLICIT[ObMaxTC] = +{ + // ObNullTC 
= 0, // null + cast_to_null, + // ObIntTC = 1, // int8, int16, int24, int32, int64. + cast_to_int, + // ObUIntTC = 2, // uint8, uint16, uint24, uint32, uint64. + cast_to_uint, + // ObFloatTC = 3, // float, ufloat. + cast_to_float, + // ObDoubleTC = 4, // double, udouble. + cast_to_double, + // ObNumberTC = 5, // number, unumber. + cast_to_number, + // ObDateTimeTC = 6, // datetime, timestamp. + cast_to_date_time, + // ObDateTC = 7, // date + cast_to_date, + // ObTimeTC = 8, // time + cast_to_time, + // ObYearTC = 9, // year + cast_to_year, + // ObStringTC = 10, // varchar, char, varbinary, binary. + cast_to_string, + // ObExtendTC = 11, // extend + cast_not_expected, + // ObUnknownTC = 12, // unknown + cast_not_expected, + // ObTextTC = 13, // TinyText,MediumText, Text ,LongText, TinyBLOB,MediumBLOB, // BLOB ,LongBLOB + cast_to_string, + // ObBitTC = 14, // bit + cast_to_bit, + // ObEnumSetTC = 15, // enum, set + cast_not_expected, + // ObEnumSetInnerTC = 16, + cast_not_expected, + // ObOTimestampTC = 17, //timestamp with time zone + cast_to_timstamp, + // ObRawTC = 18, // raw + cast_to_string, + // ObIntervalTC = 19, //oracle interval type class include interval year to month and interval day to second + cast_not_expected, + // ObRowIDTC = 20, // oracle rowid typeclass, includes urowid and rowid + cast_not_expected, + // ObLobTC = 21, //oracle lob typeclass ObLobType not use + cast_not_expected, + // ObJsonTC = 22, // json type class + cast_to_json, + // ObGeometryTC = 23, // geometry type class + cast_not_expected, + // ObUserDefinedSQLTC = 24, // user defined type class in SQL + cast_not_expected, + // ObDecimalIntTC = 25, // decimal int class + cast_to_decimalint, +}; + +#undef CAST_FAIL +#undef GET_SESSION + +int ObJsonUtil::get_json_path(ObExpr* expr, + ObEvalCtx &ctx, + bool &is_null_result, + ObJsonParamCacheCtx *¶m_ctx, + common::ObIAllocator &temp_allocator, + bool &is_cover_by_error) +{ + INIT_SUCC(ret); + ObDatum *json_datum = NULL; + ObObjType type = 
expr->datum_meta_.type_; + ObJsonPathCache ctx_cache(&temp_allocator); + ObJsonPath* j_path = nullptr; + ObString j_path_text; + ObJsonPathCache* path_cache = path_cache = param_ctx->get_path_cache(); + // parse json path + if (!param_ctx->is_first_exec_ && param_ctx->is_json_path_const_ + && OB_NOT_NULL(path_cache)) { + } else { + type = expr->datum_meta_.type_; + if (OB_FAIL(expr->eval(ctx, json_datum))) { + is_cover_by_error = false; + LOG_WARN("eval json arg failed", K(ret)); + } else if (type == ObNullType || json_datum->is_null()) { + is_null_result = true; + } else if (!ob_is_string_type(type)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("input type error", K(type)); + } + + if OB_SUCC(ret) { + j_path_text = json_datum->get_string(); + if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(expr, ctx, temp_allocator, j_path_text, is_null_result))) { + LOG_WARN("fail to get real data.", K(ret), K(j_path_text)); + } else if (j_path_text.length() == 0) { // maybe input json doc is null type + is_null_result = true; + } else if (OB_FAIL(ObJsonExprHelper::convert_string_collation_type(expr->datum_meta_.cs_type_, + CS_TYPE_UTF8MB4_BIN, + &ctx.exec_ctx_.get_allocator(), + j_path_text, + j_path_text))) { + LOG_WARN("convert string memory failed", K(ret), K(j_path_text)); + } + path_cache = ((path_cache != NULL) ? 
path_cache : &ctx_cache); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(ObJsonExprHelper::find_and_add_cache(path_cache, j_path, j_path_text, 0, true, param_ctx->is_json_path_const_))) { + is_cover_by_error = false; + if (lib::is_oracle_mode()) { + ret = OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR; + LOG_USER_ERROR(OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR, j_path_text.length(), j_path_text.ptr()); + } + } else { + param_ctx->json_param_.json_path_ = j_path; + } + } + return ret; +} + +int ObJsonUtil::get_json_doc(ObExpr *expr, + ObEvalCtx &ctx, + common::ObIAllocator &allocator, + ObIJsonBase*& j_base, + bool &is_null, bool & is_cover_by_error, + bool relax) +{ + INIT_SUCC(ret); + ObDatum *json_datum = NULL; + ObObjType val_type = expr->datum_meta_.type_; + ObCollationType cs_type = expr->datum_meta_.cs_type_; + + bool is_oracle = lib::is_oracle_mode(); + + if (OB_UNLIKELY(OB_FAIL(expr->eval(ctx, json_datum)))) { + is_cover_by_error = false; + LOG_WARN("eval json arg failed", K(ret)); + } else if (val_type == ObNullType || json_datum->is_null()) { + is_null = true; + } else if (val_type != ObJsonType && !ob_is_string_type(val_type)) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("input type error", K(val_type)); + } else if (lib::is_mysql_mode() && OB_FAIL(ObJsonExprHelper::ensure_collation(val_type, cs_type))) { + LOG_WARN("fail to ensure collation", K(ret), K(val_type), K(cs_type)); + } else { + ObString j_str; + if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(expr, ctx, allocator, j_str, is_null))) { + LOG_WARN("fail to get real data.", K(ret), K(j_str)); + } else if (is_null) { + } else { + ObJsonInType j_in_type = ObJsonExprHelper::get_json_internal_type(val_type); + ObJsonInType expect_type = j_in_type; + bool relax_json = (is_oracle && relax); + uint32_t parse_flag = relax_json ? 
ObJsonParser::JSN_RELAXED_FLAG : 0; + if (is_oracle && j_str.length() == 0) { + is_null = true; + } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&allocator, j_str, j_in_type, + expect_type, j_base, parse_flag))) { + LOG_WARN("fail to get json base", K(ret), K(j_in_type)); + if (ret == OB_ERR_JSON_OUT_OF_DEPTH) { + is_cover_by_error = false; + } else if (is_oracle) { + ret = OB_ERR_JSON_SYNTAX_ERROR; + } else { + ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; + LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT_IN_PARAM); + } + } else if (j_base->is_bin()) { + // only use json doc to search + static_cast(j_base)->set_seek_flag(true); + } + } + } + return ret; +} + +int func_upper_json_string(ObIJsonBase*& in, + bool &is_null_result, + common::ObIAllocator *allocator, + uint8_t &is_type_mismatch) +{ + UNUSED(is_null_result); + UNUSED(allocator); + UNUSED(is_type_mismatch); + if (((ObJsonString *)in)->get_is_null_to_str()) { + is_null_result = true; + } + return OB_SUCCESS; +} + +int func_conversion_fail(ObIJsonBase*& in, + bool &is_null_result, + common::ObIAllocator *allocator, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(in); + UNUSED(allocator); + UNUSED(is_type_mismatch); + UNUSED(is_null_result); + ret = OB_ERR_CONVERSION_FAIL; + LOG_WARN("data seek fail", K(ret)); + return ret; +} + +int func_str_json_null(ObIJsonBase*& in, + bool &is_null_result, + common::ObIAllocator *allocator, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + if (!in->is_real_json_null(in)) { + ret = OB_INVALID_NUMERIC; + is_type_mismatch = 1; + LOG_WARN("string only function meet non-string data", K(ret)); + } + return ret; +} + +int func_num_json_null(ObIJsonBase*& in, + bool &is_null_result, + common::ObIAllocator *allocator, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(allocator); + if (!in->is_real_json_null(in)) { + ret = OB_INVALID_NUMERIC; + is_type_mismatch = 1; + LOG_WARN("number only function meet non-number data", K(ret)); + } + return 
ret; +} + +int set_null_result(ObIJsonBase*& in, + bool &is_null_result, + common::ObIAllocator *allocator, + uint8_t &is_type_mismatch) +{ + UNUSED(in); + UNUSED(allocator); + UNUSED(is_type_mismatch); + is_null_result = true; + return OB_SUCCESS; +} + +int cast_succ(ObIJsonBase*& in, + bool &is_null_result, + common::ObIAllocator *allocator, + uint8_t &is_type_mismatch) +{ + UNUSED(in); + UNUSED(is_null_result); + UNUSED(allocator); + UNUSED(is_type_mismatch); + return OB_SUCCESS; +} + +int func_path_syntax_fail(ObIJsonBase*& in, + bool &is_null_result, + common::ObIAllocator *allocator, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + UNUSED(in); + UNUSED(allocator); + UNUSED(is_type_mismatch); + UNUSED(is_null_result); + ret = OB_ERR_JSON_PATH_SYNTAX_ERROR; + LOG_WARN("boolean only function meet non-boolean data", K(ret)); + return ret; +} + +int func_bool_json_int(ObIJsonBase*& in, + bool &is_null_result, + common::ObIAllocator *allocator, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + if (in->get_int() == 1 || in->get_int() == 0) { + bool is_true = (in->get_int() == 1); + ObJsonBoolean* tmp_ans = static_cast (allocator->alloc(sizeof(ObJsonBoolean))); + if (OB_ISNULL(tmp_ans)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at ObJsonDecimal", K(ret)); + } else { + tmp_ans = new (tmp_ans) ObJsonBoolean(is_true); + in = tmp_ans; + } + } else { + is_null_result = true; + } + return ret; +} + +int func_bool_json_double(ObIJsonBase*& in, + bool &is_null_result, + common::ObIAllocator *allocator, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + if (in->get_double() == 1.0 || in->get_double() == 0.0) { + bool is_true = (in->get_double() == 1.0); + ObJsonBoolean* tmp_ans = static_cast (allocator->alloc(sizeof(ObJsonBoolean))); + if (OB_ISNULL(tmp_ans)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate row buffer failed at ObJsonDecimal", K(ret)); + } else { + tmp_ans = new (tmp_ans) ObJsonBoolean(is_true); + in = tmp_ans; 
+ } + } else { + is_null_result = true; + } + return ret; +} + +// 17 * 30 +ObJsonUtil::ObItemMethodValid OB_JSON_VALUE_ITEM_METHOD_CAST_FUNC[ObMaxItemMethod][ObMaxJsonType] = +{ + { + // abs -> ** + cast_succ, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + cast_succ, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // boolean - > *** + set_null_result, // null + set_null_result, // decimal + func_bool_json_int, // int + set_null_result, // uint + func_bool_json_double, // double + set_null_result, // string + set_null_result, //object + set_null_result, //array + cast_succ, // boolean + set_null_result, // date + set_null_result, //time + set_null_result, // datetime + set_null_result, //timestamp + set_null_result, // opaque + cast_succ, // empty + set_null_result, //ofloat + set_null_result, // odouble + set_null_result, // odeciaml + func_bool_json_int, // oint + set_null_result, // olong + set_null_result, // obinary + set_null_result, // oid + set_null_result, //rawhex + set_null_result, // rawid + set_null_result, // oracledate + set_null_result, // odate + set_null_result, // otimestamp + set_null_result, // otimestamptz + set_null_result, // odaysecond + set_null_result, // oyearmonth + }, + { + // bool_only -> *** + func_path_syntax_fail, // null + func_path_syntax_fail, // decimal + func_path_syntax_fail, // int + func_path_syntax_fail, // uint + func_path_syntax_fail, // 
double + func_path_syntax_fail, // string + func_path_syntax_fail, //object + func_path_syntax_fail, //array + cast_succ, // boolean + func_path_syntax_fail, // date + func_path_syntax_fail, //time + func_path_syntax_fail, // datetime + func_path_syntax_fail, //timestamp + func_path_syntax_fail, // opaque + cast_succ, // empty + func_path_syntax_fail, //ofloat + func_path_syntax_fail, // odouble + func_path_syntax_fail, // odeciaml + func_path_syntax_fail, // oint + func_path_syntax_fail, // olong + func_path_syntax_fail, // obinary + func_path_syntax_fail, // oid + func_path_syntax_fail, //rawhex + func_path_syntax_fail, // rawid + func_path_syntax_fail, // oracledate + func_path_syntax_fail, // odate + func_path_syntax_fail, // otimestamp + func_path_syntax_fail, // otimestamptz + func_path_syntax_fail, // odaysecond + func_path_syntax_fail, // oyearmonth + }, + { + // ceiling -> ** + cast_succ, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + cast_succ, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // date -> *** + func_conversion_fail, // j_null + func_conversion_fail, // decimal + func_conversion_fail, // int + func_conversion_fail, // uint + func_conversion_fail, // double + cast_succ, // string + func_conversion_fail, //object + func_conversion_fail, //array + func_conversion_fail, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, 
//timestamp + func_conversion_fail, // opaque + func_conversion_fail, // empty + func_conversion_fail, //ofloat + func_conversion_fail, // odouble + func_conversion_fail, // odeciaml + func_conversion_fail, // oint + func_conversion_fail, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // double -> *** + func_conversion_fail, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + func_conversion_fail, // string + func_conversion_fail, //object + func_conversion_fail, //array + func_conversion_fail, // boolean + func_conversion_fail, // date + func_conversion_fail, //time + func_conversion_fail, // datetime + func_conversion_fail, //timestamp + func_conversion_fail, // opaque + func_conversion_fail, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + func_conversion_fail, // obinary + func_conversion_fail, // oid + func_conversion_fail, //rawhex + func_conversion_fail, // rawid + func_conversion_fail, // oracledate + func_conversion_fail, // odate + func_conversion_fail, // otimestamp + func_conversion_fail, // otimestamptz + func_conversion_fail, // odaysecond + func_conversion_fail, // oyearmonth + }, + { + // floor ->** + cast_succ, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + cast_succ, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + 
cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // length -> ** + cast_succ, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + cast_succ, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // lower -> *** + cast_succ, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + func_upper_json_string, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // number -> + func_num_json_null, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + cast_succ, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + 
cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // num_only -> *** + func_num_json_null, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + cast_succ, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // size ->** + cast_succ, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + cast_succ, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // string-> *** + func_str_json_null, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + cast_succ, 
// string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // str_only -> *** + func_str_json_null, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + cast_succ, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // timestamp -> *** + func_conversion_fail, // j_null + func_conversion_fail, // decimal + func_conversion_fail, // int + func_conversion_fail, // uint + func_conversion_fail, // double + cast_succ, // string + func_conversion_fail, //object + func_conversion_fail, //array + func_conversion_fail, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + func_conversion_fail, // opaque + func_conversion_fail, // empty + func_conversion_fail, //ofloat + func_conversion_fail, // odouble + func_conversion_fail, // odeciaml + func_conversion_fail, // oint + func_conversion_fail, // olong + cast_succ, // obinary + cast_succ, // 
oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // type ->** + cast_succ, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + cast_succ, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, + { + // upper -> *** + func_num_json_null, // j_null + cast_succ, // decimal + cast_succ, // int + cast_succ, // uint + cast_succ, // double + func_upper_json_string, // string + cast_succ, //object + cast_succ, //array + cast_succ, // boolean + cast_succ, // date + cast_succ, //time + cast_succ, // datetime + cast_succ, //timestamp + cast_succ, // opaque + cast_succ, // empty + cast_succ, //ofloat + cast_succ, // odouble + cast_succ, // odeciaml + cast_succ, // oint + cast_succ, // olong + cast_succ, // obinary + cast_succ, // oid + cast_succ, //rawhex + cast_succ, // rawid + cast_succ, // oracledate + cast_succ, // odate + cast_succ, // otimestamp + cast_succ, // otimestamptz + cast_succ, // odaysecond + cast_succ, // oyearmonth + }, +}; + +// ItemJsonCompare + +int OB_JSON_QUERY_ITEM_METHOD_NULL_OPTION[ObMaxItemMethod][ObMaxJsonType] = +{ + { + // abs -> ** + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 0, // string + 0, //object + 0, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, 
// opaque + 0, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 0, // obinary + 0, // oid + 0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // oyearmonth + }, + { + // boolean - > *** + 1, // null + 1, // decimal + 1, // int + 1, // uint + 1, // double + 0, // string + 0, //object + 0, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, // opaque + 0, // empty + 1, //ofloat + 1, // odouble + 1, // odeciaml + 1, // oint + 1, // olong + 0, // obinary + 0, // oid + 0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // oyearmonth + }, + { + // bool_only -> *** + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 0, // string + 0, //object + 0, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, // opaque + 0, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 0, // obinary + 0, // oid + 0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // oyearmonth + }, + { + // ceiling -> ** + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 0, // string + 0, //object + 0, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, // opaque + 0, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 0, // obinary + 0, // oid + 0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // oyearmonth + }, + { + // date -> *** + 1, // null + 1, // decimal + 1, // int + 1, // uint + 1, // double + 1, // string + 1, //object + 1, //array + 1, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 1, // opaque + 1, // empty + 1, //ofloat + 1, // odouble + 1, 
// odeciaml + 1, // oint + 1, // olong + 1, // obinary + 1, // oid + 1, //rawhex + 1, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 1, // odaysecond + 1, // oyearmonth + }, + { + // double -> *** + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 1, // string + 1, //object + 1, //array + 1, // boolean + 1, // date + 1, //time + 1, // datetime + 1, //timestamp + 1, // opaque + 1, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 1, // obinary + 1, // oid + 1, //rawhex + 1, // rawid + 1, // oracledate + 1, // odate + 1, // otimestamp + 1, // otimestamptz + 1, // odaysecond + 1, // oyearmonth + }, + { + // floor ->** + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 0, // string + 0, //object + 0, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, // opaque + 0, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 0, // obinary + 0, // oid + 0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // oyearmonth + }, + { + // length -> ** + 1, // null + 1, // decimal + 1, // int + 0, // uint + 1, // double + 1, // string + 1, //object + 1, //array + 1, // boolean + 1, // date + 1, //time + 1, // datetime + 1, //timestamp + 1, // opaque + 1, // empty + 1, //ofloat + 1, // odouble + 1, // odeciaml + 1, // oint + 1, // olong + 1, // obinary + 1, // oid + 1, //rawhex + 1, // rawid + 1, // oracledate + 1, // odate + 1, // otimestamp + 1, // otimestamptz + 1, // odaysecond + 1, // oyearmonth + }, + { + // lower -> *** + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 0, // string + 1, //object + 1, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, // opaque + 0, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 0, // obinary + 0, // oid + 
0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // oyearmonth + }, + { + // number -> + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 1, // string + 1, //object + 1, //array + 1, // boolean + 1, // date + 1, //time + 1, // datetime + 1, //timestamp + 1, // opaque + 1, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 1, // obinary + 1, // oid + 1, //rawhex + 1, // rawid + 1, // oracledate + 1, // odate + 1, // otimestamp + 1, // otimestamptz + 1, // odaysecond + 1, // oyearmonth + }, + { + // num_only -> *** + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 1, // string + 1, //object + 1, //array + 1, // boolean + 1, // date + 1, //time + 1, // datetime + 1, //timestamp + 1, // opaque + 1, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 1, // obinary + 1, // oid + 1, //rawhex + 1, // rawid + 1, // oracledate + 1, // odate + 1, // otimestamp + 1, // otimestamptz + 1, // odaysecond + 1, // oyearmonth + }, + { + // size ->** + 0, // j_null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 0, // string + 0, //object + 0, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, // opaque + 0, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 0, // obinary + 0, // oid + 0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // oyearmonth + }, + { + // string-> *** + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 0, // string + 1, //object + 1, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, // opaque + 0, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 0, // obinary + 0, // oid + 0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // 
otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // oyearmonth + }, + { + // str_only -> *** + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 0, // string + 1, //object + 1, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, // opaque + 0, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 0, // obinary + 0, // oid + 0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // oyearmonth + }, + { + // timestamp -> *** + 1, // null + 1, // decimal + 1, // int + 1, // uint + 1, // double + 1, // string + 1, //object + 1, //array + 1, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 1, // opaque + 1, // empty + 1, //ofloat + 1, // odouble + 1, // odeciaml + 1, // oint + 1, // olong + 1, // obinary + 1, // oid + 1, //rawhex + 1, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 1, // odaysecond + 1, // oyearmonth + }, + { + // type ->** + 0, // j_null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 0, // string + 0, //object + 0, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, // opaque + 0, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 0, // obinary + 0, // oid + 0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // oyearmonth + }, + { + // upper -> *** + 0, // null + 0, // decimal + 0, // int + 0, // uint + 0, // double + 0, // string + 1, //object + 1, //array + 0, // boolean + 0, // date + 0, //time + 0, // datetime + 0, //timestamp + 0, // opaque + 0, // empty + 0, //ofloat + 0, // odouble + 0, // odeciaml + 0, // oint + 0, // olong + 0, // obinary + 0, // oid + 0, //rawhex + 0, // rawid + 0, // oracledate + 0, // odate + 0, // otimestamp + 0, // otimestamptz + 0, // odaysecond + 0, // 
oyearmonth
+  },
+};
+
+int ObJsonUtil::get_query_item_method_null_option(ObJsonPath* j_path,   // path whose last node selects the table row
+                                                  ObIJsonBase* j_base)  // JSON value whose node type selects the column
+{
+  size_t item_method = static_cast<size_t>(j_path->get_last_node_type());
+  size_t json_type = static_cast<size_t>(j_base->json_type());
+  // rows start at JPN_ABS (the first item method); NOTE(review): indices are not range-checked here
+  return OB_JSON_QUERY_ITEM_METHOD_NULL_OPTION[item_method - ObJsonPathNodeType::JPN_ABS][json_type];
+}
+
+ObJsonUtil::ObItemMethodValid ObJsonUtil::get_item_method_cast_res_func(ObJsonPath* j_path,   // path whose last node selects the table row
+                                                                        ObIJsonBase* j_base)  // JSON value whose node type selects the column
+{
+  // NOTE(review): neither index is range-checked; callers presumably pass an item-method path and a regular JSON type - confirm
+  size_t item_method = static_cast<size_t>(j_path->get_last_node_type());
+  size_t json_type = static_cast<size_t>(j_base->json_type());
+  // rows start at JPN_ABS (the first item method), same indexing as get_query_item_method_null_option
+  return OB_JSON_VALUE_ITEM_METHOD_CAST_FUNC[item_method - ObJsonPathNodeType::JPN_ABS][json_type];
+}
+
+ObJsonUtil::ObJsonCastSqlScalar ObJsonUtil::get_json_cast_func(ObObjType dst_type)  // cast routine for dst_type's type class
+{
+  return OB_JSON_CAST_SQL_EXPLICIT[OBJ_TYPE_TO_CLASS[dst_type]];
+}
+
+bool ObJsonUtil::is_number_item_method(ObJsonPath* j_path)  // true when the path ends in one of the five node types below
+{
+  const auto node_type = j_path->get_last_node_type();  // read once instead of once per comparison
+  return node_type == JPN_NUMBER || node_type == JPN_NUM_ONLY
+      || node_type == JPN_LENGTH
+      || node_type == JPN_TYPE
+      || node_type == JPN_SIZE;
+}
+
+} // sql
+} // oceanbase
diff --git a/src/sql/engine/expr/ob_expr_json_utils.h b/src/sql/engine/expr/ob_expr_json_utils.h
new file mode 100644
index 0000000000..20e478b140
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_json_utils.h
@@ -0,0 +1,212 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details. + * This file is for implement of func json expr helper + */ + +#ifndef OCEANBASE_SQL_OB_EXPR_JSON_UTILS_H_ +#define OCEANBASE_SQL_OB_EXPR_JSON_UTILS_H_ + +#include "sql/engine/expr/ob_expr_util.h" +#include "sql/engine/expr/ob_expr_lob_utils.h" +#include "share/object/ob_obj_cast.h" +#include "objit/common/ob_item_type.h" +#include "sql/session/ob_sql_session_info.h" +#include "lib/json_type/ob_json_tree.h" +#include "lib/json_type/ob_json_base.h" +#include "lib/json_type/ob_json_bin.h" +#include "lib/json_type/ob_json_path.h" +#include "lib/json_type/ob_json_parse.h" +#include "sql/engine/expr/ob_expr_result_type_util.h" +#include "sql/engine/expr/ob_datum_cast.h" +#include "ob_expr_json_func_helper.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace sql +{ + +const size_t ObMaxItemMethod = ObJsonPathNodeType::JPN_END_FUNC_FLAG - ObJsonPathNodeType::JPN_BEGIN_FUNC_FLAG - 1; +const size_t ObMaxJsonType = (size_t)ObJsonNodeType::J_MAX_TYPE; +// from sql_parser_base.h +#define DEFAULT_STR_LENGTH -1 + +/* process on mismatch { error : 0, null : 1, ignore : 2 }*/ +typedef enum JsnValueMisMatch { + OB_JSON_ON_MISMATCH_ERROR, // 0 + OB_JSON_ON_MISMATCH_NULL, // 1 + OB_JSON_ON_MISMATCH_IGNORE, // 2 + OB_JSON_ON_MISMATCH_IMPLICIT, // 3 + OB_JSON_TYPE_MISSING_DATA, // 4 + OB_JSON_TYPE_EXTRA_DATA, // 5 + OB_JSON_TYPE_TYPE_ERROR, // 6 + OB_JSON_TYPE_IMPLICIT, // 7 + OB_JSON_TYPE_DOT, // 8 +} JsnValueMisMatch; + +struct ObJsonCastParam { + ObJsonCastParam(ObObjType dst_type, ObCollationType in_coll_type, ObCollationType dst_coll_type, int8_t ascii_type) : + in_coll_type_(in_coll_type), + dst_coll_type_(dst_coll_type), + dst_type_(dst_type), + ascii_type_(ascii_type), + is_quote_(false), + is_const_(false), + is_trunc_(false), + is_pretty_(false), + is_only_check_(false), + is_json_table_(false), + rt_expr_(nullptr) + {} + ~ObJsonCastParam() {} + ObCollationType in_coll_type_; + ObCollationType 
dst_coll_type_; + ObObjType dst_type_; + int8_t ascii_type_; + bool is_quote_; + bool is_const_; + bool is_trunc_; + bool is_pretty_; + bool is_only_check_; // only check cast, not set result + bool is_json_table_; // cast mode for json table. + const ObExpr *rt_expr_; // get nls format expr +}; + +struct ObExprJsonQueryParamInfo : public ObIExprExtraInfo +{ + OB_UNIS_VERSION(1); +public: + ObExprJsonQueryParamInfo(common::ObIAllocator &alloc, ObExprOperatorType type) + : ObIExprExtraInfo(alloc, type), allocator_(alloc), truncate_(0), + format_json_(0), wrapper_(0), empty_type_(0), error_type_(0), + pretty_type_(0), ascii_type_(0), scalars_type_(0), + path_str_(), on_mismatch_(alloc), + on_mismatch_type_(alloc), j_path_(nullptr) + { + } + virtual int deep_copy(common::ObIAllocator &allocator, + const ObExprOperatorType type, + ObIExprExtraInfo *&copied_info) const override; + int init_jsn_val_expr_param(ObIAllocator &alloc, ObExprCGCtx &op_cg_ctx, const ObRawExpr* raw_expr); + int init_jsn_query_expr_param(ObIAllocator &alloc, ObExprCGCtx &op_cg_ctx, const ObRawExpr* raw_expr); + int get_int_val_from_raw(ObIAllocator &alloc, ObExecContext *exec_ctx, const ObRawExpr* raw_expr, ObObj &const_data); + int init_mismatch_array(const ObRawExpr* raw_expr, + ObExecContext *exec_ctx); + int parse_json_path(ObString path_str, ObJsonPath*& j_path_); + + common::ObIAllocator &allocator_; + int8_t truncate_; + int8_t format_json_; + int8_t wrapper_; + int8_t empty_type_; + int8_t error_type_; + int8_t pretty_type_; + int8_t ascii_type_; + int8_t scalars_type_; + ObString path_str_; + common::ObFixedArray on_mismatch_; + common::ObFixedArray on_mismatch_type_; + ObJsonPath *j_path_; +}; + +class ObJsonUtil final +{ +public: + static int get_query_item_method_null_option(ObJsonPath* j_path, + ObIJsonBase* j_base); + static int cast_to_res(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + 
ObDatum &res,
+                         uint8_t &is_type_mismatch);
+  static int set_lob_datum(common::ObIAllocator *allocator,
+                           const ObExpr &expr,
+                           ObEvalCtx &ctx,
+                           ObObjType dst_type,
+                           uint8_t ascii_type,
+                           ObDatum &res);
+  static int time_scale_check(const common::ObAccuracy &accuracy, int64_t &value,
+                              bool strict = false);
+  static bool type_cast_to_string(ObString &json_string,
+                                  common::ObIAllocator *allocator,
+                                  ObEvalCtx &ctx,
+                                  ObIJsonBase *j_base,
+                                  common::ObAccuracy &accuracy);
+  static int cast_to_number_type(common::ObIAllocator *allocator,
+                                 ObIJsonBase *j_base,
+                                 common::ObAccuracy &accuracy,
+                                 ObJsonCastParam &cast_param,
+                                 uint8_t &is_type_mismatch,
+                                 number::ObNumber &val);
+  typedef int (*ObItemMethodValid)(ObIJsonBase*& in,  // return type of get_item_method_cast_res_func
+                                   bool &is_null_result,
+                                   common::ObIAllocator *allocator,
+                                   uint8_t &is_type_mismatch);
+  typedef int (*ObJsonCastSqlScalar)(common::ObIAllocator *allocator,  // return type of get_json_cast_func
+                                     ObEvalCtx &ctx,
+                                     ObIJsonBase *j_base,
+                                     common::ObAccuracy &accuracy,
+                                     ObJsonCastParam &cast_param,
+                                     ObDatum &res,
+                                     uint8_t &is_type_mismatch);
+  static int get_accuracy_internal(common::ObAccuracy &accuracy,
+                                   ObEvalCtx& ctx,
+                                   ObObjType &dest_type,
+                                   const int64_t value,
+                                   const ObLengthSemantics &length_semantics);
+  static int get_accuracy(const ObExpr &expr,
+                          ObEvalCtx& ctx,
+                          common::ObAccuracy &accuracy,
+                          ObObjType &dest_type,
+                          bool &is_cover_by_error);
+  static int number_range_check(const common::ObAccuracy &accuracy,
+                                ObIAllocator *allocator,
+                                number::ObNumber &val,
+                                bool strict = false);
+  static int datetime_scale_check(const common::ObAccuracy &accuracy,
+                                  int64_t &value,
+                                  bool strict = false);
+  static ObJsonUtil::ObItemMethodValid get_item_method_cast_res_func(ObJsonPath* j_path,
+                                                                     ObIJsonBase* j_base);
+  static ObJsonUtil::ObJsonCastSqlScalar get_json_cast_func(ObObjType dst_type);
+  static int get_json_path(ObExpr* expr,
+                           ObEvalCtx &ctx,
+                           bool &is_null_result,
+                           ObJsonParamCacheCtx *&param_ctx,  // reference to the caller's cache-context pointer
+                           common::ObIAllocator &temp_allocator,
+                           bool
&is_cover_by_error); + static int get_json_doc(ObExpr *expr, + ObEvalCtx &ctx, + common::ObIAllocator &allocator, + ObIJsonBase*& j_base, + bool &is_null, bool & is_cover_by_error, + bool relax = false); + static bool is_number_item_method(ObJsonPath* j_path); + static int bit_length_check(const ObAccuracy &accuracy, + uint64_t &value); + static int padding_char_for_cast(int64_t padding_cnt, + const ObCollationType &padding_cs_type, + ObIAllocator &alloc, + ObString &padding_res); + static int set_mismatch_val(ObIArray& val, ObIArray& type, int64_t& opt_val, uint32_t& pos); + static int init_json_path(ObIAllocator &alloc, ObExprCGCtx &op_cg_ctx, + const ObRawExpr* path, + ObExprJsonQueryParamInfo& res); +private: + DISALLOW_COPY_AND_ASSIGN(ObJsonUtil); +}; + +} // sql +} // oceanbase +#endif // OCEANBASE_SQL_OB_EXPR_JSON_UTILS_H_ \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_json_value.cpp b/src/sql/engine/expr/ob_expr_json_value.cpp index 33b53724ed..62ae68fa19 100644 --- a/src/sql/engine/expr/ob_expr_json_value.cpp +++ b/src/sql/engine/expr/ob_expr_json_value.cpp @@ -18,15 +18,14 @@ #include "sql/session/ob_sql_session_info.h" #include "share/object/ob_obj_cast_util.h" #include "share/object/ob_obj_cast.h" +#include "share/ob_json_access_utils.h" #include "sql/engine/expr/ob_expr_cast.h" #include "sql/engine/expr/ob_datum_cast.h" #include "sql/resolver/expr/ob_raw_expr_util.h" #include "lib/oblog/ob_log_module.h" #include "ob_expr_json_func_helper.h" #include "lib/charset/ob_charset.h" - -// from sql_parser_base.h -#define DEFAULT_STR_LENGTH -1 +#include "ob_expr_json_utils.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -36,17 +35,6 @@ namespace oceanbase namespace sql { -#define CAST_FAIL(stmt) \ - (OB_UNLIKELY((OB_SUCCESS != (ret = get_cast_ret((stmt)))))) - -#define GET_SESSION() \ - ObBasicSessionInfo *session = ctx.exec_ctx_.get_my_session(); \ - if (OB_ISNULL(session)) { \ - ret = OB_ERR_UNEXPECTED; \ - 
LOG_WARN("session is NULL", K(ret)); \ - } else - - ObExprJsonValue::ObExprJsonValue(ObIAllocator &alloc) : ObFuncExprOperator(alloc, T_FUN_SYS_JSON_VALUE, N_JSON_VALUE, MORE_THAN_TWO, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) { @@ -70,38 +58,9 @@ int ObExprJsonValue::calc_result_typeN(ObExprResType& type, bool is_oracle_mode = lib::is_oracle_mode(); //type.set_json(); // json doc : 0 - ObObjType doc_type = types_stack[JSN_VAL_DOC].get_type(); - if (types_stack[JSN_VAL_DOC].get_type() == ObNullType) { - } else if (!ObJsonExprHelper::is_convertible_to_json(doc_type)) { - if (lib::is_oracle_mode()) { - ret = OB_ERR_INVALID_TYPE_FOR_OP; - LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_OP, ob_obj_type_str(types_stack[JSN_VAL_DOC].get_type()), "JSON"); - } else { - ret = OB_ERR_INVALID_TYPE_FOR_JSON; - LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_JSON, 1, "json_value"); - LOG_WARN("Invalid type for json doc", K(doc_type), K(ret)); - } - } else if (ob_is_string_type(doc_type)) { - if (is_oracle_mode) { - if (types_stack[JSN_VAL_DOC].get_collation_type() == CS_TYPE_BINARY) { - types_stack[JSN_VAL_DOC].set_calc_collation_type(CS_TYPE_BINARY); - } else if (types_stack[JSN_VAL_DOC].get_charset_type() != CHARSET_UTF8MB4) { - types_stack[JSN_VAL_DOC].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); - } - } else { - if (types_stack[JSN_VAL_DOC].get_collation_type() == CS_TYPE_BINARY) { - // unsuport string type with binary charset - ret = OB_ERR_INVALID_JSON_CHARSET; - LOG_WARN("Unsupport for string type with binary charset input.", K(ret), K(doc_type)); - } else if (types_stack[JSN_VAL_DOC].get_charset_type() != CHARSET_UTF8MB4) { - types_stack[JSN_VAL_DOC].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); - } - } - } else if (doc_type == ObJsonType) { - // do nothing - } else { - types_stack[JSN_VAL_DOC].set_calc_type(ObLongTextType); - types_stack[JSN_VAL_DOC].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + bool is_json_input = false; + if 
(OB_FAIL(ObExprJsonValue::calc_input_type(types_stack[0], is_json_input))) { + LOG_WARN("fail to calc input type", K(ret)); } // json path : 1 @@ -124,80 +83,42 @@ int ObExprJsonValue::calc_result_typeN(ObExprResType& type, // returning type : 2 ObExprResType dst_type; if (OB_SUCC(ret)) { - if (OB_FAIL(get_cast_type(types_stack[JSN_VAL_RET], dst_type, type_ctx))) { + if (OB_FAIL(ObJsonExprHelper::get_cast_type(types_stack[JSN_VAL_RET], dst_type, type_ctx))) { LOG_WARN("get cast dest type failed", K(ret)); - } else if (OB_FAIL(set_dest_type(types_stack[JSN_VAL_DOC], type, dst_type, type_ctx))) { + } else if (OB_FAIL(ObJsonExprHelper::set_dest_type(types_stack[JSN_VAL_DOC], type, dst_type, type_ctx))) { LOG_WARN("set dest type failed", K(ret)); } else { type.set_calc_collation_type(type.get_collation_type()); } } - - // ascii 3 + // truncate 3 + if (OB_SUCC(ret)) { + if (types_stack[JSN_VAL_TRUNC].get_type() == ObNullType) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param type is unexpected", K(types_stack[JSN_VAL_TRUNC].get_type()), K(JSN_VAL_TRUNC)); + } else if (types_stack[JSN_VAL_TRUNC].get_type() != ObIntType) { + types_stack[JSN_VAL_TRUNC].set_calc_type(ObIntType); + } + } + // ascii 4 if (OB_SUCC(ret) && is_oracle_mode) { - if (OB_FAIL(ObJsonExprHelper::parse_asc_option(types_stack[3], types_stack[0], type, type_ctx))) { + if (OB_FAIL(ObJsonExprHelper::parse_asc_option(types_stack[JSN_VAL_ASCII], types_stack[JSN_VAL_DOC], type, type_ctx))) { LOG_WARN("fail to parse asc option.", K(ret)); } } - - // empty : 4, 5, 6 + // empty : 5, 6 if (OB_SUCC(ret)) { - ObExprResType temp_type; - if (types_stack[JSN_VAL_EMPTY].get_type() == ObNullType) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN(" param type is unexpected", K(types_stack[JSN_VAL_EMPTY].get_type())); - } else if (types_stack[JSN_VAL_EMPTY].get_type() != ObIntType) { - types_stack[JSN_VAL_EMPTY].set_calc_type(ObIntType); - } else if (types_stack[JSN_VAL_EMPTY_DEF].get_type() == ObNullType) { - // do nothing - } else 
if (OB_FAIL(set_dest_type(types_stack[JSN_VAL_EMPTY_DEF], temp_type, dst_type, type_ctx))) { - LOG_WARN("set dest type failed", K(ret)); - } else { - types_stack[JSN_VAL_EMPTY_DEF].set_calc_type(temp_type.get_type()); - types_stack[JSN_VAL_EMPTY_DEF].set_calc_collation_type(temp_type.get_collation_type()); - types_stack[JSN_VAL_EMPTY_DEF].set_calc_collation_level(temp_type.get_collation_level()); - types_stack[JSN_VAL_EMPTY_DEF].set_calc_accuracy(temp_type.get_accuracy()); - } - if (types_stack[JSN_VAL_EMPTY_DEF_PRE].get_type() == ObNullType) { - // do nothing - } else { - types_stack[JSN_VAL_EMPTY_DEF_PRE].set_calc_type(types_stack[JSN_VAL_EMPTY_DEF_PRE].get_type()); - types_stack[JSN_VAL_EMPTY_DEF_PRE].set_calc_collation_type(types_stack[JSN_VAL_EMPTY_DEF_PRE].get_collation_type()); - types_stack[JSN_VAL_EMPTY_DEF_PRE].set_calc_collation_level(types_stack[JSN_VAL_EMPTY_DEF_PRE].get_collation_level()); - types_stack[JSN_VAL_EMPTY_DEF_PRE].set_calc_accuracy(types_stack[JSN_VAL_EMPTY_DEF_PRE].get_accuracy()); + if (OB_FAIL(calc_empty_error_type(types_stack, JSN_VAL_EMPTY, dst_type, type_ctx))) { + LOG_WARN("fail to parse empty value", K(ret)); } } - - // error : 7, 8,9 + // error : 7, 8 if (OB_SUCC(ret)) { - ObExprResType temp_type; - if (types_stack[JSN_VAL_ERROR].get_type() == ObNullType) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN(" param type is unexpected", K(types_stack[JSN_VAL_ERROR].get_type())); - } else if (types_stack[JSN_VAL_ERROR].get_type() != ObIntType) { - types_stack[JSN_VAL_ERROR].set_calc_type(ObIntType); - } else if (types_stack[JSN_VAL_ERROR_DEF].get_type() == ObNullType) { - // do nothing - } else if (OB_FAIL(set_dest_type(types_stack[JSN_VAL_ERROR_DEF], temp_type, dst_type, type_ctx))) { - LOG_WARN("set dest type failed", K(ret)); - } else { - types_stack[JSN_VAL_ERROR_DEF].set_calc_type(temp_type.get_type()); - types_stack[JSN_VAL_ERROR_DEF].set_calc_collation_type(temp_type.get_collation_type()); - 
types_stack[JSN_VAL_ERROR_DEF].set_calc_collation_level(temp_type.get_collation_level()); - types_stack[JSN_VAL_ERROR_DEF].set_calc_accuracy(temp_type.get_accuracy()); - } - if (types_stack[JSN_VAL_ERROR_DEF_PRE].get_type() == ObNullType) { - // do nothing - } else { - types_stack[JSN_VAL_ERROR_DEF_PRE].set_calc_type(types_stack[JSN_VAL_ERROR_DEF_PRE].get_type()); - types_stack[JSN_VAL_ERROR_DEF_PRE].set_calc_collation_type(types_stack[JSN_VAL_ERROR_DEF_PRE].get_collation_type()); - types_stack[JSN_VAL_ERROR_DEF_PRE].set_calc_collation_level(types_stack[JSN_VAL_ERROR_DEF_PRE].get_collation_level()); - types_stack[JSN_VAL_ERROR_DEF_PRE].set_calc_accuracy(types_stack[JSN_VAL_ERROR_DEF_PRE].get_accuracy()); + if (OB_FAIL(calc_empty_error_type(types_stack, JSN_VAL_ERROR, dst_type, type_ctx))) { + LOG_WARN("fail to parse error value", K(ret)); } } - - // mismatch : 10, - + // mismatch : 9, if (OB_SUCC(ret)) { for (size_t i = JSN_VAL_MISMATCH; OB_SUCC(ret) && i < param_num; i++) { if (types_stack[i].get_type() == ObNullType) { @@ -208,32 +129,90 @@ int ObExprJsonValue::calc_result_typeN(ObExprResType& type, } } } - } type_ctx.set_cast_mode(type_ctx.get_cast_mode() | CM_CONST_TO_DECIMAL_INT_EQ); return ret; } - -static int get_on_truncate(const ObExpr &expr, - ObEvalCtx &ctx, - uint8_t index, - bool &is_cover_by_error, - uint8 &type) +int ObExprJsonValue::calc_input_type(ObExprResType& types_stack, bool &is_json_input) { INIT_SUCC(ret); - ObExpr *json_arg = expr.args_[index]; - ObObjType val_type = json_arg->datum_meta_.type_; - ObDatum *json_datum = NULL; - if (OB_FAIL(json_arg->eval(ctx, json_datum))) { - is_cover_by_error = false; - LOG_WARN("eval json arg failed", K(ret)); - } else if (val_type != ObIntType) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input type error", K(val_type)); + bool is_oracle_mode = lib::is_oracle_mode(); + ObObjType doc_type = types_stack.get_type(); + if (types_stack.get_type() == ObNullType) { + } else if 
(!ObJsonExprHelper::is_convertible_to_json(doc_type)) { + if (is_oracle_mode) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_OP, ob_obj_type_str(types_stack.get_type()), "JSON"); + } else { + ret = OB_ERR_INVALID_TYPE_FOR_JSON; + LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_JSON, 1, "json_value"); + LOG_WARN("Invalid type for json doc", K(doc_type), K(ret)); + } + } else if (ob_is_string_type(doc_type)) { + if (is_oracle_mode) { + if (types_stack.get_collation_type() == CS_TYPE_BINARY) { + types_stack.set_calc_collation_type(CS_TYPE_BINARY); + } else if (types_stack.get_charset_type() != CHARSET_UTF8MB4) { + types_stack.set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + } + } else { + if (types_stack.get_collation_type() == CS_TYPE_BINARY) { + // unsuport string type with binary charset + ret = OB_ERR_INVALID_JSON_CHARSET; + LOG_WARN("Unsupport for string type with binary charset input.", K(ret), K(doc_type)); + } else if (types_stack.get_charset_type() != CHARSET_UTF8MB4) { + types_stack.set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + } + } + } else if (doc_type == ObJsonType) { + is_json_input = true; + // do nothing } else { - int64_t option_type = json_datum->get_int(); - type = static_cast(option_type); + types_stack.set_calc_type(ObLongTextType); + types_stack.set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + } + return ret; +} + +int ObExprJsonValue::calc_empty_error_type(ObExprResType* types_stack, uint8_t pos, ObExprResType &dst_type, ObExprTypeCtx& type_ctx) +{ + INIT_SUCC(ret); + ObExprResType temp_type; + if (types_stack[pos].get_type() == ObNullType) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN(" param type is unexpected", K(types_stack[pos].get_type())); + } else if (types_stack[pos].get_type() != ObIntType) { + types_stack[pos].set_calc_type(ObIntType); + } else if (types_stack[pos + 1].get_type() == ObNullType) { + // do nothing + } else if (lib::is_oracle_mode() && OB_FAIL(check_default_value(types_stack, pos, dst_type))) { // check 
default value valid in oracle mode + LOG_WARN("fail to get empty value", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::set_dest_type(types_stack[pos + 1], temp_type, dst_type, type_ctx))) { + LOG_WARN("set dest type failed", K(ret)); + } else { + types_stack[pos + 1].set_calc_type(temp_type.get_type()); + types_stack[pos + 1].set_calc_collation_type(temp_type.get_collation_type()); + types_stack[pos + 1].set_calc_collation_level(temp_type.get_collation_level()); + types_stack[pos + 1].set_calc_accuracy(temp_type.get_accuracy()); + } + return ret; +} + +int ObExprJsonValue::check_default_value(ObExprResType* types_stack, int8_t pos, ObExprResType &dst_type) +{ + INIT_SUCC(ret); + int8_t type = 0; + ObObjType val_type = types_stack[pos + 1].get_type(); + if (OB_FAIL(ObJsonExprHelper::get_expr_option_value(types_stack[pos], type))) { + LOG_WARN("fail to get option", K(ret)); + } else if (type == JSN_VALUE_DEFAULT) { + ObString default_val(types_stack[pos + 1].get_param().get_string().length(), types_stack[pos + 1].get_param().get_string().ptr()); + if (val_type == ObCharType || val_type == ObNumberType || val_type == ObDecimalIntType) { + if (OB_FAIL(ObJsonExprHelper::pre_default_value_check(dst_type.get_type(), default_val, val_type, dst_type.get_length()))) { + LOG_WARN("default value pre check fail", K(ret), K(default_val)); + } + } } return ret; } @@ -241,334 +220,284 @@ static int get_on_truncate(const ObExpr &expr, int ObExprJsonValue::eval_json_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { INIT_SUCC(ret); - ObDatum *json_datum = NULL; - ObExpr *json_arg = expr.args_[0]; - ObObjType type = json_arg->datum_meta_.type_; bool is_cover_by_error = true; bool is_null_result = false; - uint8_t is_type_cast = 0; + uint8_t is_type_mismatch = 0; ObDatum *return_val = NULL; ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); - ObIJsonBase *j_base = NULL; - - // parse json doc - if 
(OB_FAIL(json_arg->eval(ctx, json_datum))) { - LOG_WARN("eval json arg failed", K(ret)); + ObJsonBin st_json(&temp_allocator); + ObIJsonBase *j_base = &st_json; + ObJsonSeekResult hits; + ObJsonBin res_json(&temp_allocator); + hits.res_point_ = &res_json; + ObJsonParamCacheCtx ctx_cache(&temp_allocator); + ObJsonParamCacheCtx* param_ctx = NULL; + /** + * get content point, + */ + param_ctx = ObJsonExprHelper::get_param_cache_ctx(expr.expr_ctx_id_, &ctx.exec_ctx_); + if (OB_ISNULL(param_ctx)) { + param_ctx = &ctx_cache; + } + // init flag + if (param_ctx->is_first_exec_ && OB_FAIL(init_ctx_var(expr, param_ctx))) { is_cover_by_error = false; - } else if (type == ObNullType || json_datum->is_null()) { - is_null_result = true; - } else if (type != ObJsonType && !ob_is_string_type(type)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input type error", K(type)); - } else { - ObString j_str = json_datum->get_string(); - ObJsonInType j_in_type = ObJsonExprHelper::get_json_internal_type(type); - if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(json_arg, ctx, temp_allocator, j_str, is_null_result))) { - LOG_WARN("fail to get real data.", K(ret), K(j_str)); - } else if (j_str.length() == 0) { // maybe input json doc is null type - is_null_result = true; - } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&temp_allocator, j_str, j_in_type, - j_in_type, j_base))) { - LOG_WARN("fail to get json base.", K(ret), K(type), K(j_str), K(j_in_type)); - if (ret == OB_ERR_JSON_OUT_OF_DEPTH) { - is_cover_by_error = false; - } - } + LOG_WARN("fail to init param ctx", K(ret)); + } else if (param_ctx->is_first_exec_ + && OB_FAIL(ObExprJsonValue::get_clause_param_value(expr, ctx, &param_ctx->json_param_, + is_cover_by_error))) { // get param value & check param valid + LOG_WARN("fail to get param value", K(ret)); + } else if (OB_FAIL(ObJsonUtil::get_json_doc(expr.args_[JSN_VAL_DOC], ctx, + temp_allocator, j_base, is_null_result, + is_cover_by_error))) { // parse json doc + LOG_WARN("fail to parse 
json doc", K(ret)); + } else if (OB_ISNULL(param_ctx->json_param_.json_path_) + && OB_FAIL(ObJsonUtil::get_json_path(expr.args_[JSN_VAL_PATH], + ctx, is_null_result, param_ctx, temp_allocator, + is_cover_by_error))) { // parse json path + LOG_WARN("fail to get json path", K(ret)); } - - // parse return node acc - ObAccuracy accuracy; - ObObjType dst_type; - if (OB_SUCC(ret) && !is_null_result) { - ret = get_accuracy(expr, ctx, accuracy, dst_type, is_cover_by_error); - } else if (is_cover_by_error) { // when need error option, should do get accuracy - get_accuracy(expr, ctx, accuracy, dst_type, is_cover_by_error); - } - - // parse empty option - ObDatum *empty_datum = NULL; - ObObjType empty_val_type; - uint8_t empty_type = JSN_VALUE_IMPLICIT; - if (OB_SUCC(ret) && !is_null_result) { - ret = get_on_empty_or_error(expr, ctx, 5, is_cover_by_error, accuracy, empty_type, &empty_datum, dst_type, empty_val_type); - } - - // parse error option - ObDatum *error_val = NULL; - ObObjType error_val_type; - uint8_t error_type = JSN_VALUE_IMPLICIT; - if (OB_SUCC(ret) && !is_null_result) { - ret = get_on_empty_or_error(expr, ctx, 8, is_cover_by_error, accuracy, error_type, &error_val, dst_type, error_val_type); - } else if (is_cover_by_error) { // always get error option for return default value on error - int temp_ret = get_on_empty_or_error(expr, ctx, 8, is_cover_by_error, accuracy, - error_type, &error_val, dst_type, error_val_type); + // parse empty error default value + if ((OB_SUCC(ret) && !is_null_result) || is_cover_by_error) { + int temp_ret = get_default_empty_error_value(expr, &param_ctx->json_param_, ctx); if (temp_ret != OB_SUCCESS) { - LOG_WARN("failed to get error option.", K(temp_ret)); - } - } - - // parse json path and do seek - ObJsonBaseVector hits; - if (OB_SUCC(ret) && !is_null_result) { - json_arg = expr.args_[1]; - type = json_arg->datum_meta_.type_; - if (OB_FAIL(json_arg->eval(ctx, json_datum))) { is_cover_by_error = false; - LOG_WARN("eval json arg failed", 
K(ret)); - } else if (type == ObNullType || json_datum->is_null()) { - is_null_result = true; - } else if (!ob_is_string_type(type)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input type error", K(type)); - } else { - ObString j_path_text = json_datum->get_string(); - ObJsonPath *j_path; - if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(json_arg, ctx, temp_allocator, j_path_text, is_null_result))) { - LOG_WARN("fail to get real data.", K(ret), K(j_path_text)); - } else if (j_path_text.length() == 0) { - is_null_result = true; - } - - ObJsonPathCache ctx_cache(&temp_allocator); - ObJsonPathCache* path_cache = ObJsonExprHelper::get_path_cache_ctx(expr.expr_ctx_id_, &ctx.exec_ctx_); - path_cache = ((path_cache != NULL) ? path_cache : &ctx_cache); - - if (OB_FAIL(ret)) { - } else if (OB_FAIL(ObJsonExprHelper::find_and_add_cache(path_cache, j_path, j_path_text, 1, true))) { - is_cover_by_error = false; - LOG_WARN("parse text to path failed", K(json_datum->get_string()), K(ret)); - } else if (OB_FAIL(doc_do_seek(hits, is_null_result, json_datum, j_path, j_base, expr, ctx, is_cover_by_error, accuracy, - dst_type, return_val, error_val, error_type, empty_datum, empty_type, empty_val_type, is_type_cast))) { - LOG_WARN("doc do seek fail", K(ret)); - } + ret = temp_ret; } } - // parse mismatch, mysql don't need this clause - ObVector mismatch_val; - ObVector mismatch_type; //OB_JSON_TYPE_IMPLICIT - if (OB_SUCC(ret) && !is_null_result) { - ret = get_on_mismatch(expr, ctx, JSN_VAL_MISMATCH, is_cover_by_error, accuracy, mismatch_val, mismatch_type); - if (ret != OB_SUCCESS || mismatch_type.size() == 0 || mismatch_val.size() == 0) { - LOG_WARN("failed to get mismatch option.", K(ret), K(mismatch_type.size()), K(mismatch_val.size())); - } + if (OB_SUCC(ret) && !is_null_result && OB_FAIL(doc_do_seek(hits, + is_null_result, &param_ctx->json_param_, j_base, + expr, ctx, is_cover_by_error, + return_val, is_type_mismatch))) { // do seek + LOG_WARN("doc do seek fail", K(ret)); } - // fill 
output - if (OB_UNLIKELY(OB_FAIL(ret))) { - if (is_cover_by_error) { - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - LOG_WARN("set error val fail", K(ret)); - } + // fill output and deal error case + if (OB_FAIL(ret)) { + if (is_cover_by_error && !try_set_error_val(expr, ctx, res, ret, &param_ctx->json_param_, is_type_mismatch)) { + LOG_WARN("set error val fail", K(ret)); } LOG_WARN("json_values failed", K(ret)); - } else if (is_null_result) { - res.set_null(); } else { - if (return_val != NULL) { - res.set_datum(*return_val); - } else { - ObCollationType in_coll_type = expr.args_[0]->datum_meta_.cs_type_; - ObCollationType dst_coll_type = expr.datum_meta_.cs_type_; - ret = cast_to_res(&temp_allocator, expr, ctx, hits[0], error_type, error_val, - accuracy, dst_type, in_coll_type, dst_coll_type, res, mismatch_val, mismatch_type, is_type_cast, 0, 0); + ret = set_result(expr, &param_ctx->json_param_, ctx, is_null_result, is_cover_by_error, is_type_mismatch, + res, return_val, &temp_allocator, hits); + } + if (OB_SUCC(ret)) { + param_ctx->is_first_exec_ = false; + } + return ret; +} + +int ObExprJsonValue::extract_plan_cache_param(const ObExprJsonQueryParamInfo *info, ObJsonExprParam& json_param) +{ + INIT_SUCC(ret); + json_param.truncate_ = info->truncate_; + json_param.empty_type_ = info->empty_type_; + json_param.error_type_ = info->error_type_; + json_param.ascii_type_ = info->ascii_type_; + json_param.json_path_ = info->j_path_; + json_param.is_init_from_cache_ = true; + for (int i = 0; OB_SUCC(ret) && i < info->on_mismatch_.count(); i++) { + if (OB_FAIL(json_param.on_mismatch_.push_back(info->on_mismatch_.at(i)))) { + LOG_WARN("fail to push node to mismatch type", K(ret)); + } else if (OB_FAIL(json_param.on_mismatch_type_.push_back(info->on_mismatch_type_.at(i)))) { + LOG_WARN("fail to push node to mismatch type", K(ret)); } } - return ret; } int 
ObExprJsonValue::eval_ora_json_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { INIT_SUCC(ret); - ObDatum *json_datum = NULL; - ObExpr *json_arg = expr.args_[JSN_VAL_PATH]; - ObObjType type = json_arg->datum_meta_.type_; bool is_cover_by_error = true; bool is_null_result = false; - uint8_t is_type_cast = 0; + uint8_t is_type_mismatch = 0; ObDatum *return_val = NULL; ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); - ObIJsonBase *j_base = NULL; - - // parse json path - ObJsonPath *j_path; - if (OB_SUCC(ret) && !is_null_result) { - type = json_arg->datum_meta_.type_; - if (OB_FAIL(json_arg->eval(ctx, json_datum))) { - is_cover_by_error = false; - LOG_WARN("eval json arg failed", K(ret)); - } else if (type == ObNullType || json_datum->is_null()) { - is_null_result = true; - } else if (!ob_is_string_type(type)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input type error", K(type)); - } - if OB_SUCC(ret) { - ObString j_path_text = json_datum->get_string(); - if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(json_arg, ctx, temp_allocator, j_path_text, is_null_result))) { - LOG_WARN("fail to get real data.", K(ret), K(j_path_text)); - } else if (j_path_text.length() == 0) { // maybe input json doc is null type - is_null_result = true; - } - ObJsonPathCache ctx_cache(&temp_allocator); - ObJsonPathCache* path_cache = ObJsonExprHelper::get_path_cache_ctx(expr.expr_ctx_id_, &ctx.exec_ctx_); - path_cache = ((path_cache != NULL) ? 
path_cache : &ctx_cache); - - if (OB_SUCC(ret) && OB_FAIL(ObJsonExprHelper::find_and_add_cache(path_cache, j_path, j_path_text, 1, true))) { - is_cover_by_error = false; - ret = OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR; - LOG_USER_ERROR(OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR, j_path_text.length(), j_path_text.ptr()); - } - } + ObJsonBin st_json(&temp_allocator); + ObIJsonBase *j_base = &st_json; + ObJsonSeekResult hits; + ObJsonBin res_json(&temp_allocator); + hits.res_point_ = &res_json; + ObJsonParamCacheCtx ctx_cache(&temp_allocator); + ObJsonParamCacheCtx* param_ctx = NULL; + param_ctx = ObJsonExprHelper::get_param_cache_ctx(expr.expr_ctx_id_, &ctx.exec_ctx_); + if (OB_ISNULL(param_ctx)) { + param_ctx = &ctx_cache; } - - // parse return node acc - ObAccuracy accuracy; - ObObjType dst_type; - if (OB_SUCC(ret) && !is_null_result) { - ret = get_accuracy(expr, ctx, accuracy, dst_type, is_cover_by_error); - } else if (is_cover_by_error) { // when need error option, should do get accuracy - get_accuracy(expr, ctx, accuracy, dst_type, is_cover_by_error); - } - // parse ascii - uint8_t ascii_type = OB_JSON_ON_ASCII_IMPLICIT; - if (OB_SUCC(ret) && !is_null_result) { - ret = get_on_ascii(expr, ctx, JSN_VAL_ASCII, is_cover_by_error, ascii_type); - } - - uint8_t is_truncate = 0; - if (OB_SUCC(ret) && !is_null_result) { - if (OB_FAIL(get_on_truncate(expr, ctx, JSN_VAL_TRUNC, is_cover_by_error, is_truncate))) { - LOG_WARN("eval truncate option error", K(ret)); - } - } - - if ((expr.datum_meta_.cs_type_ == CS_TYPE_BINARY || !(ob_is_string_tc(dst_type) || ob_is_text_tc(dst_type))) - && ascii_type > 0) { + // init flag + if (param_ctx->is_first_exec_ && OB_FAIL(init_ctx_var(expr, param_ctx))) { is_cover_by_error = false; - ret = OB_ERR_NON_TEXT_RET_NOTSUPPORT; - LOG_WARN("ASCII or PRETTY not supported for non-textual return data type", K(ret)); - } - - if (OB_SUCC(ret) && dst_type == ObRawType) { + LOG_WARN("fail to init param ctx", K(ret)); + } else if 
(OB_ISNULL(param_ctx->json_param_.json_path_) // parse json path + && OB_FAIL(ObJsonUtil::get_json_path(expr.args_[JSN_VAL_PATH], ctx, + is_null_result, param_ctx, temp_allocator, is_cover_by_error))) { + LOG_WARN("fail to get json path", K(ret)); + } else if (param_ctx->is_first_exec_ + && OB_FAIL(ObExprJsonValue::get_clause_param_value(expr, ctx, + &param_ctx->json_param_, is_cover_by_error))) { // get param value & check param valid + LOG_WARN("fail to get param value", K(ret)); + } else if (param_ctx->json_param_.dst_type_ == ObRawType) { ret = OB_ERR_UNIMPLEMENT_JSON_FEATURE; LOG_WARN("Unimplement json returning type", K(ret)); - } - - int8_t JSON_VALUE_EXPR = 0; - if (OB_SUCC(ret) && j_path->get_last_node_type() > JPN_BEGIN_FUNC_FLAG && j_path->get_last_node_type() < JPN_END_FUNC_FLAG - && OB_FAIL( ObJsonExprHelper::check_item_func_with_return(j_path->get_last_node_type(), dst_type, expr.datum_meta_.cs_type_, JSON_VALUE_EXPR))) { - is_cover_by_error = false; - LOG_WARN("check item func with return type fail", K(ret)); - } - - // parse json doc - json_arg = expr.args_[JSN_VAL_DOC]; - type = json_arg->datum_meta_.type_; - ObCollationType cs_type = json_arg->datum_meta_.cs_type_; - ObJsonInType j_in_type; - if (OB_FAIL(ret)) { // - } else if (OB_FAIL(json_arg->eval(ctx, json_datum))) { - LOG_WARN("eval json arg failed", K(ret)); - is_cover_by_error = false; - } else if (type == ObNullType || json_datum->is_null()) { - is_null_result = true; - } else if (type != ObJsonType && !ob_is_string_type(type)) { - ret = OB_ERR_INVALID_TYPE_FOR_OP; - LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_OP, ob_obj_type_str(dst_type), ob_obj_type_str(type)); - } else { - ObString j_str = json_datum->get_string(); - if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(json_arg, ctx, temp_allocator, j_str, is_null_result))) { - LOG_WARN("fail to get real data.", K(ret), K(j_str)); - } else if (j_str.length() == 0) { - is_null_result = true; - } else { - j_in_type = 
ObJsonExprHelper::get_json_internal_type(type); - uint32_t parse_flag = ObJsonParser::JSN_RELAXED_FLAG; - if (j_str.length() == 0) { // maybe input json doc is null type - is_null_result = true; - } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&temp_allocator, j_str, j_in_type, - j_in_type, j_base, parse_flag))) { - LOG_WARN("fail to get json base.", K(ret), K(type), K(j_str), K(j_in_type)); - if (ret == OB_ERR_JSON_OUT_OF_DEPTH) { - is_cover_by_error = false; - } - ret = OB_ERR_JSON_SYNTAX_ERROR; - } + } else if (OB_FAIL(ObJsonUtil::get_json_doc(expr.args_[JSN_VAL_DOC], ctx, + temp_allocator, j_base, is_null_result, + is_cover_by_error, true))) { + LOG_WARN("fail to get json doc", K(ret)); + } else if (!is_null_result + && OB_FAIL(doc_do_seek(hits, is_null_result, ¶m_ctx->json_param_, + j_base, expr, ctx, is_cover_by_error, + return_val, is_type_mismatch))) { // do seek + if (ret == OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR) { + is_cover_by_error = false; } - } - - // parse empty option - // error default val type - ObObjType empty_val_type; - ObDatum *empty_datum = NULL; - uint8_t empty_type = JSN_VALUE_IMPLICIT; - if (OB_SUCC(ret) && !is_null_result) { - ret = get_on_empty_or_error(expr, ctx, JSN_VAL_EMPTY, is_cover_by_error, accuracy, empty_type, &empty_datum, dst_type, empty_val_type); - } - - // parse error option - ObDatum *error_val = NULL; - uint8_t error_type = JSN_VALUE_IMPLICIT; - json_arg = expr.args_[JSN_VAL_ERROR + 2]; - ObObjType val_type = json_arg->datum_meta_.type_; - if ((OB_SUCC(ret) && !is_null_result) || is_cover_by_error) { - int temp_ret = OB_SUCCESS; - if (ret != OB_SUCCESS) { - temp_ret = ret; - ret = OB_SUCCESS; - } - if (lib::is_oracle_mode() - && (val_type == ObCharType || val_type == ObNumberType || val_type == ObDecimalIntType)) { - if (OB_FAIL(json_arg->eval(ctx, json_datum))) { - is_cover_by_error = false; - LOG_WARN("pre eval json arg failed", K(ret)); - } else { - ObString in_str(json_datum->len_, json_datum->ptr_); - if 
(OB_FAIL(ObJsonExprHelper::pre_default_value_check(dst_type, in_str, val_type))) { - is_cover_by_error = false; - LOG_WARN("default value pre check fail", K(ret), K(in_str)); - } - } - } - if (OB_SUCC(ret) && temp_ret != OB_SUCCESS) { - ret = temp_ret; - } - } - - // do seek - ObJsonBaseVector hits; - if (OB_SUCC(ret) && OB_FAIL(doc_do_seek(hits, is_null_result, json_datum, j_path, j_base, expr, ctx, is_cover_by_error, accuracy, - dst_type, return_val, error_val, error_type, empty_datum, empty_type, empty_val_type, is_type_cast))) { - if (ret == OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR) is_cover_by_error = false; LOG_WARN("doc do seek fail", K(ret)); } - // parser mismatch TODO: type cast need complete, take type cast error from all error ORA_JV_TYPE_CAST - ObVector mismatch_val; - ObVector mismatch_type; //OB_JSON_TYPE_IMPLICIT - if (OB_SUCC(ret) && !is_null_result) { - ret = get_on_mismatch(expr, ctx, JSN_VAL_MISMATCH, is_cover_by_error, accuracy, mismatch_val, mismatch_type); - if (ret != OB_SUCCESS || mismatch_type.size() == 0 || mismatch_val.size() == 0) { - LOG_WARN("failed to get mismatch option.", K(ret), K(mismatch_type.size()), K(mismatch_val.size())); - } - } else if (is_type_cast) { - int tmp_ret = get_on_mismatch(expr, ctx, JSN_VAL_MISMATCH, is_cover_by_error, accuracy, mismatch_val, mismatch_type); - if (tmp_ret != OB_SUCCESS || mismatch_type.size() == 0 || mismatch_val.size() == 0) { - LOG_WARN("failed to get mismatch option.", K(ret), K(mismatch_type.size()), K(mismatch_val.size())); - } - } - // fill output - if (OB_UNLIKELY(OB_FAIL(ret))) { - if (is_cover_by_error) { - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - LOG_WARN("set error val fail", K(ret)); - } + // fill output and deal error case + if (OB_FAIL(ret)) { + if (is_cover_by_error && !try_set_error_val(expr, ctx, res, ret, &param_ctx->json_param_, is_type_mismatch)) { + LOG_WARN("set error val fail", 
K(ret)); } LOG_WARN("json_values failed", K(ret)); - } else if (is_null_result) { + } else { + ret = set_result(expr, &param_ctx->json_param_, ctx, is_null_result, is_cover_by_error, is_type_mismatch, + res, return_val, &temp_allocator, hits); + } + if (OB_SUCC(ret)) { + param_ctx->is_first_exec_ = false; + } + return ret; +} + +int ObExprJsonValue::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const +{ + INIT_SUCC(ret); + ObIAllocator &alloc = *expr_cg_ctx.allocator_; + ObExprJsonQueryParamInfo* info + = OB_NEWx(ObExprJsonQueryParamInfo, (&alloc), alloc, T_FUN_SYS_JSON_VALUE); + if (OB_ISNULL(info)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else if (OB_FAIL(info->init_jsn_val_expr_param(alloc, expr_cg_ctx, &raw_expr))) { + ret = OB_SUCCESS; // not use plan cache + } else { + rt_expr.extra_info_ = info; + } + if (lib::is_oracle_mode()) { + rt_expr.eval_func_ = eval_ora_json_value; + } else { + rt_expr.eval_func_ = eval_json_value; + } + return ret; +} + +int ObExprJsonQueryParamInfo::get_int_val_from_raw(ObIAllocator &alloc, ObExecContext *exec_ctx, const ObRawExpr* raw_expr, ObObj &const_data) +{ + INIT_SUCC(ret); + const_data.reset(); + bool got_data = false; + if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(exec_ctx, + raw_expr, + const_data, + got_data, + alloc))) { + LOG_WARN("failed to calc offset expr", K(ret)); + } else if (!got_data || const_data.is_null() + || !ob_is_integer_type(const_data.get_type())) { + ret = OB_ERR_INVALID_INPUT_ARGUMENT; + LOG_WARN("fail to get int value", K(ret)); + } + return ret; +} + +int ObExprJsonQueryParamInfo::init_mismatch_array(const ObRawExpr* raw_expr, + ObExecContext *exec_ctx) +{ + INIT_SUCC(ret); + uint32_t pos = -1; + ObArray val; + ObArray type; + ObObj const_data; + for(uint32_t i = JSN_VAL_MISMATCH; OB_SUCC(ret) + && i < raw_expr->get_param_count(); i++) { + if (OB_FAIL(get_int_val_from_raw(allocator_, exec_ctx, 
raw_expr->get_param_expr(i), const_data))) { + LOG_WARN("failed to calc offset expr", K(ret)); + } else { + int64_t option_type = const_data.get_int(); + if (OB_FAIL(ObJsonUtil::set_mismatch_val(val, type, option_type, pos))) { + LOG_WARN("fail to eval mismatch value", K(ret)); + } + } + } + pos = val.size(); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(on_mismatch_.init(pos))) { + LOG_WARN("fail to init type array", K(ret)); + } else if (OB_FAIL(on_mismatch_type_.init(pos))) { + LOG_WARN("fail to init type array", K(ret)); + } else { + for (uint32_t i = 0; OB_SUCC(ret) && i < pos; i++) { + if (OB_FAIL(on_mismatch_.push_back(val.at(i)))) { + LOG_WARN("fail to init type array", K(ret)); + } else if (OB_FAIL(on_mismatch_type_.push_back(type.at(i)))) { + LOG_WARN("fail to init type array", K(ret)); + } + } + } + return ret; +} + +int ObExprJsonQueryParamInfo::init_jsn_val_expr_param(ObIAllocator &alloc, ObExprCGCtx &op_cg_ctx, const ObRawExpr* raw_expr) +{ + INIT_SUCC(ret); + ObExecContext *exec_ctx = op_cg_ctx.session_->get_cur_exec_ctx(); + const ObRawExpr *path = raw_expr->get_param_expr(JSN_VAL_PATH); + ObObj const_data; + ObArray param_vec; + uint32_t pos = -1; + // parse clause node + for (int64_t i = JSN_VAL_TRUNC; OB_SUCC(ret) && i <= JSN_VAL_ERROR_DEF; i ++) { + if (i == JSN_VAL_EMPTY_DEF + || i == JSN_VAL_ERROR_DEF) { + } else if (OB_FAIL(get_int_val_from_raw(alloc, exec_ctx, raw_expr->get_param_expr(i), const_data))) { + LOG_WARN("failed to calc offset expr", K(ret)); + } else if (OB_FAIL(param_vec.push_back(const_data.get_tinyint()))) { + LOG_WARN("fail to push val into array", K(ret)); + } + } + if (OB_SUCC(ret)) { + truncate_ = param_vec[JSN_VAL_TRUNC_OPT]; + ascii_type_ = param_vec[JSN_VAL_ASCII_OPT]; + empty_type_ = param_vec[JSN_VAL_EMPTY_OPT]; + error_type_ = param_vec[JSN_VAL_ERROR_OPT]; + } + // parse mismatch 1. init array 2. 
push_back node + if (OB_FAIL(ret)) { + } else if (OB_FAIL(init_mismatch_array(raw_expr, exec_ctx))) { + LOG_WARN("fail to eval mismatch array", K(ret)); + } else if (OB_FAIL(ObJsonUtil::init_json_path(alloc, op_cg_ctx, path, *this))) { // parse json path + LOG_WARN("fail to init path from str", K(ret)); + } + return ret; +} + +int ObExprJsonValue::set_result(const ObExpr &expr, + ObJsonExprParam* json_param, + ObEvalCtx &ctx, + bool &is_null_result, + bool &is_cover_by_error, + uint8_t &is_type_mismatch, + ObDatum &res, + ObDatum *return_val, + ObIAllocator *allocator, + ObJsonSeekResult &hits) +{ + INIT_SUCC(ret); + if (is_null_result) { res.set_null(); } else { if (return_val != NULL) { @@ -576,26 +505,147 @@ int ObExprJsonValue::eval_ora_json_value(const ObExpr &expr, ObEvalCtx &ctx, ObD } else { ObCollationType in_coll_type = expr.args_[0]->datum_meta_.cs_type_; ObCollationType dst_coll_type = expr.datum_meta_.cs_type_; - ret = cast_to_res(&temp_allocator, expr, ctx, hits[0], error_type, error_val, - accuracy, dst_type, in_coll_type, dst_coll_type, res, mismatch_val, mismatch_type, is_type_cast, ascii_type, is_truncate); + ObJsonCastParam cast_param(json_param->dst_type_, in_coll_type, dst_coll_type, json_param->ascii_type_); + cast_param.rt_expr_ = &expr; + ret = ObJsonUtil::cast_to_res(allocator, ctx, hits[0], + json_param->accuracy_, cast_param, res, is_type_mismatch); + if (OB_FAIL(ret)) { + try_set_error_val(expr, ctx, res, ret, json_param, is_type_mismatch); + } else if (OB_FAIL(ObJsonUtil::set_lob_datum(allocator, expr, ctx, json_param->dst_type_, json_param->ascii_type_,res))) { + LOG_WARN("fail to set lob datum from string val", K(ret)); + } + } + } + return ret; +} + +int ObExprJsonValue::get_default_empty_error_value(const ObExpr &expr, + ObJsonExprParam* json_param, + ObEvalCtx &ctx) +{ + INIT_SUCC(ret); + // parse empty option + if (json_param->empty_type_ == JSN_VALUE_DEFAULT) { + if (!json_param->is_empty_default_const_ || 
OB_ISNULL(json_param->empty_val_)) { + if (OB_FAIL(get_default_value(expr.args_[JSN_VAL_EMPTY + 1], ctx, + json_param->accuracy_, &json_param->empty_val_))) { + LOG_WARN("failed to get empty datum", K(ret)); + } + } + } + // parse error option + if (lib::is_mysql_mode() && OB_SUCC(ret) && json_param->error_type_ == JSN_VALUE_DEFAULT) { // always get error option for return default value on error + if (!json_param->is_error_default_const_ || OB_ISNULL(json_param->error_val_)) { + if (OB_FAIL(get_default_value(expr.args_[JSN_VAL_ERROR + 1], ctx, + json_param->accuracy_, &json_param->error_val_))) { + LOG_WARN("failed to get empty datum", K(ret)); + } + } + } + return ret; +} + +int ObExprJsonValue::init_ctx_var(const ObExpr &expr, ObJsonParamCacheCtx* param_ctx) +{ + INIT_SUCC(ret); + // init json path flag + param_ctx->is_json_path_const_ = expr.args_[JSN_VAL_PATH]->is_const_expr(); + // init empty default value flag + param_ctx->json_param_.is_empty_default_const_ = expr.args_[JSN_VAL_EMPTY_DEF]->is_const_expr(); + // init error default value flag + param_ctx->json_param_.is_error_default_const_ = expr.args_[JSN_VAL_ERROR_DEF]->is_const_expr(); + // extract value from paln cache + const ObExprJsonQueryParamInfo *info + = static_cast(expr.extra_info_); + if (OB_NOT_NULL(info) + && OB_FAIL(extract_plan_cache_param(info, param_ctx->json_param_))) { + LOG_WARN("fail to extract param from plan cache", K(ret)); + } + return ret; +} + +int ObExprJsonValue::check_param_valid(const ObExpr &expr, ObJsonExprParam* json_param, + ObJsonPath *j_path, bool &is_cover_by_error) +{ + INIT_SUCC(ret); + // binary can not use with ascii + if ((expr.datum_meta_.cs_type_ == CS_TYPE_BINARY || !(ob_is_string_tc(json_param->dst_type_) || ob_is_text_tc(json_param->dst_type_))) + && json_param->ascii_type_ > 0) { + is_cover_by_error = false; + ret = OB_ERR_NON_TEXT_RET_NOTSUPPORT; + LOG_WARN("ASCII or PRETTY not supported for non-textual return data type", K(ret)); + } + + int8_t 
JSON_VALUE_EXPR = 0; + ObExpr *json_arg_ret = expr.args_[JSN_VAL_RET]; + ObObjType val_type = json_arg_ret->datum_meta_.type_; + if (OB_SUCC(ret) && val_type != ObNullType && j_path->is_last_func() + && OB_FAIL( ObJsonExprHelper::check_item_func_with_return(j_path->get_last_node_type(), + json_param->dst_type_, expr.datum_meta_.cs_type_, JSON_VALUE_EXPR))) { + is_cover_by_error = false; + LOG_WARN("check item func with return type fail", K(ret)); + } + return ret; +} + +int ObExprJsonValue::get_clause_param_value(const ObExpr &expr, ObEvalCtx &ctx, + ObJsonExprParam* json_param, + bool &is_cover_by_error) +{ + INIT_SUCC(ret); + ObArray param_vec; + int8_t val = 0; + // parse return node acc + if (OB_SUCC(ret)) { + ret = ObJsonUtil::get_accuracy(expr, ctx, json_param->accuracy_, json_param->dst_type_, is_cover_by_error); + } else if (is_cover_by_error) { // when need error option, should do get accuracy + ObJsonUtil::get_accuracy(expr, ctx, json_param->accuracy_, json_param->dst_type_, is_cover_by_error); + } + + // truncate 3, ascii 4, empty_type 5, empty_val 6, error_type 7, error_val 8 + for (size_t i = JSN_VAL_TRUNC; OB_SUCC(ret) && !json_param->is_init_from_cache_ + && i <= JSN_VAL_ERROR_DEF; i ++) { + if (i == JSN_VAL_EMPTY_DEF + || i == JSN_VAL_ERROR_DEF) { + } else if (OB_FAIL(ObJsonExprHelper::get_clause_opt(expr.args_[i], ctx, val))) { + is_cover_by_error = false; + LOG_WARN("fail to get clause option", K(ret)); + } else if (OB_FAIL(param_vec.push_back(val))) { + is_cover_by_error = false; + LOG_WARN("fail to push val into array", K(ret)); + } + } + if (json_param->is_init_from_cache_) { + } else if (OB_FAIL(ret) && is_cover_by_error) { + ret = ObJsonExprHelper::get_clause_opt(expr.args_[JSN_VAL_ERROR], ctx, json_param->error_type_); + if (OB_FAIL(ret)) { + is_cover_by_error = false; + } + } else if (OB_SUCC(ret) && param_vec.size() == 4) { + json_param->truncate_ = param_vec[JSN_VAL_TRUNC_OPT]; + json_param->ascii_type_ = param_vec[JSN_VAL_ASCII_OPT]; + 
json_param->empty_type_ = param_vec[JSN_VAL_EMPTY_OPT]; + json_param->error_type_ = param_vec[JSN_VAL_ERROR_OPT]; + } else if (OB_SUCC(ret)) { // should use prior branch + is_cover_by_error = false; + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get param value", K(ret)); + } + + // parser mismatch TODO: type cast need complete, take type cast error from all error ORA_JV_TYPE_CAST + if (OB_SUCC(ret) && lib::is_oracle_mode()) { + if (!json_param->is_init_from_cache_ + && OB_FAIL(get_on_mismatch(expr, ctx, JSN_VAL_MISMATCH, is_cover_by_error, json_param->accuracy_, json_param->on_mismatch_, json_param->on_mismatch_type_))) { + LOG_WARN("failed to get mismatch option.", K(ret), K(json_param->on_mismatch_.count()), K(json_param->on_mismatch_type_.count())); + } else if (OB_FAIL(ObExprJsonValue::check_param_valid(expr, json_param, + json_param->json_path_, is_cover_by_error))) { + LOG_WARN("fail to check param_valid", K(ret)); } } return ret; } -int ObExprJsonValue::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const -{ - UNUSED(expr_cg_ctx); - UNUSED(raw_expr); - if (lib::is_oracle_mode()) { - rt_expr.eval_func_ = eval_ora_json_value; - } else { - rt_expr.eval_func_ = eval_json_value; - } - return OB_SUCCESS; -} - template int ObExprJsonValue::check_default_val_accuracy(const ObAccuracy &accuracy, const ObObjType &type, @@ -607,7 +657,7 @@ int ObExprJsonValue::check_default_val_accuracy(const ObAccuracy &accuracy, switch (tc) { case ObNumberTC: { number::ObNumber temp(obj->get_number()); - ret = number_range_check(accuracy, NULL, temp, true); + ret = ObJsonUtil::number_range_check(accuracy, NULL, temp, true); LOG_WARN("number range is invalid for json_value", K(ret)); break; } @@ -617,7 +667,7 @@ int ObExprJsonValue::check_default_val_accuracy(const ObAccuracy &accuracy, if (OB_FAIL(wide::to_number(obj->get_decimal_int(), obj->get_int_bytes(), accuracy.scale_, tmp_alloc, temp))) { LOG_WARN("to_number failed", K(ret)); - } else if 
(OB_FAIL(number_range_check(accuracy, NULL, temp, true))) { + } else if (OB_FAIL(ObJsonUtil::number_range_check(accuracy, NULL, temp, true))) { LOG_WARN("number range is invalid for json_value", K(ret)); } break; @@ -633,12 +683,12 @@ int ObExprJsonValue::check_default_val_accuracy(const ObAccuracy &accuracy, } case ObDateTimeTC: { int64_t val = obj->get_datetime(); - ret = datetime_scale_check(accuracy, val, true); + ret = ObJsonUtil::datetime_scale_check(accuracy, val, true); break; } case ObTimeTC: { int64_t val = obj->get_time(); - ret = time_scale_check(accuracy, val, true); + ret = ObJsonUtil::time_scale_check(accuracy, val, true); break; } case ObStringTC : @@ -668,148 +718,49 @@ int ObExprJsonValue::check_default_val_accuracy(const ObAccuracy &accuracy, return ret; } -int ObExprJsonValue::doc_do_seek(ObJsonBaseVector &hits, bool &is_null_result, ObDatum *json_datum, ObJsonPath *j_path, +int ObExprJsonValue::doc_do_seek(ObJsonSeekResult &hits, bool &is_null_result, ObJsonExprParam* json_param, ObIJsonBase *j_base, const ObExpr &expr, ObEvalCtx &ctx, bool &is_cover_by_error, - const ObAccuracy &accuracy, ObObjType dst_type, ObDatum *&return_val, ObDatum *error_datum, - uint8_t error_type, ObDatum *empty_datum, uint8_t &empty_type, ObObjType &default_val_type, - uint8_t &is_type_cast) + ObDatum *&return_val, + uint8_t &is_type_mismatch) { INIT_SUCC(ret); ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); - if (OB_SUCC(ret) && !is_null_result && json_datum != nullptr) { - - if (OB_FAIL(j_base->seek(*j_path, j_path->path_node_cnt(), true, false, hits))) { + if (OB_SUCC(ret) && !is_null_result) { + if (OB_FAIL(j_base->seek(*json_param->json_path_, json_param->json_path_->path_node_cnt(), true, false, hits))) { if (ret == OB_ERR_JSON_PATH_EXPRESSION_SYNTAX_ERROR) { is_cover_by_error = false; } else if (ret == OB_ERR_DOUBLE_TRUNCATED) { - is_type_cast = true; + is_type_mismatch = true; ret = 
OB_INVALID_NUMERIC; } - LOG_WARN("json seek failed", K(json_datum->get_string()), K(ret)); + LOG_WARN("json seek failed", K(ret)); } else if (lib::is_oracle_mode() && hits.size() == 1) { - if (hits[0]->json_type() == ObJsonNodeType::J_OBJECT - || hits[0]->json_type() == ObJsonNodeType::J_ARRAY) { - ret = OB_ERR_JSON_VALUE_NO_SCALAR; - } else if (j_path->is_last_func()) { - if (j_path->get_last_node_type() == ObJsonPathNodeType::JPN_BOOLEAN - && hits[0]->json_type() != ObJsonNodeType::J_BOOLEAN) { - if ((hits[0]->json_type() == ObJsonNodeType::J_INT - && (hits[0]->get_int() == 1 || hits[0]->get_int() == 0) ) - || (hits[0]->json_type() == ObJsonNodeType::J_DOUBLE - && (hits[0]->get_double() == 1.0 || hits[0]->get_double() == 0.0))) { - bool is_true = hits[0]->json_type() == ObJsonNodeType::J_INT ? (hits[0]->get_int() == 1) : (hits[0]->get_double() == 1.0); - hits.reset(); - ObJsonBoolean* tmp_ans = static_cast (temp_allocator.alloc(sizeof(ObJsonBoolean))); - if (OB_ISNULL(tmp_ans)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate row buffer failed at ObJsonDecimal", K(ret)); - } else { - tmp_ans = new (tmp_ans) ObJsonBoolean(is_true); - hits.push_back(tmp_ans); - } - } else { - is_null_result = true; - } - } else if (j_path->get_last_node_type() == ObJsonPathNodeType::JPN_BOOL_ONLY - && hits[0]->json_type() != ObJsonNodeType::J_BOOLEAN) { - ret = OB_ERR_JSON_PATH_SYNTAX_ERROR; - LOG_WARN("boolean only function meet non-boolean data", K(ret)); - } else if ((j_path->get_last_node_type() == ObJsonPathNodeType::JPN_NUM_ONLY - || j_path->get_last_node_type() == ObJsonPathNodeType::JPN_NUMBER) - && hits[0]->json_type() == ObJsonNodeType::J_NULL && !hits[0]->is_real_json_null(hits[0])) { - ret = OB_INVALID_NUMERIC; - is_type_cast = 1; - LOG_WARN("number only function meet non-number data", K(ret)); - } else if ((j_path->get_last_node_type() == ObJsonPathNodeType::JPN_STR_ONLY - || j_path->get_last_node_type() == ObJsonPathNodeType::JPN_STRING) - && 
hits[0]->json_type() == ObJsonNodeType::J_NULL && !hits[0]->is_real_json_null(hits[0])) { - ret = OB_INVALID_NUMERIC; - is_type_cast = 1; - LOG_WARN("string only function meet non-string data", K(ret)); - } else if ((j_path->get_last_node_type() == ObJsonPathNodeType::JPN_DATE - || j_path->get_last_node_type() == ObJsonPathNodeType::JPN_TIMESTAMP) - && !hits[0]->is_json_date(hits[0]->json_type()) - && !hits[0]->is_json_string(hits[0]->json_type())){ - ret = OB_ERR_CONVERSION_FAIL; - LOG_WARN("data seek fail", K(ret)); - } else if (j_path->get_last_node_type() == ObJsonPathNodeType::JPN_DOUBLE && !hits[0]->is_json_number(hits[0]->json_type())) { - ret = OB_ERR_CONVERSION_FAIL; - LOG_WARN("data seek fail", K(ret)); - } else if ((j_path->get_last_node_type() == ObJsonPathNodeType::JPN_UPPER || j_path->get_last_node_type() == ObJsonPathNodeType::JPN_LOWER) - && hits[0]->json_type() == ObJsonNodeType::J_STRING && ((ObJsonString *)hits[0])->get_is_null_to_str()) { - is_null_result = true; - } - } - if (OB_SUCC(ret) && hits[0]->json_type() == ObJsonNodeType::J_NULL) { - is_null_result = true; + ObIJsonBase* data = hits[0]; + if (OB_FAIL(deal_item_method_in_seek(data, is_null_result, json_param->json_path_, + &temp_allocator, is_type_mismatch))) { + LOG_WARN("fail to deal item method and special case", K(ret)); + } else { + hits.set_node(0, data); } } else if (hits.size() == 0) { - if (OB_SUCC(ret)) { - switch (empty_type) { - case JSN_VALUE_ERROR -: { - is_cover_by_error = false; - if (lib::is_oracle_mode()) { - ret = OB_ERR_JSON_VALUE_NO_VALUE; - LOG_USER_ERROR(OB_ERR_JSON_VALUE_NO_VALUE); - } else { - ret = OB_ERR_MISSING_JSON_VALUE; - LOG_USER_ERROR(OB_ERR_MISSING_JSON_VALUE, "json_value"); - } - LOG_WARN("json value seek result empty.", K(hits.size())); - break; - } - case JSN_VALUE_DEFAULT: { - return_val = empty_datum; - break; - } - case JSN_VALUE_NULL: { - is_null_result = true; - break; - } - case JSN_VALUE_IMPLICIT: { - if (lib::is_oracle_mode()) { - ret = 
OB_ERR_JSON_VALUE_NO_VALUE; - LOG_USER_ERROR(OB_ERR_JSON_VALUE_NO_VALUE); - LOG_WARN("json value seek result empty.", K(hits.size())); - is_cover_by_error = true; - } else { - is_null_result = true; - } - break; - } - default: // empty_type from get_on_empty_or_error has done range check, do nothing for default - break; + // get empty clause + if (lib::is_oracle_mode() && OB_FAIL(get_default_empty_error_value(expr, json_param, ctx))) { + if (ret == OB_ERR_VALUE_LARGER_THAN_ALLOWED) { + is_cover_by_error = false; } + LOG_WARN("fail to get empty clause", K(ret)); + } else if (OB_FAIL(get_empty_option(return_val, is_cover_by_error, + json_param->empty_type_, + json_param->empty_val_, + is_null_result))) { + LOG_WARN("fail to get empty option", K(ret)); } } else if (hits.size() > 1) { // return val decide by error option - if (lib::is_mysql_mode()) { - switch (error_type) { - case JSN_VALUE_ERROR: { - ret = OB_ERR_MULTIPLE_JSON_VALUES; - LOG_USER_ERROR(OB_ERR_MULTIPLE_JSON_VALUES, "json_value"); - LOG_WARN("json value seek result more than one.", K(hits.size())); - break; - } - case JSN_VALUE_DEFAULT: { - return_val = error_datum; - break; - } - case JSN_VALUE_NULL: - case JSN_VALUE_IMPLICIT: { - is_null_result = true; - break; - } - default: // error_type from get_on_empty_or_error has done range check, do nothing for default - break; - } - } else { - ret = OB_ERR_MULTIPLE_JSON_VALUES; - LOG_USER_ERROR(OB_ERR_MULTIPLE_JSON_VALUES, "json_value"); - LOG_WARN("json value seek result more than one.", K(hits.size())); - } + ret = OB_ERR_MULTIPLE_JSON_VALUES; + LOG_USER_ERROR(OB_ERR_MULTIPLE_JSON_VALUES, "json_value"); + LOG_WARN("json value seek result more than one.", K(hits.size())); } else if (hits[0]->json_type() == ObJsonNodeType::J_NULL) { is_null_result = true; } @@ -817,1006 +768,152 @@ int ObExprJsonValue::doc_do_seek(ObJsonBaseVector &hits, bool &is_null_result, O return ret; } -int ObExprJsonValue::get_accuracy_internal( - ObEvalCtx& ctx, - ObAccuracy &accuracy, 
- ObObjType &dest_type, - const int64_t value, - const ObLengthSemantics &length_semantics) +int ObExprJsonValue::get_empty_option(ObDatum *&empty_res, + bool &is_cover_by_error, + int8_t empty_type, + ObDatum *empty_datum, + bool &is_null_result) { INIT_SUCC(ret); - ParseNode node; - node.value_ = value; - dest_type = static_cast(node.int16_values_[0]); - - if (ObFloatType == dest_type) { - // boundaries already checked in calc result type - if (node.int16_values_[OB_NODE_CAST_N_PREC_IDX] > OB_MAX_FLOAT_PRECISION) { - dest_type = ObDoubleType; - } - } - ObObjTypeClass dest_tc = ob_obj_type_class(dest_type); - if (ObStringTC == dest_tc) { - // parser will abort all negative number - // if length < 0 means DEFAULT_STR_LENGTH or OUT_OF_STR_LEN. - accuracy.set_full_length(node.int32_values_[1], length_semantics, - lib::is_oracle_mode()); - } else if (ObRawTC == dest_tc) { - accuracy.set_length(node.int32_values_[1]); - } else if(ObTextTC == dest_tc || ObJsonTC == dest_tc) { - accuracy.set_length(node.int32_values_[1] < 0 ? - ObAccuracy::DDL_DEFAULT_ACCURACY[dest_type].get_length() : node.int32_values_[1]); - } else if (ObIntervalTC == dest_tc) { - if (OB_UNLIKELY(!ObIntervalScaleUtil::scale_check(node.int16_values_[3]) || - !ObIntervalScaleUtil::scale_check(node.int16_values_[2]))) { - ret = OB_ERR_DATETIME_INTERVAL_PRECISION_OUT_OF_RANGE; - LOG_WARN("Invalid scale.", K(ret), K(node.int16_values_[3]), K(node.int16_values_[2])); - } else { - ObScale scale = (dest_type == ObIntervalYMType) ? 
- ObIntervalScaleUtil::interval_ym_scale_to_ob_scale( - static_cast(node.int16_values_[3])) - : ObIntervalScaleUtil::interval_ds_scale_to_ob_scale( - static_cast(node.int16_values_[2]), - static_cast(node.int16_values_[3])); - accuracy.set_scale(scale); - } - } else { - const ObAccuracy &def_acc = - ObAccuracy::DDL_DEFAULT_ACCURACY2[lib::is_oracle_mode()][dest_type]; - if (ObNumberType == dest_type && 0 == node.int16_values_[2]) { - accuracy.set_precision(def_acc.get_precision()); - } else { - accuracy.set_precision(node.int16_values_[2]); - } - accuracy.set_scale(node.int16_values_[3]); - if (lib::is_oracle_mode() && ObDoubleType == dest_type) { - accuracy.set_accuracy(def_acc.get_precision()); - } - if (ObNumberType == dest_type - && is_decimal_int_accuracy_valid(accuracy.get_precision(), accuracy.get_scale())) { - bool enable_decimalint = false; - if (OB_ISNULL(ctx.exec_ctx_.get_my_session())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("type_ctx.get_session() is null", K(ret)); - } else if (OB_FAIL(ObSQLUtils::check_enable_decimalint(ctx.exec_ctx_.get_my_session(), - enable_decimalint))) { - LOG_WARN("fail to check_enable_decimalint_type", - K(ret), K(ctx.exec_ctx_.get_my_session()->get_effective_tenant_id())); - } else if (enable_decimalint) { - dest_type = ObDecimalIntType; - } - } - } - return ret; -} - -int ObExprJsonValue::get_accuracy(const ObExpr &expr, - ObEvalCtx &ctx, - ObAccuracy &accuracy, - ObObjType &dest_type, - bool &is_cover_by_error) -{ - INIT_SUCC(ret); - ObDatum *dst_type_dat = NULL; - - if (OB_ISNULL(expr.args_) || OB_ISNULL(expr.args_[2])) { - ret = OB_ERR_UNEXPECTED; - is_cover_by_error = false; - LOG_WARN("unexpected expr", K(ret), K(expr.arg_cnt_), KP(expr.args_)); - } else if (OB_FAIL(expr.args_[2]->eval(ctx, dst_type_dat))) { - is_cover_by_error = false; - LOG_WARN("eval dst type datum failed", K(ret)); - } else { - ret = get_accuracy_internal(ctx, - accuracy, - dest_type, - dst_type_dat->get_int(), - expr.datum_meta_.length_semantics_); 
- } - - return ret; -} - -int ObExprJsonValue::number_range_check(const ObAccuracy &accuracy, - ObIAllocator *allocator, - number::ObNumber &val, - bool strict) -{ - INIT_SUCC(ret); - ObPrecision precision = accuracy.get_precision(); - ObScale scale = accuracy.get_scale(); - const number::ObNumber *min_check_num = NULL; - const number::ObNumber *max_check_num = NULL; - const number::ObNumber *min_num_mysql = NULL; - const number::ObNumber *max_num_mysql = NULL; - bool is_finish = false; - if (lib::is_oracle_mode()) { - if (OB_MAX_NUMBER_PRECISION >= precision - && precision >= OB_MIN_NUMBER_PRECISION - && number::ObNumber::MAX_SCALE >= scale - && scale >= number::ObNumber::MIN_SCALE) { - min_check_num = &(ObNumberConstValue::ORACLE_CHECK_MIN[precision][scale + ObNumberConstValue::MAX_ORACLE_SCALE_DELTA]); - max_check_num = &(ObNumberConstValue::ORACLE_CHECK_MAX[precision][scale + ObNumberConstValue::MAX_ORACLE_SCALE_DELTA]); - } else if (ORA_NUMBER_SCALE_UNKNOWN_YET == scale - && PRECISION_UNKNOWN_YET == precision) { - is_finish = true; - } else if (PRECISION_UNKNOWN_YET == precision - && number::ObNumber::MAX_SCALE >= scale - && scale >= number::ObNumber::MIN_SCALE) { - number::ObNumber num; - if (OB_FAIL(num.from(val, *allocator))) { - } else if (OB_FAIL(num.round(scale))) { - } else if (val.compare(num) != 0) { - ret = OB_OPERATE_OVERFLOW; - LOG_WARN("input value is out of range.", K(scale), K(val)); - } else { - is_finish = true; - } - } else { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(precision), K(scale)); - } - } else { - if (OB_UNLIKELY(precision < scale)) { - ret = OB_ERR_M_BIGGER_THAN_D; - LOG_WARN("Invalid accuracy.", K(ret), K(scale), K(precision)); - } else if (number::ObNumber::MAX_PRECISION >= precision - && precision >= OB_MIN_DECIMAL_PRECISION - && number::ObNumber::MAX_SCALE >= scale - && scale >= 0) { - min_check_num = &(ObNumberConstValue::MYSQL_CHECK_MIN[precision][scale]); - max_check_num = 
&(ObNumberConstValue::MYSQL_CHECK_MAX[precision][scale]); - min_num_mysql = &(ObNumberConstValue::MYSQL_MIN[precision][scale]); - max_num_mysql = &(ObNumberConstValue::MYSQL_MAX[precision][scale]); - } else { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(precision), K(scale)); - } - } - if (OB_SUCC(ret) && !is_finish) { - if (OB_ISNULL(min_check_num) || OB_ISNULL(max_check_num) - || (!lib::is_oracle_mode() - && (OB_ISNULL(min_num_mysql) || OB_ISNULL(max_num_mysql)))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("min_num or max_num is null", K(ret), KPC(min_check_num), KPC(max_check_num)); - } else if (val <= *min_check_num) { + switch (empty_type) { + case JSN_VALUE_ERROR: { + is_cover_by_error = false; if (lib::is_oracle_mode()) { - ret = OB_ERR_VALUE_LARGER_THAN_ALLOWED; + ret = OB_ERR_JSON_VALUE_NO_VALUE; + LOG_USER_ERROR(OB_ERR_JSON_VALUE_NO_VALUE); } else { - ret = OB_OPERATE_OVERFLOW; + ret = OB_ERR_MISSING_JSON_VALUE; + LOG_USER_ERROR(OB_ERR_MISSING_JSON_VALUE, "json_value"); } - LOG_WARN("val is out of min range check.", K(val), K(*min_check_num)); - is_finish = true; - } else if (val >= *max_check_num) { + LOG_WARN("json value seek result empty.", K(ret)); + break; + } + case JSN_VALUE_DEFAULT: { + empty_res = empty_datum; + break; + } + case JSN_VALUE_NULL: { + is_null_result = true; + break; + } + case JSN_VALUE_IMPLICIT: { if (lib::is_oracle_mode()) { - ret = OB_ERR_VALUE_LARGER_THAN_ALLOWED; + ret = OB_ERR_JSON_VALUE_NO_VALUE; + LOG_USER_ERROR(OB_ERR_JSON_VALUE_NO_VALUE); + LOG_WARN("json value seek result empty.", K(ret)); + is_cover_by_error = true; } else { - ret = OB_OPERATE_OVERFLOW; - } - LOG_WARN("val is out of max range check.", K(val), K(*max_check_num)); - is_finish = true; - } else { - ObNumStackOnceAlloc tmp_alloc; - number::ObNumber num; - if (OB_FAIL(num.from(val, tmp_alloc))) { - } else if (OB_FAIL(num.round(scale))) { - LOG_WARN("num.round failed", K(ret), K(scale)); - } else { - if (strict) { - if (num.compare(val) 
!= 0) { - ret = OB_OPERATE_OVERFLOW; - LOG_WARN("input value is out of range.", K(scale), K(val)); - } else { - is_finish = true; - } - } else { - if (OB_ISNULL(allocator)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("allocator is null", K(ret)); - } else if (OB_FAIL(val.deep_copy_v3(num, *allocator))) { - LOG_WARN("val.deep_copy_v3 failed", K(ret), K(num)); - } else { - is_finish = true; - } - } - } - } - } - if (OB_SUCC(ret) && !is_finish) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected situation, res is not set", K(ret)); - } - LOG_DEBUG("number_range_check_v2 done", K(ret), K(is_finish), K(accuracy), K(val), - KPC(min_check_num), KPC(max_check_num)); - - return ret; -} - -int ObExprJsonValue::datetime_scale_check(const ObAccuracy &accuracy, - int64_t &value, - bool strict) -{ - INIT_SUCC(ret); - ObScale scale = accuracy.get_scale(); - - if (OB_UNLIKELY(scale > MAX_SCALE_FOR_TEMPORAL)) { - ret = OB_ERR_TOO_BIG_PRECISION; - LOG_USER_ERROR(OB_ERR_TOO_BIG_PRECISION, scale, "CAST", - static_cast(MAX_SCALE_FOR_TEMPORAL)); - } else if (OB_UNLIKELY(0 <= scale && scale < MAX_SCALE_FOR_TEMPORAL)) { - // first check zero - if (strict && - (value == ObTimeConverter::ZERO_DATE || - value == ObTimeConverter::ZERO_DATETIME)) { - ret = OB_INVALID_DATE_VALUE; - LOG_WARN("Zero datetime is invalid in json_value.", K(value)); - } else { - int64_t temp_value = value; - ObTimeConverter::round_datetime(scale, temp_value); - if (strict && temp_value != value) { - ret = OB_OPERATE_OVERFLOW; - LOG_WARN("Invalid input value.", K(value), K(scale)); - } else if (ObTimeConverter::is_valid_datetime(temp_value)) { - value = temp_value; - } else { - ret = OB_ERR_NULL_VALUE; // set null for res - LOG_DEBUG("Invalid datetime val, return set_null", K(temp_value)); - } - } - } - - return ret; -} - -int ObExprJsonValue::time_scale_check(const ObAccuracy &accuracy, int64_t &value, bool strict) -{ - INIT_SUCC(ret); - ObScale scale = accuracy.get_scale(); - - if (OB_LIKELY(0 <= scale && scale < 
MAX_SCALE_FOR_TEMPORAL)) { - int64_t temp_value = value; - ObTimeConverter::round_datetime(scale, temp_value); - if (strict && temp_value != value) { // round success - ret = OB_OPERATE_OVERFLOW; - LOG_WARN("Invalid input value.", K(value), K(scale)); - } else { - value = temp_value; - } - } - - return ret; -} - -int ObExprJsonValue::get_cast_ret(int ret) -{ - // compatibility for old ob - if (OB_UNLIKELY(OB_ERR_UNEXPECTED_TZ_TRANSITION == ret) || - OB_UNLIKELY(OB_ERR_UNKNOWN_TIME_ZONE == ret)) { - ret = OB_INVALID_DATE_VALUE; - } - - return ret; -} - -int ObExprJsonValue::cast_to_int(ObIJsonBase *j_base, ObObjType dst_type, int64_t &val) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (CAST_FAIL(j_base->to_int(val, true))) { - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "SIGNED", "json_value"); - LOG_WARN("cast to int failed", K(ret), K(*j_base)); - } else if (dst_type < ObIntType && - CAST_FAIL(int_range_check(dst_type, val, val))) { - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "SIGNED", "json_value"); - } - - return ret; -} - -int ObExprJsonValue::cast_to_uint(ObIJsonBase *j_base, ObObjType dst_type, uint64_t &val) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (CAST_FAIL(j_base->to_uint(val, true, true))) { - LOG_WARN("cast to uint failed", K(ret), K(*j_base)); - if (ret == OB_OPERATE_OVERFLOW) { - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "UNSIGNED", "json_value"); - } - } else if (dst_type < ObUInt64Type && - CAST_FAIL(uint_upper_check(dst_type, val))) { - LOG_WARN("uint_upper_check failed", K(ret)); - } - - return ret; -} - -int ObExprJsonValue::cast_to_datetime(ObIJsonBase *j_base, - common::ObIAllocator *allocator, - const ObBasicSessionInfo *session, - ObEvalCtx &ctx, - const ObExpr *expr, - common::ObAccuracy &accuracy, - int64_t &val, - uint8_t 
&is_type_cast) -{ - INIT_SUCC(ret); - ObString json_string; - if (OB_ISNULL(session)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("session is NULL", K(ret)); - } else { - oceanbase::common::ObTimeConvertCtx cvrt_ctx(session->get_timezone_info(), false); - if (lib::is_oracle_mode()) { - if (OB_FAIL(common_get_nls_format(session, ctx, expr, ObDateTimeType, - true, - cvrt_ctx.oracle_nls_format_))) { - LOG_WARN("common_get_nls_format failed", K(ret)); - } else if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (type_cast_to_string(json_string, allocator, j_base, accuracy) && json_string.length() > 0) { - ObJsonString json_str(json_string.ptr(),json_string.length()); - if (CAST_FAIL(json_str.to_datetime(val, &cvrt_ctx))) { - is_type_cast = 1; - LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); - } - } else if (CAST_FAIL(j_base->to_datetime(val, &cvrt_ctx))) { - is_type_cast = 1; - LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); - } - if (OB_SUCC(ret) && CAST_FAIL(datetime_scale_check(accuracy, val))) { - LOG_WARN("datetime_scale_check failed.", K(ret)); - } - } else { - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (CAST_FAIL(j_base->to_datetime(val, &cvrt_ctx))) { - LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); - } else if (CAST_FAIL(datetime_scale_check(accuracy, val))) { - LOG_WARN("datetime_scale_check failed.", K(ret)); - } - } - - } - - return ret; -} - -int ObExprJsonValue::cast_to_otimstamp(ObIJsonBase *j_base, - const ObBasicSessionInfo *session, - ObEvalCtx &ctx, - const ObExpr *expr, - common::ObAccuracy &accuracy, - ObObjType dst_type, - ObOTimestampData &out_val, - uint8_t &is_type_cast) -{ - INIT_SUCC(ret); - int64_t val; - - oceanbase::common::ObTimeConvertCtx cvrt_ctx(NULL, dst_type == ObTimestampType); - if (OB_ISNULL(session)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("session is NULL", K(ret)); - } else if 
(OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else { - cvrt_ctx.tz_info_ = session->get_timezone_info(); - if (lib::is_oracle_mode()) { - if (OB_FAIL(common_get_nls_format(session, ctx, expr, - ObDateTimeType, true, - cvrt_ctx.oracle_nls_format_))) { - LOG_WARN("common_get_nls_format failed", K(ret)); - } - } - } - if (OB_SUCC(ret)) { - if (CAST_FAIL(j_base->to_datetime(val, &cvrt_ctx))) { - is_type_cast = 1; - LOG_WARN("wrapper to datetime failed.", K(ret), K(*j_base)); - } else { - ObScale scale = accuracy.get_scale(); - if (OB_FAIL(ObTimeConverter::odate_to_otimestamp(val, cvrt_ctx.tz_info_, dst_type, out_val))) { - is_type_cast = 1; - LOG_WARN("fail to timestamp_to_timestamp_tz", K(ret), K(val), K(dst_type)); - } else if (OB_UNLIKELY(0 <= scale && scale < MAX_SCALE_FOR_ORACLE_TEMPORAL)) { - ObOTimestampData ot_data = ObTimeConverter::round_otimestamp(scale, out_val); - if (ObTimeConverter::is_valid_otimestamp(ot_data.time_us_, - static_cast(ot_data.time_ctx_.tail_nsec_))) { - out_val = ot_data; - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid otimestamp, set it null ", K(ot_data), K(scale), "orig_date", out_val); - } - } - } - } - return ret; -} - -int ObExprJsonValue::cast_to_date(ObIJsonBase *j_base, int32_t &val, uint8_t &is_type_cast) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (j_base->json_type() == ObJsonNodeType::J_INT) { - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "DATE", "json_value"); - LOG_WARN("fail to cast json type to time", K(ret), K(j_base->json_type())); - } else if (CAST_FAIL(j_base->to_date(val))) { - is_type_cast = 1; - LOG_WARN("wrapper to date failed.", K(ret), K(*j_base)); - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "DATE", "json_value"); - } - - return ret; -} - -int ObExprJsonValue::cast_to_time(ObIJsonBase *j_base, - common::ObAccuracy &accuracy, - 
int64_t &val) -{ - INIT_SUCC(ret); - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (j_base->json_type() == ObJsonNodeType::J_INT) { - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "TIME", "json_value"); - } else if (CAST_FAIL(j_base->to_time(val))) { - LOG_WARN("wrapper to time failed.", K(ret), K(*j_base)); - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "TIME", "json_value"); - } else if (CAST_FAIL(time_scale_check(accuracy, val))) { - LOG_WARN("time_scale_check failed.", K(ret)); - } - - return ret; -} - -int ObExprJsonValue::cast_to_year(ObIJsonBase *j_base, uint8_t &val) -{ - INIT_SUCC(ret); - // Compatible with mysql. - // There is no year type in json binary, it is store as a full int. - // For example, 1901 is stored as 1901, not 01. - // in mysql 8.0, json is converted to int first, then converted to year. - // However, json value returning as different behavior to cast expr. - int64_t int_val; - const uint16 min_year = 1901; - const uint16 max_year = 2155; - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (CAST_FAIL(j_base->to_int(int_val))) { - LOG_WARN("wrapper to year failed.", K(ret), K(*j_base)); - } else if (0 != int_val && (int_val < min_year || int_val > max_year)) { - // different with cast, if 0 < int val < 100, do not add base year - LOG_DEBUG("int out of year range", K(int_val)); - ret = OB_DATA_OUT_OF_RANGE; - } else if(CAST_FAIL(ObTimeConverter::int_to_year(int_val, val))) { - LOG_WARN("int to year failed.", K(ret), K(int_val)); - } - - return ret; -} - -int ObExprJsonValue::cast_to_float(ObIJsonBase *j_base, ObObjType dst_type, float &val) -{ - INIT_SUCC(ret); - double tmp_val; - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (CAST_FAIL(j_base->to_double(tmp_val))) { - LOG_WARN("wrapper to date failed.", K(ret), K(*j_base)); - } 
else { - val = static_cast(tmp_val); - if (lib::is_mysql_mode() && CAST_FAIL(real_range_check(dst_type, tmp_val, val))) { - LOG_WARN("real_range_check failed", K(ret), K(tmp_val)); - } - } - - return ret; -} - -int ObExprJsonValue::cast_to_double(ObIJsonBase *j_base, ObObjType dst_type, double &val) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (CAST_FAIL(j_base->to_double(val))) { - LOG_WARN("wrapper to date failed.", K(ret), K(*j_base)); - } else if (ObUDoubleType == dst_type && CAST_FAIL(numeric_negative_check(val))) { - LOG_WARN("numeric_negative_check failed", K(ret), K(val)); - } - - return ret; -} - -int ObExprJsonValue::cast_to_number(common::ObIAllocator *allocator, - ObIJsonBase *j_base, - common::ObAccuracy &accuracy, - ObObjType dst_type, - number::ObNumber &val, - uint8_t &is_type_cast) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (CAST_FAIL(j_base->to_number(allocator, val))) { - is_type_cast = 1; - LOG_WARN("fail to cast json as decimal", K(ret)); - } else if (ObUNumberType == dst_type && CAST_FAIL(numeric_negative_check(val))) { - LOG_WARN("numeric_negative_check failed", K(ret), K(val)); - } else if (CAST_FAIL(number_range_check(accuracy, allocator, val))) { - LOG_WARN("number_range_check failed", K(ret), K(val)); - } - - return ret; -} - -int ObExprJsonValue::cast_to_string(common::ObIAllocator *allocator, - ObIJsonBase *j_base, - ObCollationType in_cs_type, - ObCollationType dst_cs_type, - common::ObAccuracy &accuracy, - ObObjType dst_type, - ObString &val, - uint8_t &is_type_cast, - uint8_t is_truncate) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (OB_ISNULL(allocator)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("allocator is null", K(ret)); - } else { - ObJsonBuffer j_buf(allocator); - if 
(CAST_FAIL(j_base->print(j_buf, false))) { - is_type_cast = 1; - LOG_WARN("fail to_string as json", K(ret)); - } else { - ObObjType in_type = ObLongTextType; - ObString temp_str_val(j_buf.length(), j_buf.ptr()); - bool is_need_string_string_convert = ((CS_TYPE_BINARY == dst_cs_type) || - (ObCharset::charset_type_by_coll(in_cs_type) != - ObCharset::charset_type_by_coll(dst_cs_type))); - if (is_need_string_string_convert) { - if (CS_TYPE_BINARY != in_cs_type - && CS_TYPE_BINARY != dst_cs_type - && (ObCharset::charset_type_by_coll(in_cs_type) != - ObCharset::charset_type_by_coll(dst_cs_type))) { - char *buf = NULL; - int64_t buf_len = (temp_str_val.length() == 0 ? 1 : temp_str_val.length()) * ObCharset::CharConvertFactorNum; - uint32_t result_len = 0; - buf = reinterpret_cast(allocator->alloc(buf_len)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc memory failed", K(ret)); - } else if (OB_FAIL(ObCharset::charset_convert(in_cs_type, temp_str_val.ptr(), - temp_str_val.length(), dst_cs_type, buf, - buf_len, result_len))) { - LOG_WARN("charset convert failed", K(ret)); - } else { - val.assign_ptr(buf, result_len); - } - } else { - if (CS_TYPE_BINARY == in_cs_type || CS_TYPE_BINARY == dst_cs_type) { - // just copy string when in_cs_type or out_cs_type is binary - const ObCharsetInfo *cs = NULL; - int64_t align_offset = 0; - if (CS_TYPE_BINARY == in_cs_type && lib::is_mysql_mode() - && (NULL != (cs = ObCharset::get_charset(dst_cs_type)))) { - if (cs->mbminlen > 0 && temp_str_val.length() % cs->mbminlen != 0) { - align_offset = cs->mbminlen - temp_str_val.length() % cs->mbminlen; - } - } - int64_t len = align_offset + temp_str_val.length(); - char *buf = reinterpret_cast(allocator->alloc(len)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory failed", K(ret)); - } else { - MEMMOVE(buf + align_offset, temp_str_val.ptr(), len - align_offset); - MEMSET(buf, 0, align_offset); - val.assign_ptr(buf, len); - } - } 
else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("same charset should not be here, just use cast_eval_arg", K(ret), - K(in_type), K(dst_type), K(in_cs_type), K(dst_cs_type)); - } - } - } else { - val.assign_ptr(temp_str_val.ptr(), temp_str_val.length()); - } - // do str length check - const int32_t str_len_char = static_cast(ObCharset::strlen_char(dst_cs_type, - val.ptr(), val.length())); - const ObLength max_accuracy_len = (lib::is_oracle_mode() && dst_type == ObLongTextType) ? OB_MAX_LONGTEXT_LENGTH : accuracy.get_length(); - if (OB_SUCC(ret)) { - if (max_accuracy_len == DEFAULT_STR_LENGTH) { // default string len - } else if (max_accuracy_len <= 0 || str_len_char > max_accuracy_len) { - if (str_len_char > max_accuracy_len && is_truncate && dst_type == ObVarcharType) { - val.assign_ptr(val.ptr(), max_accuracy_len); - } else { - ret = OB_OPERATE_OVERFLOW; - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "STRING", "json_value"); - } - } - } - } - } - - return ret; -} - -int ObExprJsonValue::cast_to_bit(ObIJsonBase *j_base, uint64_t &val) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (CAST_FAIL(j_base->to_bit(val))) { - LOG_WARN("fail get bit from json", K(ret)); - } - - return ret; -} - -int ObExprJsonValue::cast_to_json(common::ObIAllocator *allocator, - ObIJsonBase *j_base, ObString &val, uint8_t &is_type_cast) -{ - INIT_SUCC(ret); - - if (OB_ISNULL(j_base)) { - ret = OB_ERR_NULL_VALUE; - LOG_WARN("json base is null", K(ret)); - } else if (CAST_FAIL(j_base->get_raw_binary(val, allocator))) { - is_type_cast = 1; - LOG_WARN("failed to get raw binary", K(ret)); - } - - return ret; -} - -int ObExprJsonValue::cast_to_res(common::ObIAllocator *allocator, - const ObExpr &expr, - ObEvalCtx &ctx, - ObIJsonBase *j_base, - uint8_t error_type, - ObDatum *error_val, - ObAccuracy &accuracy, - ObObjType dst_type, - ObCollationType in_coll_type, - ObCollationType dst_coll_type, - ObDatum &res, - ObVector 
&mismatch_val, - ObVector &mismatch_type, - uint8_t &is_type_cast, - uint8_t ascii_type, - uint8_t is_truncate) -{ - INIT_SUCC(ret); - - switch (dst_type) { - case ObNullType : { - res.set_null(); - break; - } - case ObTinyIntType: - case ObSmallIntType: - case ObMediumIntType: - case ObInt32Type: - case ObIntType: { - int64_t val = 0; - ret = cast_to_int(j_base, dst_type, val); - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - res.set_int(val); + is_null_result = true; } break; } - case ObUTinyIntType: - case ObUSmallIntType: - case ObUMediumIntType: - case ObUInt32Type: - case ObUInt64Type: { - uint64_t val = 0; - ret = cast_to_uint(j_base, dst_type, val); - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - res.set_uint(val); - } + default: // empty_type from get_default_value has done range check, do nothing for default break; - } - case ObDateTimeType: { - int64_t val; - GET_SESSION() - { - ret = cast_to_datetime(j_base, allocator, session, ctx, &expr, accuracy, val, is_type_cast); - } - if (ret == OB_ERR_NULL_VALUE) { - res.set_null(); - } else if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - res.set_datetime(val); - } - break; - } - case ObTimestampNanoType: - case ObTimestampTZType: - case ObTimestampLTZType: - case ObTimestampType: { - ObOTimestampData val; - GET_SESSION() - { - ret = cast_to_otimstamp(j_base, session, ctx, &expr, accuracy, dst_type, val, is_type_cast); - } - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - if (dst_type == ObTimestampTZType) { - res.set_otimestamp_tz(val); - } else { - res.set_otimestamp_tiny(val); - } - } - break; - } - case ObDateType: { - int32_t val; - ret = cast_to_date(j_base, 
val, is_type_cast); - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - res.set_date(val); - } - break; - } - case ObTimeType: { - int64_t val = 0; - ret = cast_to_time(j_base, accuracy, val); - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - res.set_time(val); - } - break; - } - case ObYearType: { - uint8_t val = 0; - ret = cast_to_year(j_base, val); - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - res.set_year(val); - } - break; - } - case ObNumberFloatType: - case ObFloatType: - case ObUFloatType: { - float out_val = 0; - ret = cast_to_float(j_base, dst_type, out_val); - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - res.set_float(out_val); - } - break; - } - case ObDoubleType: - case ObUDoubleType: { - double out_val = 0; - ret = cast_to_double(j_base, dst_type, out_val); - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - res.set_double(out_val); - } - break; - } - case ObUNumberType: - case ObNumberType: { - number::ObNumber out_val; - ret = cast_to_number(allocator, j_base, accuracy, dst_type, out_val, is_type_cast); - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - res.set_number(out_val); - } - break; - } - case ObDecimalIntType: { - // TODO:@xiaofeng.lby, modify this after support cast json into decimalint directly in json_decimalint - number::ObNumber temp_num; - ObDecimalInt *decint = nullptr; - int32_t int_bytes; - if (OB_FAIL(cast_to_number(allocator, j_base, accuracy, ObNumberType, temp_num, is_type_cast))) { - LOG_WARN("cast_to_number 
failed", K(ret)); - } else if (OB_FAIL(wide::from_number(temp_num, *allocator, accuracy.scale_, decint, int_bytes))) { - LOG_WARN("cast number to decimal int failed", K(ret)); - } - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, ObNumberType)) { - const int len = wide::ObDecimalIntConstValue::get_int_bytes_by_precision(accuracy.precision_); - if (len < int_bytes) { - res.set_null(); - } else if (len > int_bytes) { - ObDecimalIntBuilder res_builder; - res_builder.from(decint, int_bytes); - res_builder.extend(len); - res.set_decimal_int(res_builder.get_decimal_int(), res_builder.get_int_bytes()); - } else { - res.set_decimal_int(decint, int_bytes); - } - } - break; - } - case ObVarcharType: - case ObRawType: - case ObNVarchar2Type: - case ObNCharType: - case ObCharType: - case ObTinyTextType: - case ObTextType : - case ObMediumTextType: - case ObHexStringType: - case ObLongTextType: { - ObString val; - ret = cast_to_string(allocator, j_base, in_coll_type, dst_coll_type, accuracy, dst_type, val, is_type_cast, is_truncate); - ObTextStringDatumResult text_result(expr.datum_meta_.type_, &expr, &ctx, &res); - if (OB_FAIL(ret)) { - } else if (ascii_type == 0) { - if (OB_FAIL(text_result.init(val.length()))) { - LOG_WARN("init lob result failed"); - } else if (OB_FAIL(text_result.append(val))) { - LOG_WARN("failed to append realdata", K(ret), K(val), K(text_result)); - } - } else { - char *buf = NULL; - int64_t buf_len = val.length() * ObCharset::MAX_MB_LEN * 2; - int64_t reserve_len = 0; - int32_t length = 0; + } + return ret; +} - if (OB_FAIL(text_result.init(buf_len))) { - LOG_WARN("init lob result failed"); - } else if (OB_FAIL(text_result.get_reserved_buffer(buf, reserve_len))) { - LOG_WARN("fail to get reserved buffer", K(ret)); - } else if (reserve_len != buf_len) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get reserve len is invalid", K(ret), K(reserve_len), K(buf_len)); - } else if 
(OB_FAIL(ObJsonExprHelper::calc_asciistr_in_expr(val, expr.args_[0]->datum_meta_.cs_type_, - expr.datum_meta_.cs_type_, - buf, reserve_len, length))) { - LOG_WARN("fail to calc unistr", K(ret)); - } else if (OB_FAIL(text_result.lseek(length, 0))) { - LOG_WARN("text_result lseek failed", K(ret), K(text_result), K(length)); - } - } - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - // old engine set same alloctor for wrapper, so we can use val without copy - text_result.set_result(); - } +void ObExprJsonValue::get_error_option(int8_t error_type, + bool &is_null, bool &has_default_val) +{ + switch (error_type) { + case JSN_VALUE_DEFAULT : { + has_default_val = true; break; } - case ObBitType: { - uint64_t out_val = 0; - ret = cast_to_bit(j_base, out_val); - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - res.set_bit(out_val); - } + case JSN_VALUE_NULL : + case JSN_VALUE_IMPLICIT: { + is_null = true; break; } - case ObJsonType: { - ObString out_val; - ret = cast_to_json(allocator, j_base, out_val, is_type_cast); - ObTextStringDatumResult text_result(expr.datum_meta_.type_, &expr, &ctx, &res); - if (OB_SUCC(ret)) { - if (OB_FAIL(text_result.init(out_val.length()))) { - LOG_WARN("init lob result failed"); - } else if (OB_FAIL(text_result.append(out_val))) { - LOG_WARN("failed to append realdata", K(ret), K(out_val), K(text_result)); - } - } - if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type)) { - text_result.set_result(); - } + case JSN_VALUE_ERROR : { break; } default: { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected dst_type", K(dst_type)); - try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type); break; } } - LOG_DEBUG("finish cast_to_res.", 
K(ret), K(dst_type), K(error_type)); - - return ret; } -template bool ObExprJsonValue::try_set_error_val(const ObExpr &expr, ObEvalCtx &ctx, - Obj &res, int &ret, uint8_t &error_type, - Obj *&error_val, ObVector &mismatch_val, - ObVector &mismatch_type, - uint8_t &is_type_cast, - const ObAccuracy &accuracy, ObObjType dst_type) + ObDatum &res, int &ret, + ObJsonExprParam* json_param, + uint8_t &is_type_mismatch) { bool has_set_res = true; - bool mismatch_error = true; bool is_null_res = false; bool set_default_val = false; - - bool is_cover_by_error = true; - ObObjType default_val_type; + int temp_ret = OB_SUCCESS; if (OB_FAIL(ret)) { - int temp_ret = 0; - if (lib::is_oracle_mode() && error_type == JSN_VALUE_IMPLICIT) { - temp_ret = get_on_empty_or_error(expr, ctx, JSN_VAL_ERROR, is_cover_by_error, accuracy, error_type, &error_val, dst_type, default_val_type); + if (lib::is_oracle_mode() && json_param->error_type_ == JSN_VALUE_DEFAULT) { + if (!json_param->is_error_default_const_ || OB_ISNULL(json_param->error_val_)) { + temp_ret = get_default_value(expr.args_[JSN_VAL_ERROR + 1], ctx, + json_param->accuracy_, &json_param->error_val_); + } } - if (temp_ret != OB_SUCCESS && !is_cover_by_error) { + if (temp_ret != OB_SUCCESS) { ret = temp_ret; LOG_WARN("failed to get error option.", K(temp_ret)); } else { - if (error_type == JSN_VALUE_DEFAULT) { - set_default_val = true; - } else if (error_type == JSN_VALUE_NULL || error_type == JSN_VALUE_IMPLICIT) { - is_null_res = true; - } - - if (lib::is_oracle_mode() && is_type_cast == 1) { - for(size_t i = 0; i < mismatch_val.size(); i++) { // 目前不支持UDT,因此只考虑第一个参数中的 error 和 null。 - if (mismatch_val[i] == JSN_QUERY_MISMATCH_ERROR) { - mismatch_error = false; - } else if (mismatch_val[i] == JSN_VALUE_MISMATCH_NULL || mismatch_val[i] == JSN_VALUE_MISMATCH_IGNORE) { - is_null_res = true; - } - } - if (mismatch_error) { - if (is_null_res) { - set_default_val = false; - } - } else { - is_null_res = false; - set_default_val = false; - } 
+ get_error_option(json_param->error_type_, is_null_res, set_default_val); + if (lib::is_oracle_mode() && is_type_mismatch == 1) { + get_mismatch_option(json_param->on_mismatch_, + json_param->on_mismatch_type_, + is_null_res, + set_default_val); } } - if (is_null_res) { + if (temp_ret != OB_SUCCESS) { + } else if (is_null_res) { res.set_null(); ret = OB_SUCCESS; - } else if (set_default_val && OB_NOT_NULL(error_val)) { - set_val(res, error_val); - ret = OB_SUCCESS; - if (lib::is_oracle_mode() && OB_FAIL(check_default_val_accuracy(accuracy, default_val_type, error_val))) { - LOG_WARN("default val check fail", K(ret)); + } else if (set_default_val) { + if (OB_ISNULL(json_param->error_val_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get error val", K(ret)); + } else { + set_val(res, json_param->error_val_); + ret = OB_SUCCESS; } } } else { has_set_res = false; } - return has_set_res; } +void ObExprJsonValue::get_mismatch_option(ObIArray &mismatch_val, + ObIArray &mismatch_type, + bool &is_null_res, + bool &set_default_val) +{ + bool mismatch_error = true; + for(size_t i = 0; i < mismatch_val.count(); i++) { // 目前不支持UDT,因此只考虑第一个参数中的 error 和 null。 + if (mismatch_val.at(i) == OB_JSON_ON_MISMATCH_ERROR) { + mismatch_error = false; + } else if (mismatch_val.at(i) == OB_JSON_ON_MISMATCH_NULL || mismatch_val.at(i) == OB_JSON_ON_MISMATCH_IGNORE) { + is_null_res = true; + } + } + if (mismatch_error) { + if (is_null_res) { + set_default_val = false; + } + } else { + is_null_res = false; + set_default_val = false; + } +} + int ObExprJsonValue::get_on_mismatch(const ObExpr &expr, ObEvalCtx &ctx, uint8_t index, bool &is_cover_by_error, const ObAccuracy &accuracy, - ObVector &val, - ObVector &type) + ObIArray &val, + ObIArray &type) { INIT_SUCC(ret); @@ -1825,7 +922,7 @@ int ObExprJsonValue::get_on_mismatch(const ObExpr &expr, ObDatum *json_datum = NULL; uint32_t expr_count = expr.arg_cnt_; - int32_t pos = -1; + uint32_t pos = -1; for(uint32_t i = index; OB_SUCC(ret) && i < 
expr_count; i++) { json_arg = expr.args_[i]; @@ -1833,498 +930,80 @@ int ObExprJsonValue::get_on_mismatch(const ObExpr &expr, if (OB_FAIL(json_arg->eval(ctx, json_datum))) { is_cover_by_error = false; LOG_WARN("eval json arg failed", K(ret)); - } else if (val_type != ObIntType) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input type error", K(val_type), K(ret)); } else { int64_t option_type = json_datum->get_int(); - if (option_type >= JSN_QUERY_MISMATCH_ERROR && - option_type <= JSN_VALUE_MISMATCH_IMPLICIT) { - pos ++; - if (OB_FAIL(val.push_back(static_cast(option_type)))) { - LOG_WARN("mismtach add fail", K(ret)); - } else if (OB_FAIL(type.push_back(0))) { - LOG_WARN("mismatch option add fail", K(ret)); - } - } else if (option_type >= OB_JSON_TYPE_MISSING_DATA && - option_type <= OB_JSON_TYPE_DOT) { - uint8_t old_value = 0; - switch(option_type) { - case OB_JSON_TYPE_MISSING_DATA :{ - type.replace(type.begin() + pos, (type.at(pos) | 1), old_value); - break; - } - case OB_JSON_TYPE_EXTRA_DATA :{ - type.replace(type.begin() + pos, (type.at(pos) | 2), old_value); - break; - } - case OB_JSON_TYPE_TYPE_ERROR :{ - type.replace(type.begin() + pos, (type.at(pos) | 4), old_value); - break; - } - default :{ - break; - } - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input option type error", K(option_type), K(ret)); + if (OB_FAIL(ObJsonUtil::set_mismatch_val(val, type, option_type, pos))) { + LOG_WARN("fail to set mismatch value", K(ret)); } } } return ret; } -// get clause int type -int ObExprJsonValue::get_on_ascii(const ObExpr &expr, - ObEvalCtx &ctx, - uint8_t index, - bool &is_cover_by_error, - uint8 &type) + +int ObExprJsonValue::get_default_value(ObExpr *expr, + ObEvalCtx &ctx, + const ObAccuracy &accuracy, + ObDatum **default_value) { INIT_SUCC(ret); - bool pre_check_flag = false; - ObExpr *json_arg = expr.args_[index]; - ObObjType val_type = json_arg->datum_meta_.type_; + ObObjType val_type = expr->datum_meta_.type_; ObDatum *json_datum = NULL; - if 
(OB_FAIL(json_arg->eval(ctx, json_datum))) { - is_cover_by_error = false; + if (lib::is_mysql_mode()) { + expr->extra_ &= ~CM_WARN_ON_FAIL; // make cast return error when fail + expr->extra_ &= ~CM_NO_RANGE_CHECK; // make cast check range + expr->extra_ &= ~CM_STRING_INTEGER_TRUNC; // make cast check range when string to uint + expr->extra_ |= CM_ERROR_ON_SCALE_OVER; // make cast check presion and scale + expr->extra_ |= CM_EXPLICIT_CAST; // make cast json fail return error + } + if (OB_FAIL(expr->eval(ctx, json_datum))) { LOG_WARN("eval json arg failed", K(ret)); - } else if (val_type != ObIntType) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input type error", K(val_type)); + } else if (val_type == ObNullType || json_datum->is_null()) { + } else if (OB_FAIL(check_default_val_accuracy(accuracy, val_type, json_datum))) { + LOG_WARN("failed check default value", K(ret)); } else { - int64_t option_type = json_datum->get_int(); - if (option_type < OB_JSON_ON_ASCII_IMPLICIT || - option_type > OB_JSON_ON_ASCII_USE) { + *default_value = json_datum; + } + if (ret == OB_OPERATE_OVERFLOW) { + if (val_type >= ObDateTimeType && val_type <= ObYearType) { + LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "TIME DEFAULT", "json_value"); + } else if (val_type == ObNumberType || val_type == ObUNumberType || val_type == ObDecimalIntType) { + LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "DECIMAL DEFAULT", "json_value"); + } + } + return ret; +} + +int ObExprJsonValue::deal_item_method_in_seek(ObIJsonBase*& in, + bool &is_null_result, + ObJsonPath *j_path,common::ObIAllocator *allocator, + uint8_t &is_type_mismatch) +{ + INIT_SUCC(ret); + + if (in->json_type() == ObJsonNodeType::J_OBJECT + || in->json_type() == ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_JSON_VALUE_NO_SCALAR; + } else if (j_path->is_last_func()) { + ObJsonUtil::ObItemMethodValid eval_func_ = ObJsonUtil::get_item_method_cast_res_func(j_path, in); + if (OB_ISNULL(eval_func_)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("input option type error", 
K(option_type)); - } else { - type = static_cast(option_type); + LOG_WARN("eval func can not be null", K(ret)); + } else if (OB_FAIL(((ObJsonUtil::ObItemMethodValid)(eval_func_))(in, is_null_result, allocator, is_type_mismatch))) { + LOG_WARN("fail to deal item method and seek result", K(ret)); } } - return ret; -} - -// get_on_empty_or_error(expr, ctx, 3, is_cover_by_error, accuracy, empty_type, &empty_datum); -int ObExprJsonValue::get_on_empty_or_error(const ObExpr &expr, - ObEvalCtx &ctx, - uint8_t index, - bool &is_cover_by_error, - const ObAccuracy &accuracy, - uint8_t &type, - ObDatum **default_value, - ObObjType dst_type, - ObObjType &default_val_type) -{ - INIT_SUCC(ret); - bool pre_check_flag = false; - ObExpr *json_arg = expr.args_[index]; - ObObjType val_type = json_arg->datum_meta_.type_; - ObDatum *json_datum = NULL; - if (OB_FAIL(json_arg->eval(ctx, json_datum))) { - is_cover_by_error = false; - LOG_WARN("eval json arg failed", K(ret)); - } else if (val_type != ObIntType) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input type error", K(val_type)); - } else { - int64_t option_type = json_datum->get_int(); - if (option_type < JSN_VALUE_ERROR || - option_type > JSN_VALUE_IMPLICIT) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input option type error", K(option_type)); - } else { - type = static_cast(option_type); - } - } - json_arg = expr.args_[index + 2]; - val_type = json_arg->datum_meta_.type_; - if (OB_SUCC(ret) && index != JSN_VAL_ERROR) { - if (lib::is_oracle_mode() - && (val_type == ObCharType || val_type == ObNumberType || val_type == ObDecimalIntType)) { - if (OB_FAIL(json_arg->eval(ctx, json_datum))) { - is_cover_by_error = false; - LOG_WARN("pre eval json arg failed", K(ret)); - } else { - ObString in_str(json_datum->len_, json_datum->ptr_); - if (OB_FAIL(ObJsonExprHelper::pre_default_value_check(dst_type, in_str, val_type))) { - is_cover_by_error = false; - if (ret == OB_ERR_INVALID_DEFAULT_VALUE_PROVIDED) { - pre_check_flag = true; - ret = 0; - } 
else { - LOG_WARN("default value pre check fail", K(ret), K(in_str)); - } - } - } - } - } - if (OB_SUCC(ret)) { - json_arg = expr.args_[index + 1]; - val_type = json_arg->datum_meta_.type_; - default_val_type = val_type; - json_arg->extra_ &= ~CM_WARN_ON_FAIL; // make cast return error when fail - json_arg->extra_ &= ~CM_NO_RANGE_CHECK; // make cast check range - json_arg->extra_ &= ~CM_STRING_INTEGER_TRUNC; // make cast check range when string to uint - json_arg->extra_ |= CM_ERROR_ON_SCALE_OVER; // make cast check presion and scale - json_arg->extra_ |= CM_EXPLICIT_CAST; // make cast json fail return error - if (OB_FAIL(json_arg->eval(ctx, json_datum))) { - is_cover_by_error = false; - LOG_WARN("eval json arg failed", K(ret)); - } else if (val_type == ObNullType || json_datum->is_null()) { - } else if ((lib::is_mysql_mode() || index == JSN_VAL_EMPTY) && OB_FAIL(check_default_val_accuracy(accuracy, val_type, json_datum))) { - is_cover_by_error = false; - } else { - *default_value = json_datum; - } - if (ret == OB_OPERATE_OVERFLOW) { - if (val_type >= ObDateTimeType && val_type <= ObYearType) { - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "TIME DEFAULT", "json_value"); - } else if (val_type == ObNumberType || val_type == ObUNumberType || val_type == ObDecimalIntType) { - LOG_USER_ERROR(OB_OPERATE_OVERFLOW, "DECIMAL DEFAULT", "json_value"); - } - } - } - if (pre_check_flag) { - ret = OB_ERR_INVALID_DEFAULT_VALUE_PROVIDED; - } - return ret; -} - -int ObExprJsonValue::get_cast_type( - const ObExprResType param_type2, - ObExprResType &dst_type, - ObExprTypeCtx &type_ctx) const -{ - INIT_SUCC(ret); - - if (!param_type2.is_int() && !param_type2.get_param().is_int()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("cast param type is unexpected", K(param_type2)); - } else { - const ObObj ¶m = param_type2.get_param(); - ParseNode parse_node; - parse_node.value_ = param.get_int(); - ObObjType obj_type = static_cast(parse_node.int16_values_[OB_NODE_CAST_TYPE_IDX]); - 
dst_type.set_collation_type(static_cast(parse_node.int16_values_[OB_NODE_CAST_COLL_IDX])); - dst_type.set_type(obj_type); - if (ob_is_string_type(obj_type) || ob_is_lob_locator(obj_type)) { - // cast(x as char(10)) or cast(x as binary(10)) - dst_type.set_full_length(parse_node.int32_values_[OB_NODE_CAST_C_LEN_IDX], - param_type2.get_accuracy().get_length_semantics()); - } else if (ob_is_raw(obj_type)) { - dst_type.set_length(parse_node.int32_values_[OB_NODE_CAST_C_LEN_IDX]); - } else if (ObFloatType == dst_type.get_type()) { - // Compatible with mysql. If the precision p is not specified, produces a result of type FLOAT. - // If p is provided and 0 <= < p <= 24, the result is of type FLOAT. If 25 <= p <= 53, - // the result is of type DOUBLE. If p < 0 or p > 53, an error is returned - // however, ob use -1 as default precision, so it is a valid value - ObPrecision float_precision = parse_node.int16_values_[OB_NODE_CAST_N_PREC_IDX]; - if (float_precision < -1 || float_precision > OB_MAX_DOUBLE_FLOAT_PRECISION) { - ret = OB_ERR_TOO_BIG_PRECISION; - LOG_USER_ERROR(OB_ERR_TOO_BIG_PRECISION, float_precision, "CAST", OB_MAX_DOUBLE_FLOAT_PRECISION); - } else if (float_precision <= OB_MAX_FLOAT_PRECISION) { - dst_type.set_type(ObFloatType); - } else { - dst_type.set_type(ObDoubleType); - } - dst_type.set_precision(-1); - dst_type.set_scale(parse_node.int16_values_[OB_NODE_CAST_N_SCALE_IDX]); - } else if (lib::is_mysql_mode() && ObJsonType == dst_type.get_type()) { - dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); - } else { - dst_type.set_precision(parse_node.int16_values_[OB_NODE_CAST_N_PREC_IDX]); - dst_type.set_scale(parse_node.int16_values_[OB_NODE_CAST_N_SCALE_IDX]); - if (ObNumberType == dst_type.get_type() - && is_decimal_int_accuracy_valid(dst_type.get_precision(), dst_type.get_scale())) { - bool enable_decimalint = false; - if (OB_ISNULL(type_ctx.get_session())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("type_ctx.get_session() is null", K(ret)); - } else if 
(OB_FAIL(ObSQLUtils::check_enable_decimalint(type_ctx.get_session(), - enable_decimalint))) { - LOG_WARN("fail to check_enable_decimalint_type", - K(ret), K(type_ctx.get_session()->get_effective_tenant_id())); - } else if (enable_decimalint) { - dst_type.set_type(ObDecimalIntType); - } - } - } - LOG_DEBUG("get_cast_type", K(dst_type), K(param_type2)); + if (OB_SUCC(ret) && in->json_type() == ObJsonNodeType::J_NULL) { + is_null_result = true; } return ret; } -int ObExprJsonValue::set_dest_type(ObExprResType &type1, - ObExprResType &type, - ObExprResType &dst_type, - ObExprTypeCtx &type_ctx) const -{ - INIT_SUCC(ret); - const sql::ObSQLSessionInfo *session = type_ctx.get_session(); - - if (OB_ISNULL(session)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("ptr is NULL", K(ret), KP(session)); - } else { - // always cast to user requested type - if (!lib::is_oracle_mode() && - ObCharType == dst_type.get_type()) { - // cast(x as binary(10)), in parser,binary->T_CHAR+bianry, but, result type should be varchar, so set it. - type.set_type(ObVarcharType); - } else { - type.set_type(dst_type.get_type()); - type.set_collation_type(dst_type.get_collation_type()); - } - int16_t scale = dst_type.get_scale(); - if (!lib::is_oracle_mode() - && (ObTimeType == dst_type.get_type() || ObDateTimeType == dst_type.get_type()) - && scale > MAX_SCALE_FOR_TEMPORAL) { - ret = OB_ERR_TOO_BIG_PRECISION; - LOG_USER_ERROR(OB_ERR_TOO_BIG_PRECISION, scale, "CAST", OB_MAX_DATETIME_PRECISION); - } - if (OB_SUCC(ret)) { - ObCompatibilityMode compatibility_mode = get_compatibility_mode(); - ObCollationType collation_connection = type_ctx.get_coll_type(); - ObCollationType collation_nation = session->get_nls_collation_nation(); - int32_t length = 0; - if (ob_is_string_type(dst_type.get_type()) || ob_is_json(dst_type.get_type())) { - type.set_collation_level(CS_LEVEL_IMPLICIT); - int32_t len = dst_type.get_length(); - int16_t length_semantics = ((dst_type.is_string_type()) - ? 
dst_type.get_length_semantics() - : (OB_NOT_NULL(type_ctx.get_session()) - ? type_ctx.get_session()->get_actual_nls_length_semantics() - : LS_BYTE)); - if (len > 0) { // cast(1 as char(10)) - type.set_full_length(len, length_semantics); - } else if (OB_FAIL(get_cast_string_len(type1, dst_type, type_ctx, len, length_semantics, - collation_connection))) { // cast (1 as char) - LOG_WARN("fail to get cast string length", K(ret)); - } else { - type.set_full_length(len, length_semantics); - } - if (CS_TYPE_INVALID != dst_type.get_collation_type()) { - // cast as binary - type.set_collation_type(dst_type.get_collation_type()); - } else { - // use collation of current session - type.set_collation_type(ob_is_nstring_type(dst_type.get_type()) ? - collation_nation : collation_connection); - } - } else { - type.set_length(length); - if (ObNumberTC == dst_type.get_type_class() && 0 == dst_type.get_precision()) { - // MySql:cast (1 as decimal(0)) = cast(1 as decimal) - // Oracle: cast(1.4 as number) = cast(1.4 as number(-1, -1)) - type.set_precision(ObAccuracy::DDL_DEFAULT_ACCURACY2[compatibility_mode][ObNumberType].get_precision()); - } else if (ObDecimalIntTC == dst_type.get_type_class() && 0 == dst_type.get_precision()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("dst type is decimal int, but precision is zero", K(ret), K(dst_type)); - } else if (ObIntTC == dst_type.get_type_class() || ObUIntTC == dst_type.get_type_class()) { - // for int or uint , the precision = len - int32_t len = 0; - int16_t length_semantics = LS_BYTE;//unused - if (OB_FAIL(get_cast_inttc_len(type1, dst_type, type_ctx, len, length_semantics, collation_connection))) { - LOG_WARN("fail to get cast inttc length", K(ret)); - } else { - len = len > OB_LITERAL_MAX_INT_LEN ? 
OB_LITERAL_MAX_INT_LEN : len; - type.set_precision(static_cast(len)); - } - } else if (ORACLE_MODE == compatibility_mode && ObDoubleType == dst_type.get_type()) { - ObAccuracy acc = ObAccuracy::DDL_DEFAULT_ACCURACY2[compatibility_mode][dst_type.get_type()]; - type.set_accuracy(acc); - } else { - type.set_precision(dst_type.get_precision()); - } - type.set_scale(dst_type.get_scale()); - } - } - } - - return ret; -} - -int ObExprJsonValue::get_cast_string_len(ObExprResType &type1, - ObExprResType &type2, - ObExprTypeCtx &type_ctx, - int32_t &res_len, - int16_t &length_semantics, - ObCollationType conn) const -{ - INIT_SUCC(ret); - const ObObj &val = type1.get_param(); - - if (!type1.is_literal()) { // column - res_len = CAST_STRING_DEFUALT_LENGTH[type1.get_type()]; - int16_t prec = type1.get_accuracy().get_precision(); - int16_t scale = type1.get_accuracy().get_scale(); - switch(type1.get_type()) { - case ObTinyIntType: - case ObSmallIntType: - case ObMediumIntType: - case ObInt32Type: - case ObIntType: - case ObUTinyIntType: - case ObUSmallIntType: - case ObUMediumIntType: - case ObUInt32Type: - case ObUInt64Type: { - int32_t prec = static_cast(type1.get_accuracy().get_precision()); - res_len = prec > res_len ? 
prec : res_len; - break; - } - case ObNumberType: - case ObUNumberType: - case ObDecimalIntType: { - if (lib::is_oracle_mode()) { - if (0 < prec) { - if (0 < scale) { - res_len = prec + 2; - } else if (0 == scale) { - res_len = prec + 1; - } else { - res_len = prec - scale; - } - } - } else { - if (0 < prec) { - if (0 < scale) { - res_len = prec + 2; - } else { - res_len = prec + 1; - } - } - } - break; - } - case ObTimestampTZType: - case ObTimestampLTZType: - case ObTimestampNanoType: - case ObDateTimeType: - case ObTimestampType: { - if (scale > 0) { - res_len += scale + 1; - } - break; - } - case ObTimeType: { - if (scale > 0) { - res_len += scale + 1; - } - break; - } - case ObTinyTextType: - case ObTextType: - case ObMediumTextType: - case ObLongTextType: - case ObVarcharType: - case ObCharType: - case ObHexStringType: - case ObRawType: - case ObNVarchar2Type: - case ObNCharType: { - res_len = type1.get_length(); - length_semantics = type1.get_length_semantics(); - break; - } - default: { - break; - } - } - } else if (type1.is_null()) { - res_len = 0;//compatible with mysql; - } else if (OB_ISNULL(type_ctx.get_session())) { - // calc type don't set ret, just print the log. by design. - LOG_WARN("my_session is null"); - } else { // literal - ObArenaAllocator oballocator(ObModIds::BLOCK_ALLOC); - ObCastMode cast_mode = CM_NONE; - ObCollationType cast_coll_type = (CS_TYPE_INVALID != type2.get_collation_type()) - ? 
type2.get_collation_type() - : conn; - const ObDataTypeCastParams dtc_params = - ObBasicSessionInfo::create_dtc_params(type_ctx.get_session()); - ObCastCtx cast_ctx(&oballocator, - &dtc_params, - 0, - cast_mode, - cast_coll_type); - ObString val_str; - EXPR_GET_VARCHAR_V2(val, val_str); - if (OB_SUCC(ret) && NULL != val_str.ptr()) { - int32_t len_byte = val_str.length(); - res_len = len_byte; - length_semantics = LS_CHAR; - if (NULL != val_str.ptr()) { - int32_t trunc_len_byte = static_cast(ObCharset::strlen_byte_no_sp(cast_coll_type, - val_str.ptr(), len_byte)); - res_len = static_cast(ObCharset::strlen_char(cast_coll_type, - val_str.ptr(), trunc_len_byte)); - } - if (type1.is_numeric_type() && !type1.is_integer_type()) { - res_len += 1; - } - } - } - - return ret; -} - -int ObExprJsonValue::get_cast_inttc_len(ObExprResType &type1, - ObExprResType &type2, - ObExprTypeCtx &type_ctx, - int32_t &res_len, - int16_t &length_semantics, - ObCollationType conn) const -{ - INIT_SUCC(ret); - - if (type1.is_literal()) { // literal - if (ObStringTC == type1.get_type_class()) { - res_len = type1.get_accuracy().get_length(); - length_semantics = type1.get_length_semantics(); - } else if (OB_FAIL(ObField::get_field_mb_length(type1.get_type(), - type1.get_accuracy(), type1.get_collation_type(), res_len))) { - LOG_WARN("failed to get filed mb length"); - } - } else { - res_len = CAST_STRING_DEFUALT_LENGTH[type1.get_type()]; - ObObjTypeClass tc1 = type1.get_type_class(); - int16_t scale = type1.get_accuracy().get_scale(); - if (ObDoubleTC == tc1) { - res_len -= 1; - } else if (ObDateTimeTC == tc1 && scale > 0) { - res_len += scale - 1; - } else if (OB_FAIL(get_cast_string_len(type1, type2, type_ctx, res_len, length_semantics, conn))) { - LOG_WARN("fail to get cast string length", K(ret)); - } else { - // do nothing - } - } - - return ret; -} - -bool ObExprJsonValue::type_cast_to_string(ObString &json_string, - common::ObIAllocator *allocator, - ObIJsonBase *j_base, - ObAccuracy 
&accuracy) { - INIT_SUCC(ret); - uint8_t is_type_cast = 0; - ret = cast_to_string(allocator, j_base, CS_TYPE_BINARY, CS_TYPE_BINARY, accuracy, ObLongTextType, json_string, is_type_cast, 0); - return ret == 0 ? true : false; -} - DEF_SET_LOCAL_SESSION_VARS(ObExprJsonValue, raw_expr) { int ret = OB_SUCCESS; SET_LOCAL_SYSVAR_CAPACITY(1); EXPR_ADD_LOCAL_SYSVAR(share::SYS_VAR_COLLATION_CONNECTION); return ret; } - -#undef CAST_FAIL - } } diff --git a/src/sql/engine/expr/ob_expr_json_value.h b/src/sql/engine/expr/ob_expr_json_value.h index 3368e92922..898b7081fb 100644 --- a/src/sql/engine/expr/ob_expr_json_value.h +++ b/src/sql/engine/expr/ob_expr_json_value.h @@ -18,6 +18,8 @@ #include "lib/json_type/ob_json_tree.h" #include "lib/json_type/ob_json_base.h" #include "ob_json_param_type.h" +#include "ob_expr_json_utils.h" +#include "ob_expr_json_func_helper.h" using namespace oceanbase::common; @@ -26,6 +28,10 @@ namespace oceanbase namespace sql { +/* process ascii */ +const static uint8_t OB_JSON_ON_ASCII_IMPLICIT = 0; +const static uint8_t OB_JSON_ON_ASCII_USE = 1; + class ObExprJsonValue : public ObFuncExprOperator { @@ -39,119 +45,41 @@ public: const override; static int eval_json_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); static int eval_ora_json_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); - static int cast_to_res(common::ObIAllocator *allocator, - const ObExpr &expr, - ObEvalCtx &ctx, - ObIJsonBase *j_base, - uint8_t error_type, - ObDatum *error_val, - common::ObAccuracy &accuracy, - ObObjType dst_type, - common::ObCollationType in_coll_type, - common::ObCollationType dst_coll_type, - ObDatum &res, - ObVector &mismatch_val, - ObVector &mismatch_type, - uint8_t &is_type_cast, - uint8_t ascii_type, - uint8_t is_truncate); - virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, - ObExpr &rt_expr) const override; - virtual common::ObCastMode get_cast_mode() const { return CM_ERROR_ON_SCALE_OVER;} - 
DECLARE_SET_LOCAL_SESSION_VARS; -private: + static int get_empty_or_error_type(const ObExpr &expr, + ObEvalCtx &ctx, + uint8_t index, + bool &is_cover_by_error, + const ObAccuracy &accuracy, + uint8_t &type, + ObObjType dst_type); + static int deal_item_method_in_seek(ObIJsonBase*& in, + bool &is_null_result, + ObJsonPath *j_path, + ObIAllocator *allocator, + uint8_t &is_type_mismatch); /* code for cast accuracy check */ template static int check_default_val_accuracy(const ObAccuracy &accuracy, const ObObjType &type, const Obj *obj); - static int get_accuracy_internal( - ObEvalCtx& ctx, - common::ObAccuracy &accuracy, - ObObjType &dest_type, - const int64_t value, - const ObLengthSemantics &length_semantics); - static int get_accuracy(const ObExpr &expr, - ObEvalCtx& ctx, - common::ObAccuracy &accuracy, - ObObjType &dest_type, - bool &is_cover_by_error); - static int number_range_check(const common::ObAccuracy &accuracy, - ObIAllocator *allocator, - number::ObNumber &val, - bool strict = false); - static int datetime_scale_check(const common::ObAccuracy &accuracy, - int64_t &value, - bool strict = false); - static int time_scale_check(const common::ObAccuracy &accuracy, int64_t &value, - bool strict = false); - /* cast wrapper to dst type with accuracy check*/ - static int get_cast_ret(int ret); - static int cast_to_int(ObIJsonBase *j_base, ObObjType dst_type, int64_t &val); - static int cast_to_uint(ObIJsonBase *j_base, ObObjType dst_type, uint64_t &val); - static int cast_to_datetime(ObIJsonBase *j_base, - common::ObIAllocator *allocator, - const ObBasicSessionInfo *session, - ObEvalCtx &ctx, - const ObExpr *expr, - common::ObAccuracy &accuracy, - int64_t &val, - uint8_t &is_type_cast); - static bool type_cast_to_string(ObString &json_string, - common::ObIAllocator *allocator, - ObIJsonBase *j_base, - ObAccuracy &accuracy); - static int cast_to_otimstamp(ObIJsonBase *j_base, - const ObBasicSessionInfo *session, - ObEvalCtx &ctx, - const ObExpr *expr, - 
common::ObAccuracy &accuracy, - ObObjType dst_type, - ObOTimestampData &out_val, - uint8_t &is_type_cast); - static int cast_to_date(ObIJsonBase *j_base, int32_t &val, uint8_t &is_type_cast); - static int cast_to_time(ObIJsonBase *j_base, - common::ObAccuracy &accuracy, - int64_t &val); - static int cast_to_year(ObIJsonBase *j_base, uint8_t &val); - static int cast_to_float(ObIJsonBase *j_base, ObObjType dst_type, float &val); - static int cast_to_double(ObIJsonBase *j_base, ObObjType dst_type, double &val); - static int cast_to_number(common::ObIAllocator *allocator, - ObIJsonBase *j_base, - common::ObAccuracy &accuracy, - ObObjType dst_type, - number::ObNumber &val, - uint8_t &is_type_cast); - static int cast_to_string(common::ObIAllocator *allocator, - ObIJsonBase *j_base, - ObCollationType in_cs_type, - ObCollationType dst_cs_type, - common::ObAccuracy &accuracy, - ObObjType dst_type, - ObString &val, - uint8_t &is_type_cast, - uint8_t is_truncate); - static int cast_to_bit(ObIJsonBase *j_base, uint64_t &val); - static int cast_to_json(common::ObIAllocator *allocator, ObIJsonBase *j_base, - ObString &val, uint8_t &is_type_cast); - template + virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; + virtual bool need_rt_ctx() const override { return true; } + virtual common::ObCastMode get_cast_mode() const { return CM_ERROR_ON_SCALE_OVER;} + static int calc_empty_error_type(ObExprResType* types_stack, uint8_t pos, ObExprResType &dst_type, ObExprTypeCtx& type_ctx); + static int calc_input_type(ObExprResType& types_stack, bool &is_json_input); + DECLARE_SET_LOCAL_SESSION_VARS; +private: static bool try_set_error_val(const ObExpr &expr, ObEvalCtx &ctx, - Obj &res, int &ret, uint8_t &error_type, - Obj *&error_val, ObVector &mismatch_val, - ObVector &mismatch_type, - uint8_t &is_type_cast, - const ObAccuracy &accuracy, ObObjType dst_type); - static int error_convert(int ret_old); - static int 
doc_do_seek(ObJsonBaseVector &hits, bool &is_null_result, - ObDatum *json_datum, ObJsonPath *j_path, - ObIJsonBase *j_base, const ObExpr &expr, - ObEvalCtx &ctx, bool &is_cover_by_error, - const ObAccuracy &accuracy, ObObjType dst_type, - ObDatum *&return_val, ObDatum *error_datum, - uint8_t error_type, ObDatum *empty_datum, - uint8_t &empty_type, ObObjType &default_val_type, uint8_t &is_type_cast); + ObDatum &res, int &ret, + ObJsonExprParam* json_param, + uint8_t &is_type_mismatch); + static int doc_do_seek(ObJsonSeekResult &hits, bool &is_null_result, ObJsonExprParam* json_param, + ObIJsonBase *j_base, const ObExpr &expr, ObEvalCtx &ctx, bool &is_cover_by_error, + ObDatum *&return_val, + uint8_t &is_type_mismatch); // new sql engine static inline void set_val(ObDatum &res, ObDatum *val) { res.set_datum(*val); } @@ -160,44 +88,23 @@ private: static inline void set_val(ObObj &res, ObObj *val) { res = *val; } - /* process ascii */ - const static uint8_t OB_JSON_ON_ASCII_IMPLICIT = 0; - const static uint8_t OB_JSON_ON_ASCII_USE = 1; - - /* process mismatch type { MISSING : 4 (1), EXTRA : 5 (2), TYPE : 6 (4), EMPTY : 7 (0)} make diff with mismatch type */ - const static uint8_t OB_JSON_TYPE_MISSING_DATA = 4; - const static uint8_t OB_JSON_TYPE_EXTRA_DATA = 5; - const static uint8_t OB_JSON_TYPE_TYPE_ERROR = 6; - const static uint8_t OB_JSON_TYPE_IMPLICIT = 7; - const static uint8_t OB_JSON_TYPE_DOT = 8; - - static int get_on_empty_or_error(const ObExpr &expr, - ObEvalCtx &ctx, - uint8_t index, - bool &is_cover_by_error, - const ObAccuracy &accuracy, - uint8 &type, - ObDatum **default_value, - ObObjType dst_type, - ObObjType &default_val_type); - static int get_on_ascii(const ObExpr &expr, - ObEvalCtx &ctx, - uint8_t index, - bool &is_cover_by_error, - uint8 &type); + static int get_default_value(ObExpr *expr, + ObEvalCtx &ctx, + const ObAccuracy &accuracy, + ObDatum **default_value); + static int get_default_empty_error_value(const ObExpr &expr, + ObJsonExprParam* 
json_param, + ObEvalCtx &ctx); static int get_on_mismatch(const ObExpr &expr, ObEvalCtx &ctx, uint8_t index, bool &is_cover_by_error, const ObAccuracy &accuracy, - ObVector &val, - ObVector &type); + ObIArray &val, + ObIArray &type); /* code from ob_expr_cast for cal_result_type */ - const static int32_t OB_LITERAL_MAX_INT_LEN = 21; - int get_cast_type(const ObExprResType param_type2, - ObExprResType &dst_type, - ObExprTypeCtx &type_ctx) const; + int get_cast_type(const ObExprResType param_type2, ObExprResType &dst_type) const; int set_dest_type(ObExprResType &type1, ObExprResType &type, ObExprResType &dst_type, ObExprTypeCtx &type_ctx) const; int get_cast_string_len(ObExprResType &type1, ObExprResType &type2, @@ -211,6 +118,38 @@ private: int32_t &res_len, int16_t &length_semantics, common::ObCollationType conn) const; + static int check_default_value(ObExprResType* types_stack, int8_t pos, ObExprResType &dst_type); + static int get_clause_param_value(const ObExpr &expr, ObEvalCtx &ctx, + ObJsonExprParam* json_param, + bool &is_cover_by_error); +public: + static int extract_plan_cache_param(const ObExprJsonQueryParamInfo *info, ObJsonExprParam& json_param); + static int check_param_valid(const ObExpr &expr, ObJsonExprParam* json_param, + ObJsonPath *j_path, bool &is_cover_by_error); + static int init_ctx_var(const ObExpr &expr, ObJsonParamCacheCtx* param_ctx); + static void get_mismatch_option(ObIArray &mismatch_val, + ObIArray &mismatch_type, + bool &is_null_res, + bool &set_default_val); + + static void get_error_option(int8_t error_type, + bool &is_null, bool &has_default_val); + static int get_empty_option(ObDatum *&empty_res, + bool &is_cover_by_error, + int8_t empty_type, + ObDatum *empty_datum, + bool &is_null_result); + static int set_result(const ObExpr &expr, + ObJsonExprParam* json_param, + ObEvalCtx &ctx, + bool &is_null_result, + bool &is_cover_by_error, + uint8_t &is_type_mismatch, + ObDatum &res, + ObDatum *return_val, + ObIAllocator *allocator, + 
ObJsonSeekResult &hits); + // disallow copy DISALLOW_COPY_AND_ASSIGN(ObExprJsonValue); }; diff --git a/src/sql/engine/expr/ob_expr_lob_utils.h b/src/sql/engine/expr/ob_expr_lob_utils.h index a85113fd68..bc8bea3251 100644 --- a/src/sql/engine/expr/ob_expr_lob_utils.h +++ b/src/sql/engine/expr/ob_expr_lob_utils.h @@ -207,7 +207,7 @@ public: str.assign_ptr(lob.get_inrow_data_ptr(), static_cast(lob.get_byte_size(datum.len_))); } else { const ObMemLobCommon *memlob = reinterpret_cast(datum.ptr_); - if (datum.len_ != 0 && memlob->has_inrow_data_ && memlob->has_extern_ == 0) { + if (datum.len_ != 0 && memlob->has_inrow_data_ && memlob->has_extern_ == 0 && (memlob->type_ != ObMemLobType::TEMP_DELTA_LOB)) { if (memlob->is_simple_) { str.assign_ptr(memlob->data_, static_cast(datum.len_ - sizeof(ObMemLobCommon))); } else { diff --git a/src/sql/engine/expr/ob_expr_operator_factory.cpp b/src/sql/engine/expr/ob_expr_operator_factory.cpp index a7eb87220e..e9a92b3447 100644 --- a/src/sql/engine/expr/ob_expr_operator_factory.cpp +++ b/src/sql/engine/expr/ob_expr_operator_factory.cpp @@ -313,6 +313,8 @@ #include "sql/engine/expr/ob_expr_nlssort.h" #include "sql/engine/expr/ob_expr_json_object.h" #include "sql/engine/expr/ob_expr_json_extract.h" +#include "sql/engine/expr/ob_expr_json_schema_valid.h" +#include "sql/engine/expr/ob_expr_json_schema_validation_report.h" #include "sql/engine/expr/ob_expr_json_contains.h" #include "sql/engine/expr/ob_expr_json_contains_path.h" #include "sql/engine/expr/ob_expr_json_depth.h" @@ -325,6 +327,7 @@ #include "sql/engine/expr/ob_expr_json_valid.h" #include "sql/engine/expr/ob_expr_json_remove.h" #include "sql/engine/expr/ob_expr_json_array_append.h" +#include "sql/engine/expr/ob_expr_json_append.h" #include "sql/engine/expr/ob_expr_json_array_insert.h" #include "sql/engine/expr/ob_expr_json_value.h" #include "sql/engine/expr/ob_expr_json_replace.h" @@ -401,6 +404,9 @@ #include "sql/engine/expr/ob_expr_xml_serialize.h" #include 
"sql/engine/expr/ob_expr_xmlcast.h" #include "sql/engine/expr/ob_expr_update_xml.h" +#include "sql/engine/expr/ob_expr_insert_child_xml.h" +#include "sql/engine/expr/ob_expr_xml_delete_xml.h" +#include "sql/engine/expr/ob_expr_xml_sequence.h" #include "sql/engine/expr/ob_expr_sql_udt_construct.h" #include "sql/engine/expr/ob_expr_priv_attribute_access.h" #include "sql/engine/expr/ob_expr_temp_table_ssid.h" @@ -913,6 +919,8 @@ void ObExprOperatorFactory::register_expr_operators() REG_OP(ObExprHash); REG_OP(ObExprJsonObject); REG_OP(ObExprJsonExtract); + REG_OP(ObExprJsonSchemaValid); + REG_OP(ObExprJsonSchemaValidationReport); REG_OP(ObExprJsonContains); REG_OP(ObExprJsonContainsPath); REG_OP(ObExprJsonDepth); @@ -925,6 +933,7 @@ void ObExprOperatorFactory::register_expr_operators() REG_OP(ObExprJsonSearch); REG_OP(ObExprJsonValid); REG_OP(ObExprJsonArrayAppend); + REG_OP(ObExprJsonAppend); REG_OP(ObExprJsonArrayInsert); REG_OP(ObExprJsonValue); REG_OP(ObExprJsonReplace); @@ -939,6 +948,8 @@ void ObExprOperatorFactory::register_expr_operators() REG_OP(ObExprJsonMergePatch); REG_OP(ObExprJsonPretty); REG_OP(ObExprJsonMemberOf); + REG_OP(ObExprExtractValue); + REG_OP(ObExprUpdateXml); REG_OP(ObExprSha); REG_SAME_OP(T_FUN_SYS_SHA ,T_FUN_SYS_SHA, N_SHA1, i); REG_OP(ObExprSha2); @@ -1353,6 +1364,9 @@ void ObExprOperatorFactory::register_expr_operators() REG_OP_ORCL(ObExprXmlSerialize); REG_OP_ORCL(ObExprXmlcast); REG_OP_ORCL(ObExprUpdateXml); + REG_OP_ORCL(ObExprInsertChildXml); + REG_OP_ORCL(ObExprDeleteXml); + REG_OP_ORCL(ObExprXmlSequence); REG_OP_ORCL(ObExprUdtConstruct); REG_OP_ORCL(ObExprUDTAttributeAccess); REG_OP_ORCL(ObExprTempTableSSID); diff --git a/src/sql/engine/expr/ob_expr_priv_xml_binary.cpp b/src/sql/engine/expr/ob_expr_priv_xml_binary.cpp index 370048a483..5dbd219d92 100644 --- a/src/sql/engine/expr/ob_expr_priv_xml_binary.cpp +++ b/src/sql/engine/expr/ob_expr_priv_xml_binary.cpp @@ -14,11 +14,9 @@ #define USING_LOG_PREFIX SQL_ENG #include 
"ob_expr_priv_xml_binary.h" #include "ob_expr_lob_utils.h" -#ifdef OB_BUILD_ORACLE_XML #include "sql/engine/expr/ob_expr_xml_func_helper.h" #include "lib/xml/ob_xml_util.h" #include "sql/engine/ob_exec_context.h" -#endif using namespace oceanbase::common; using namespace oceanbase::sql; @@ -70,7 +68,6 @@ int ObExprPrivXmlBinary::calc_result_typeN(ObExprResType& type, return ret; } -#ifdef OB_BUILD_ORACLE_XML int ObExprPrivXmlBinary::eval_priv_xml_binary(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { int ret = OB_SUCCESS; @@ -134,12 +131,11 @@ int ObExprPrivXmlBinary::eval_priv_xml_binary(const ObExpr &expr, ObEvalCtx &ctx ret = OB_ERR_UNEXPECTED; LOG_WARN("get null xml document.", K(ret)); } else { - xml_root = static_cast(xml_doc); ObString res_string; if (!xml_doc->get_encoding().empty() || xml_doc->get_encoding_flag()) { xml_doc->set_encoding(ObXmlUtil::get_charset_name(ObCollationType::CS_TYPE_UTF8MB4_BIN)); } - if (OB_FAIL(xml_root->get_raw_binary(res_string, &tmp_allocator))) { + if (OB_FAIL(xml_doc->get_raw_binary(res_string, &tmp_allocator))) { LOG_WARN("failed to get xml binary", K(ret), K(xml_plain_text)); } else { ObTextStringDatumResult str_result(expr.datum_meta_.type_, &expr, &ctx, &res); @@ -155,14 +151,28 @@ int ObExprPrivXmlBinary::eval_priv_xml_binary(const ObExpr &expr, ObEvalCtx &ctx } else { // must be xmlsql type // Todo: xml schema validation ObMulModeNodeType type = M_NULL; - if (OB_FAIL(ObXmlUtil::xml_bin_type(xml_plain_text, type))) { - } else if (type == M_UNPARESED_DOC) { + ObXmlBin extend(mem_ctx); + int64_t size = 0; + if (OB_FAIL(ObXmlUtil::xml_bin_header_info(xml_plain_text, type, size))) { + } else if (type == M_UNPARESED_DOC || (size < xml_plain_text.length() && type == M_DOCUMENT)) { ObString res_string; if (OB_FAIL(common::ObMulModeFactory::get_xml_base(mem_ctx, xml_plain_text, ObNodeMemType::BINARY_TYPE, ObNodeMemType::BINARY_TYPE, xml_root))) { LOG_WARN("get xml base failed", K(ret)); + } else if (OB_ISNULL(xml_root)) { + 
ret = OB_ERR_UNEXPECTED; + LOG_WARN("xml_root is null", K(ret)); + } else if (xml_root->check_extend()) { + ObXmlBin* bin = static_cast(xml_root); + if (OB_FAIL(bin->merge_extend(extend))) { + LOG_WARN("fail to merge extend", K(ret)); + } else { + xml_root = &extend; + } + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(xml_root->get_raw_binary(res_string, mem_ctx->allocator_))) { LOG_WARN("get raw binary failed", K(ret)); } else { @@ -186,7 +196,6 @@ int ObExprPrivXmlBinary::eval_priv_xml_binary(const ObExpr &expr, ObEvalCtx &ctx return ret; } -#endif int ObExprPrivXmlBinary::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { diff --git a/src/sql/engine/expr/ob_expr_priv_xml_binary.h b/src/sql/engine/expr/ob_expr_priv_xml_binary.h index c8677776a2..80bfb5c9e0 100644 --- a/src/sql/engine/expr/ob_expr_priv_xml_binary.h +++ b/src/sql/engine/expr/ob_expr_priv_xml_binary.h @@ -30,11 +30,7 @@ public: int64_t param_num, common::ObExprTypeCtx& type_ctx) const override; -#ifdef OB_BUILD_ORACLE_XML static int eval_priv_xml_binary(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); -#else - static int eval_priv_xml_binary(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { return OB_NOT_SUPPORTED; } -#endif virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override; diff --git a/src/sql/engine/expr/ob_expr_sys_makexml.cpp b/src/sql/engine/expr/ob_expr_sys_makexml.cpp index 1d650000a8..9ae323e234 100644 --- a/src/sql/engine/expr/ob_expr_sys_makexml.cpp +++ b/src/sql/engine/expr/ob_expr_sys_makexml.cpp @@ -13,9 +13,7 @@ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_sys_makexml.h" -#ifdef OB_BUILD_ORACLE_XML #include "sql/engine/expr/ob_expr_xml_func_helper.h" -#endif using namespace oceanbase::common; using namespace oceanbase::sql; @@ -79,7 +77,6 @@ int ObExprSysMakeXML::calc_result_typeN(ObExprResType& type, return ret; } -#ifdef OB_BUILD_ORACLE_XML int ObExprSysMakeXML::eval_sys_makexml(const 
ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { int ret = OB_SUCCESS; @@ -166,7 +163,6 @@ int ObExprSysMakeXML::eval_sys_makexml(const ObExpr &expr, ObEvalCtx &ctx, ObDat } return ret; } -#endif int ObExprSysMakeXML::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const diff --git a/src/sql/engine/expr/ob_expr_sys_makexml.h b/src/sql/engine/expr/ob_expr_sys_makexml.h index fd814cae70..d4b1814c7d 100644 --- a/src/sql/engine/expr/ob_expr_sys_makexml.h +++ b/src/sql/engine/expr/ob_expr_sys_makexml.h @@ -33,11 +33,7 @@ public: ObExprResType* types, int64_t param_num, common::ObExprTypeCtx& type_ctx) const override; -#ifdef OB_BUILD_ORACLE_XML static int eval_sys_makexml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); -#else - static int eval_sys_makexml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { return OB_NOT_SUPPORTED; } -#endif virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override; diff --git a/src/sql/engine/expr/ob_expr_treat.cpp b/src/sql/engine/expr/ob_expr_treat.cpp index 382cb4b932..cca1bba428 100644 --- a/src/sql/engine/expr/ob_expr_treat.cpp +++ b/src/sql/engine/expr/ob_expr_treat.cpp @@ -107,8 +107,7 @@ static int treat_as_json_udt(const ObExpr &expr, ObEvalCtx &ctx, common::ObIAllo ret = OB_ERR_UNEXPECTED; LOG_WARN("cast to json type is null", K(ret), K(opaque)); } else if(OB_ISNULL(json_doc = jsontype->get_data())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get json doc is null", K(ret), K(jsontype)); + res.set_null(); } else { ObJsonNode * json_node_copy = nullptr; if (OB_ISNULL(json_node_copy = json_doc->clone(&ctx.exec_ctx_.get_allocator()))) { diff --git a/src/sql/engine/expr/ob_expr_update_xml.cpp b/src/sql/engine/expr/ob_expr_update_xml.cpp index fd0bdc610f..a990134da2 100644 --- a/src/sql/engine/expr/ob_expr_update_xml.cpp +++ b/src/sql/engine/expr/ob_expr_update_xml.cpp @@ -12,11 +12,9 @@ */ #include "ob_expr_update_xml.h" -#ifdef OB_BUILD_ORACLE_XML #include 
"sql/engine/expr/ob_expr_xml_func_helper.h" #include "lib/xml/ob_xml_parser.h" #include "lib/xml/ob_xml_util.h" -#endif #include "sql/session/ob_sql_session_info.h" #include "sql/engine/ob_exec_context.h" #include "sql/session/ob_sql_session_info.h" @@ -44,7 +42,27 @@ int ObExprUpdateXml::calc_result_typeN(ObExprResType &type, common::ObExprTypeCtx &type_ctx) const { int ret = OB_SUCCESS; - if (param_num < 3) { + bool is_mysql_mode = lib::is_mysql_mode(); + if (is_mysql_mode) { + if (param_num != 3) { + ret = OB_ERR_PARAM_SIZE; + LOG_WARN("invalid param number", K(ret), K(param_num)); + } else { + for (int8_t i = 0; OB_SUCC(ret) && i < param_num; i++) { + ObObjType param_type = types[i].get_type(); + if (param_type == ObNullType) { + } else if (ob_is_string_type(param_type)) { + if (types[i].get_charset_type() != CHARSET_UTF8MB4) { + types[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + } + } else if (i != 1 && !ob_is_string_type(param_type)) { + types[i].set_calc_type(ObVarcharType); + types[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + } + } + } + + } else if (param_num < 3) { ret = OB_ERR_PARAM_SIZE; LOG_WARN("invalid param number", K(ret), K(param_num)); } else if (!is_called_in_sql()) { @@ -59,6 +77,12 @@ int ObExprUpdateXml::calc_result_typeN(ObExprResType &type, LOG_WARN("inconsistent datatypes", K(ret), K(ob_obj_type_str(types[0].get_type()))); } if (OB_FAIL(ret)) { + } else if (is_mysql_mode) { + type.set_type(ObLongTextType); + type.set_collation_type(CS_TYPE_UTF8MB4_BIN); + type.set_collation_level(CS_LEVEL_IMPLICIT); + type.set_length(OB_MAX_LONGTEXT_LENGTH / 4); + type.set_length_semantics(LS_BYTE); } else { bool has_ns_str = (param_num - 1) % 2 == 1; int64_t xpath_value_end = has_ns_str ? 
param_num - 1 : param_num; @@ -95,14 +119,96 @@ int ObExprUpdateXml::calc_result_typeN(ObExprResType &type, } } } - } - if (OB_SUCC(ret)) { - type.set_sql_udt(ObXMLSqlType); + if (OB_SUCC(ret)) { + type.set_sql_udt(ObXMLSqlType); + } } return ret; } -#ifdef OB_BUILD_ORACLE_XML +int ObExprUpdateXml::eval_mysql_update_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) +{ + INIT_SUCC(ret); + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &allocator = tmp_alloc_g.get_allocator(); + int64_t num_child = expr.arg_cnt_; + ObString xml_target; + ObString xpath_expr; + ObString new_xml; + ObString xml_res; + ObIMulModeBase *xml_base = nullptr; + ObXmlDocument *xml_doc = nullptr; + bool return_null = false; + ObUpdateXMLRetType res_origin = ObUpdateXMLRetType::ObRetMax; + + ObMulModeMemCtx* xml_mem_ctx = nullptr; + lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(MTL_ID(), "XMLModule")); + if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(&allocator, xml_mem_ctx))) { + LOG_WARN("fail to create tree memory context", K(ret)); + } else if (num_child != 3) { + ret = OB_ERR_PARAM_SIZE; + LOG_WARN("invalid param number", K(ret), K(num_child)); + } else if (!expr.args_[1]->is_const_expr()) { + ret = OB_XPATH_EXPRESSION_UNSUPPORTED; + LOG_WARN("args_[1] get const expr invalid", K(ret), K(expr.args_[1])); + } else if (ObNullType == expr.args_[0]->datum_meta_.type_ || + !ob_is_string_type(expr.args_[1]->datum_meta_.type_) || + ObNullType == expr.args_[2]->datum_meta_.type_) { + return_null = true; + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(expr.args_[0], ctx, xml_target, allocator))) { + LOG_WARN("failed to get xml_target str from expr", K(ret)); + } else if (xml_target.empty()) { + // do nothing + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(expr.args_[1], ctx, xpath_expr, allocator))) { + LOG_WARN("failed to get xpath expr.", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(expr.args_[2], ctx, new_xml, 
allocator))) { + LOG_WARN("failed to get new xml.", K(ret)); + } else if (OB_FAIL(ObMulModeFactory::get_xml_base(xml_mem_ctx, xml_target, ObNodeMemType::TREE_TYPE, ObNodeMemType::TREE_TYPE, xml_base, M_DOCUMENT))) { + ret = OB_SUCCESS; + if (OB_FAIL(ObMulModeFactory::get_xml_base(xml_mem_ctx, xml_target, ObNodeMemType::TREE_TYPE, ObNodeMemType::TREE_TYPE, xml_base, M_CONTENT))) { + LOG_USER_WARN(OB_ERR_XML_PARSE); + ret = OB_SUCCESS; + return_null = true; + LOG_WARN("parse xml_frag failed.", K(xml_target)); + } + } + + if (OB_FAIL(ret) || OB_ISNULL(xml_base)) { + } else if (OB_FAIL(update_xml_tree_mysql(xml_mem_ctx, new_xml, ctx, xpath_expr, xml_base, res_origin))) { + LOG_WARN("update xml tree failed", K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (return_null) { + res.set_null(); + } else if (OB_ISNULL(xml_base)) { + if (OB_FAIL(ObXMLExprHelper::set_string_result(expr, ctx, res, xml_res))) { + LOG_WARN("pack res origin failed.", K(ret), K(xml_res)); + } + } else if (res_origin == ObUpdateXMLRetType::ObRetInputStr) { + if (OB_FAIL(ObXMLExprHelper::set_string_result(expr, ctx, res, xml_target))) { + LOG_WARN("pack res origin failed.", K(ret), K(xml_target)); + } + } else if (res_origin == ObUpdateXMLRetType::ObRetNullType) { + res.set_null(); + } else { + ObStringBuffer buff(&allocator); + if (OB_ISNULL(xml_doc = static_cast(xml_base))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get xml base failed.", K(ret)); + } else if (OB_FAIL(xml_doc->print_document(buff, CS_TYPE_INVALID, ObXmlFormatType::NO_FORMAT | ObXmlFormatType::NO_ENTITY_ESCAPE))) { + LOG_WARN("failed to print document.", K(ret)); + } else { + ObString res_str = buff.string(); + if (OB_FAIL(ObXMLExprHelper::set_string_result(expr, ctx, res, res_str))) { + LOG_WARN("failed to pack long text res.", K(ret)); + } + } + } + + return ret; +} + int ObExprUpdateXml::eval_update_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { int ret = OB_SUCCESS; @@ -146,7 +252,7 @@ int 
ObExprUpdateXml::eval_update_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum if (OB_SUCC(ret)) { int64_t xpath_value_size = has_namespace_str ? num_child - 1 : num_child; - if (OB_FAIL(ObXMLExprHelper::get_xml_base(xml_mem_ctx, xml_datum, cs_type, ObNodeMemType::TREE_TYPE, xml_tree, node_type, true))) { + if (OB_FAIL(ObXMLExprHelper::get_xml_base(xml_mem_ctx, xml_datum, cs_type, ObNodeMemType::TREE_TYPE, xml_tree, node_type, ObGetXmlBaseType::OB_IS_REPARSE))) { LOG_WARN("fail to get xml base", K(ret)); } // do update xml @@ -199,6 +305,174 @@ int ObExprUpdateXml::eval_update_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum return ret; } +int ObExprUpdateXml::update_xml_tree_mysql(ObMulModeMemCtx* xml_mem_ctx, + ObString xml_target, + ObEvalCtx &ctx, + ObString &xpath_str, + ObIMulModeBase *&xml_tree, + ObUpdateXMLRetType &res_origin) +{ + INIT_SUCC(ret); + ObPathExprIter xpath_iter((static_cast(xml_tree))->get_mem_ctx()->allocator_); + ObIMulModeBase *node = NULL; + ObString default_ns; + ObPathVarObject prefix_ns(*(static_cast(xml_tree))->get_mem_ctx()->allocator_); + ObArray res_array; + ObXmlNode *xml_node = nullptr; + ObXmlText *text_node = NULL; + bool has_get_node = false; + if (OB_FAIL(xpath_iter.init(xml_mem_ctx, xpath_str, default_ns, xml_tree, &prefix_ns))) { + LOG_WARN("fail to init xpath iterator", K(xpath_str), K(default_ns), K(ret)); + ObXMLExprHelper::replace_xpath_ret_code(ret); + } else if (OB_FAIL(xpath_iter.open())) { + LOG_WARN("fail to open xpath iterator", K(ret)); + ObXMLExprHelper::replace_xpath_ret_code(ret); + } + + while (OB_SUCC(ret) && ObUpdateXMLRetType::ObRetMax == res_origin) { + if (OB_FAIL(xpath_iter.get_next_node(node))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next xml node", K(ret)); + } + } else if (has_get_node) { + res_origin = ObUpdateXMLRetType::ObRetInputStr; + } else { + has_get_node = true; + } + } + + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + } + + if (OB_FAIL(ret)) { + if (ret == OB_NOT_IMPLEMENT) { + 
ret = OB_SUCCESS; + res_origin = ObUpdateXMLRetType::ObRetNullType; + } + } else if (res_origin != ObUpdateXMLRetType::ObRetMax) { + // do nothing + } else if (OB_ISNULL(node)) { + res_origin = ObUpdateXMLRetType::ObRetInputStr; + } else if (OB_ISNULL(xml_node = static_cast(node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get xml node null", K(ret)); + } else if (OB_ISNULL(text_node = OB_NEWx(ObXmlText, xml_mem_ctx->allocator_, ObMulModeNodeType::M_TEXT, xml_mem_ctx))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("new xml text failed", K(ret)); + } else { + text_node->set_value(xml_target); + switch (xml_node->type()) { + case M_DOCUMENT: + case M_CONTENT: { + ObXmlDocument *document = NULL; + if (OB_ISNULL(document = OB_NEWx(ObXmlDocument, xml_mem_ctx->allocator_, ObMulModeNodeType::M_DOCUMENT, xml_mem_ctx))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("new xml text failed", K(ret)); + } else if (OB_FAIL(document->add_element(text_node))) { + LOG_WARN("add text failed.", K(ret)); + } else if (OB_ISNULL(xml_tree = static_cast(document))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("document cast to xml node failed.", K(ret)); + } + break; + } + case M_CDATA: + case M_INSTRUCT: + case M_COMMENT: + case M_TEXT: { + // update parent node + ObXmlNode *parent = NULL; + if (OB_ISNULL(parent = xml_node->get_parent())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get xml node parent failed.", K(ret)); + } else if (OB_FAIL(update_xml_child_text(parent, text_node))) { + LOG_WARN("failed to update text.", K(ret), K(xml_node->type())); + } + + break; + } + case M_ATTRIBUTE: { + // rewrite tree + ObXmlAttribute *attribute = NULL; + if (OB_ISNULL(attribute = OB_NEWx(ObXmlAttribute, xml_mem_ctx->allocator_, ObMulModeNodeType::M_ATTRIBUTE, xml_mem_ctx))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("new xml text failed", K(ret)); + } else { + attribute->set_xml_key(xml_target); + attribute->set_only_key(); + if (OB_FAIL(update_xml_child_text(xml_node, attribute))) { + 
LOG_WARN("update xml child failed.", K(ret), K(xml_node->type())); + } + } + + break; + } + case M_ELEMENT: { + if (OB_FAIL(update_xml_child_text(xml_node, text_node))) { + LOG_WARN("update xml child failed.", K(ret), K(xml_node->type())); + } + break; + } + case M_NAMESPACE: { + res_origin = ObUpdateXMLRetType::ObRetInputStr; + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("unsupported xml node type", K(ret), K(xml_node->type())); + break; + } + } + } + + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = xpath_iter.close())) { + LOG_WARN("fail to close xpath iter", K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + + return ret; +} + +int ObExprUpdateXml::update_xml_child_text(ObXmlNode *old_node, ObXmlNode *text_node) +{ + INIT_SUCC(ret); + ObXmlNode *parent = NULL; + ObXmlElement *ele_node = NULL; + int64_t pos = -1; + if (OB_ISNULL(old_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null node", K(ret)); + } else if (OB_ISNULL(parent = old_node->get_parent())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("parat node is null", K(ret)); + } else if (OB_ISNULL(ele_node = static_cast(parent))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("parat node is null", K(ret)); + } else { + pos = old_node->get_index(); + if (pos < 0 || pos > ele_node->count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("pos is invalid", K(ret), K(pos)); + } else if (old_node->type() == M_ATTRIBUTE) { + if (OB_FAIL(ele_node->remove_attribute(pos))) { + LOG_WARN("failed to remove attribute.", K(ret), K(pos)); + } else if (OB_FAIL(ele_node->add_attribute(text_node, false, pos))) { + LOG_WARN("failed to add attribute.", K(ret), K(pos)); + } + } else { + if (OB_FAIL(ele_node->remove_element(ele_node->at(pos)))) { + LOG_WARN("remove element failed.", K(ret), K(pos)); + } else if (OB_FAIL(ele_node->add_element(text_node, false, pos))) { + LOG_WARN("failed to add text.", K(ret)); + } + } + } + + return ret; +} + int ObExprUpdateXml::update_xml_tree(ObMulModeMemCtx* xml_mem_ctx, const ObExpr 
*expr, ObEvalCtx &ctx, @@ -619,7 +893,7 @@ int ObExprUpdateXml::update_namespace_value(ObIAllocator &allocator, ObXmlNode * int64_t pos = -1; new_ns->set_value(ns_value); new_ns->set_prefix(ObXmlConstants::XMLNS_STRING); - new_ns->set_key(key); + new_ns->set_xml_key(key); if (OB_FAIL(parent->get_attribute_pos(ObMulModeNodeType::M_NAMESPACE, key, pos))) { LOG_WARN("fail to get namespace node pos", K(ret)); } else if (OB_FAIL(parent->remove_namespace(pos))) { @@ -667,7 +941,7 @@ int ObExprUpdateXml::clear_element_child_node(ObXmlElement *ele_node) if (OB_SUCC(ret) && OB_NOT_NULL(ns)) { if (ele_node->get_prefix().empty()) { ObXmlAttribute *default_ns = NULL; - if (OB_FAIL(get_valid_default_ns_from_parent(ele_node->get_parent(), default_ns))) { + if (OB_FAIL(ObXMLExprHelper::get_valid_default_ns_from_parent(ele_node->get_parent(), default_ns))) { LOG_WARN("fail to get valid default ns", K(ret)); } else if (OB_NOT_NULL(default_ns) && default_ns->get_value().compare(ns->get_value()) == 0) { @@ -737,7 +1011,7 @@ int ObExprUpdateXml::update_namespace_xml_node(ObIAllocator &allocator, ObXmlNod LOG_WARN("fail to get attribute pos", K(ret), K(key), K(ns_node->type())); } else if (!is_default_ns && OB_FAIL(update_exist_nodes_ns(ele_node, ns_node))) { LOG_WARN("fail to update exist node ns", K(ret)); - } else if (OB_FAIL(update_new_nodes_ns(allocator, ele_node, update_node))) { + } else if (OB_FAIL(ObXMLExprHelper::update_new_nodes_ns(allocator, ele_node, update_node))) { LOG_WARN("fail to update new node ns", K(ret)); } else { // remove prefix ns: not default ns && ns of element is not this prefix && attr of element not use, remove the prefix xmlns @@ -764,7 +1038,7 @@ int ObExprUpdateXml::update_exist_nodes_ns(ObXmlElement *parent, ObXmlAttribute LOG_WARN("node is NULL", K(ret), K(parent), K(prefix_ns)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < parent->size(); i++) { - if (OB_FAIL(set_ns_recrusively(parent->at(i), prefix_ns))) { + if 
(OB_FAIL(ObXMLExprHelper::set_ns_recrusively(parent->at(i), prefix_ns))) { LOG_WARN("fail to set exist nodes ns", K(ret)); } } @@ -772,118 +1046,6 @@ int ObExprUpdateXml::update_exist_nodes_ns(ObXmlElement *parent, ObXmlAttribute return ret; } -// update the new node default ns to empty when the parent node has default ns -int ObExprUpdateXml::update_new_nodes_ns(ObIAllocator &allocator, ObXmlNode *parent, ObXmlNode *update_node) -{ - int ret = OB_SUCCESS; - ObXmlAttribute *empty_ns = NULL; - ObXmlAttribute *default_ns = NULL; - if (OB_ISNULL(parent) || OB_ISNULL(update_node)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("node is NULL", K(ret), K(parent), K(update_node)); - } else if (OB_FAIL(get_valid_default_ns_from_parent(parent, default_ns))) { - LOG_WARN("unexpected error in find default ns from parent", K(ret)); - } else if (OB_NOT_NULL(default_ns) && !default_ns->get_value().empty()) { - // need to update the new node default ns with empty default ns - if (OB_ISNULL(empty_ns = OB_NEWx(ObXmlAttribute, (&allocator), ObMulModeNodeType::M_NAMESPACE, parent->get_mem_ctx()))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc failed", K(ret)); - } else { - empty_ns->set_key(ObXmlConstants::XMLNS_STRING); - empty_ns->set_value(ObString::make_empty_string()); - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(set_ns_recrusively(update_node, empty_ns))) { - LOG_WARN("fail to set empty default ns recrusively", K(ret)); - } - } - return ret; -} - -// found valid default ns from down to top -int ObExprUpdateXml::get_valid_default_ns_from_parent(ObXmlNode *cur_node, ObXmlAttribute* &default_ns) -{ - int ret = OB_SUCCESS; - ObXmlNode* t_node = NULL; - bool is_found = false; - if (OB_ISNULL(cur_node)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("update node is NULL", K(ret)); - } else if (!ObXMLExprHelper::is_xml_element_node(cur_node->type())) { - t_node = cur_node->get_parent(); - } else { - t_node = cur_node; - } - - while(!is_found && OB_SUCC(ret) && OB_NOT_NULL(t_node)) { - 
ObXmlElement *t_element = static_cast(t_node); - ObArray attr_list; - if (OB_FAIL(t_element->get_namespace_list(attr_list))) { - LOG_WARN("fail to get namespace list", K(ret)); - } - for (int i = 0; !is_found && OB_SUCC(ret) && i < attr_list.size(); i ++) { - ObXmlAttribute *attr = static_cast(attr_list.at(i)); - if (attr->get_key().compare(ObXmlConstants::XMLNS_STRING) == 0) { - is_found = true; - default_ns = attr; - } - } - t_node = t_node->get_parent(); - } - return ret; -} - -int ObExprUpdateXml::set_ns_recrusively(ObXmlNode *update_node, ObXmlAttribute *ns) -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(update_node) || OB_ISNULL(ns)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("update node is NULL", K(ret), K(update_node), K(ns)); - } else if (!ObXMLExprHelper::is_xml_element_node(update_node->type())) { - // no need to set default ns - } else { - bool is_stop = false; - ObXmlElement *ele_node = static_cast(update_node); - ObString key = ns->get_key(); - if (ele_node->type() != M_ELEMENT) { - // skip - } else if (key.compare(ObXmlConstants::XMLNS_STRING) == 0) { - // update default ns - if (ele_node->get_prefix().empty()) { - // this condition mean: has no ns || has non-empty default ns - is_stop = true; - if (OB_ISNULL(ele_node->get_ns())) { - ele_node->add_attribute(ns, false, 0); - ele_node->set_ns(ns); - } else { /* has non-empty default ns, skip and stop find */ } - } - } else { // has prefix - ObXmlAttribute *tmp_ns = NULL; - if (ele_node->get_ns() == ns || - ele_node->has_attribute_with_ns(ns) || - OB_NOT_NULL(tmp_ns = ele_node->get_ns_by_name(key))) { - // match condition below will stop recrusive - // element use this prefix ns || attributes of element use this prefix ns || this prefix in attributes - is_stop = true; - if (OB_NOT_NULL(tmp_ns)) { // if the prefix not in attributes - } else if (OB_FAIL(ele_node->add_attribute(ns, false, 0))) { - LOG_WARN("fail to add namespace node", K(ret), K(key)); - } - } - } - - if (!is_stop) { - // find its child node 
recrusivle when no need to set default ns - for (int64_t i = 0; OB_SUCC(ret) && i < ele_node->size(); i++) { - if (OB_FAIL(SMART_CALL(set_ns_recrusively(ele_node->at(i), ns)))) { - LOG_WARN("fail set default ns in origin tree recursively", K(ret)); - } - } // end for - } // end is_stop - } - return ret; -} - // for xml nodes other than xmlattribute(including attribute and namespace) int ObExprUpdateXml::update_xml_child_node(ObIAllocator &allocator, ObXmlNode *old_node, ObXmlNode *update_node) { @@ -903,7 +1065,7 @@ int ObExprUpdateXml::update_xml_child_node(ObIAllocator &allocator, ObXmlNode *o } if (OB_FAIL(ret)) { - } else if (OB_FAIL(update_new_nodes_ns(allocator, ele_node, update_node))) { + } else if (OB_FAIL(ObXMLExprHelper::update_new_nodes_ns(allocator, ele_node, update_node))) { LOG_WARN("fail to update new node ns", K(ret)); } else if (OB_FAIL(remove_and_insert_element_node(ele_node, update_node, pos, true))) { LOG_WARN("fail to update element node", K(ret)); @@ -944,13 +1106,15 @@ int ObExprUpdateXml::remove_and_insert_element_node(ObXmlElement *ele_node, ObXm return ret; } -#endif - int ObExprUpdateXml::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { UNUSED(expr_cg_ctx); UNUSED(raw_expr); - rt_expr.eval_func_ = eval_update_xml; + if (lib::is_oracle_mode()) { + rt_expr.eval_func_ = eval_update_xml; + } else { + rt_expr.eval_func_ = eval_mysql_update_xml; + } return OB_SUCCESS; } diff --git a/src/sql/engine/expr/ob_expr_update_xml.h b/src/sql/engine/expr/ob_expr_update_xml.h index ce24b7c601..0ca3cadc66 100644 --- a/src/sql/engine/expr/ob_expr_update_xml.h +++ b/src/sql/engine/expr/ob_expr_update_xml.h @@ -15,11 +15,9 @@ #define OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_UPDATE_XML_H #include "sql/engine/expr/ob_expr_operator.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_multi_mode_interface.h" #include "lib/xml/ob_xml_tree.h" #include "lib/xml/ob_xpath.h" -#endif namespace oceanbase { @@ -27,6 +25,12 @@ namespace 
oceanbase namespace sql { +enum ObUpdateXMLRetType: uint32_t { + ObRetInputStr, + ObRetNullType, + ObRetMax +}; + class ObExprUpdateXml : public ObFuncExprOperator { public: @@ -36,17 +40,20 @@ public: ObExprResType *types, int64_t param_num, common::ObExprTypeCtx &type_ctx) const override; -#ifdef OB_BUILD_ORACLE_XML static int eval_update_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); -#else - static int eval_update_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { return OB_NOT_SUPPORTED; } -#endif + static int eval_mysql_update_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override; -#ifdef OB_BUILD_ORACLE_XML private: + static int update_xml_tree_mysql(ObMulModeMemCtx* xml_mem_ctx, + ObString xml_target, + ObEvalCtx &ctx, + ObString &xpath_str, + ObIMulModeBase *&xml_tree, + ObUpdateXMLRetType &res_origin); + static int update_xml_child_text(ObXmlNode *old_node, ObXmlNode *text_node); static int update_xml_tree(ObMulModeMemCtx* xml_mem_ctx, const ObExpr *expr, ObEvalCtx &ctx, @@ -80,7 +87,6 @@ private: static int remove_and_insert_element_node(ObXmlElement *ele_node, ObXmlNode *update_node, int64_t pos, bool is_remove); // for pi node static int update_pi_node(ObMulModeMemCtx* xml_mem_ctx, ObXmlNode *xml_node, const ObExpr *expr, ObEvalCtx &ctx); -#endif private: DISALLOW_COPY_AND_ASSIGN(ObExprUpdateXml); }; diff --git a/src/sql/engine/expr/ob_expr_xml_attributes.cpp b/src/sql/engine/expr/ob_expr_xml_attributes.cpp index 0b176edd38..4a7f8bc023 100644 --- a/src/sql/engine/expr/ob_expr_xml_attributes.cpp +++ b/src/sql/engine/expr/ob_expr_xml_attributes.cpp @@ -13,10 +13,9 @@ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_xml_attributes.h" +#include "share/ob_json_access_utils.h" #include "sql/engine/expr/ob_expr_json_func_helper.h" // may need json for kv pairs -#ifdef OB_BUILD_ORACLE_XML #include "sql/engine/expr/ob_expr_xml_func_helper.h" 
-#endif using namespace oceanbase::common; using namespace oceanbase::sql; @@ -82,7 +81,6 @@ int ObExprXmlAttributes::calc_result_typeN(ObExprResType &type, return ret; } -#ifdef OB_BUILD_ORACLE_XML int ObExprXmlAttributes::eval_xml_attributes(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { INIT_SUCC(ret); @@ -133,7 +131,7 @@ int ObExprXmlAttributes::eval_xml_attributes(const ObExpr &expr, ObEvalCtx &ctx, // set result(json bin) if (OB_SUCC(ret)) { ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, &tmp_allocator))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, &tmp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } else { uint64_t length = raw_bin.length(); @@ -150,7 +148,6 @@ int ObExprXmlAttributes::eval_xml_attributes(const ObExpr &expr, ObEvalCtx &ctx, } return ret; } -#endif int ObExprXmlAttributes::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { diff --git a/src/sql/engine/expr/ob_expr_xml_attributes.h b/src/sql/engine/expr/ob_expr_xml_attributes.h index 52679c1571..d1f1ee9dff 100644 --- a/src/sql/engine/expr/ob_expr_xml_attributes.h +++ b/src/sql/engine/expr/ob_expr_xml_attributes.h @@ -32,11 +32,7 @@ public: int64_t param_num, common::ObExprTypeCtx& type_ctx) const override; -#ifdef OB_BUILD_ORACLE_XML static int eval_xml_attributes(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); -#else - static int eval_xml_attributes(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { return OB_NOT_SUPPORTED; } -#endif virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override; diff --git a/src/sql/engine/expr/ob_expr_xml_delete_xml.cpp b/src/sql/engine/expr/ob_expr_xml_delete_xml.cpp new file mode 100644 index 0000000000..ea1a0a3771 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_xml_delete_xml.cpp @@ -0,0 +1,395 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file is for func deletexml. + */ + +#include "ob_expr_xml_delete_xml.h" +#include "ob_expr_lob_utils.h" +#include "lib/xml/ob_xml_parser.h" +#include "lib/xml/ob_xml_util.h" +#include "sql/engine/expr/ob_expr_xml_func_helper.h" +#include "lib/utility/utility.h" +#include "sql/session/ob_sql_session_info.h" +#include "sql/engine/ob_exec_context.h" + +#define USING_LOG_PREFIX SQL_ENG + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace sql +{ + +ObExprDeleteXml::ObExprDeleteXml(common::ObIAllocator &alloc) + : ObFuncExprOperator(alloc, T_FUN_SYS_DELETEXML, N_DELETEXML, MORE_THAN_ONE, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) +{ +} + +ObExprDeleteXml::~ObExprDeleteXml() {} + +int ObExprDeleteXml::calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const +{ + INIT_SUCC(ret); + + if (param_num != 3) { + ret = OB_ERR_PARAM_SIZE; + LOG_WARN("invalid argument number.", K(ret), K(param_num)); + } else if (!is_called_in_sql()) { + ret = OB_ERR_SP_LILABEL_MISMATCH; + LOG_WARN("expr call in pl semantics disallowed", K(ret), K(N_DELETEXML)); + LOG_USER_ERROR(OB_ERR_SP_LILABEL_MISMATCH, static_cast(strlen(N_DELETEXML)), N_DELETEXML); + } else { + ObObjType in_type = types[0].get_type(); + if (types[0].is_ext() && types[0].get_udt_id() == T_OBJ_XML) { + types[0].get_calc_meta().set_sql_udt(ObXMLSqlType); + } else if (!ob_is_xml_sql_type(in_type, types[0].get_subschema_id())) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + 
LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_OP, "ANYDATA", "-"); + LOG_WARN("inconsistent datatypes", K(ret), K(ob_obj_type_str(in_type))); + } else if (!types[1].is_string_type()) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("inconsistent datatypes", K(ret), K(types[1].get_type())); + } else if (!types[2].is_string_type() && !types[2].is_null()) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("inconsistent datatypes", K(ret), K(types[2].get_type())); + } + + for (int8_t i = 1; OB_SUCC(ret) && i < param_num; i++) { + ObObjType param_type = types[i].get_type(); + if (param_type == ObNullType) { + } else if (ob_is_string_type(param_type)) { + if (types[i].get_charset_type() != CHARSET_UTF8MB4) { + types[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); + } + } + } + + if (OB_SUCC(ret)) { + type.set_sql_udt(ObXMLSqlType); + } + } + return ret; +} + +int ObExprDeleteXml::eval_delete_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) +{ + INIT_SUCC(ret); + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &allocator = tmp_alloc_g.get_allocator(); + ObDatum *xml_datum = NULL; + ObIMulModeBase *xml_tree = NULL; + ObXmlDocument *xml_doc = NULL; + ObString xpath_str; + ObString namespace_str; + ObString default_ns; + ObPathVarObject prefix_ns(allocator); + ObMulModeNodeType node_type = M_MAX_TYPE; + ObPathExprIter xpath_iter(&allocator); + bool should_reparse = false; + + ObMulModeMemCtx* mem_ctx = nullptr; + lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(MTL_ID(), "XMLModule")); + if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(&allocator, mem_ctx))) { + LOG_WARN("fail to create tree memory context", K(ret)); + } else if (OB_UNLIKELY(expr.arg_cnt_ != 3)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid arg_cnt_", K(ret), K(expr.arg_cnt_)); + } else if (ObNullType == expr.args_[1]->datum_meta_.type_) { + ret = OB_ERR_INVALID_XPATH_EXPRESSION; + LOG_WARN("invalid xpath expression", K(ret)); + } else if 
(OB_FAIL(ObXMLExprHelper::get_xmltype_from_expr(expr.args_[0], ctx, xml_datum))) { + LOG_WARN("fail to get xmltype value", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(expr.args_[1], ctx, xpath_str, allocator))) { + LOG_WARN("fail to get xpath str", K(ret)); + } else if (ObNullType == expr.args_[2]->datum_meta_.type_) { + } else if (OB_FAIL(ObXMLExprHelper::get_str_from_expr(expr.args_[2], ctx, namespace_str, allocator))) { + LOG_WARN("fail to get xpath str", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::construct_namespace_params(namespace_str, default_ns, prefix_ns, allocator))) { + LOG_WARN("fail to construct namespace params", K(ret), K(namespace_str)); + } + + if (OB_FAIL(ret)) { + } else if (xpath_str.empty()) { + // do nothing + } else if (OB_FAIL(ObXMLExprHelper::get_xml_base(mem_ctx, xml_datum, ObCollationType::CS_TYPE_INVALID, ObNodeMemType::TREE_TYPE, xml_tree, node_type, ObGetXmlBaseType::OB_IS_REPARSE))) { + LOG_WARN("fail to parse xml doc", K(ret)); + } else if (OB_ISNULL(xml_tree)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get xml tree null", K(ret)); + } else if (OB_FAIL(xpath_iter.init(mem_ctx, xpath_str, default_ns, xml_tree, &prefix_ns))) { + LOG_WARN("fail to init xpath iterator", K(xpath_str), K(default_ns), K(ret)); + ObXMLExprHelper::replace_xpath_ret_code(ret); + } else if (OB_FAIL(delete_xml(xpath_iter, should_reparse))) { + LOG_WARN("delete xml failed.", K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(xml_tree) || xml_tree->count() == 0) { + res.set_null(); + } else { + ObString plain_text; + int element_count = 0; + int text_count = 0; + ObStringBuffer buff(&allocator); + + for (int i = 0; OB_SUCC(ret) && i < xml_tree->count(); i++) { + ObIMulModeBase *child_node = xml_tree->at(i); + if (OB_ISNULL(child_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xml tree child null", K(ret), K(i)); + } else if (ObMulModeNodeType::M_TEXT == child_node->type() || + ObMulModeNodeType::M_CDATA == child_node->type()) { + 
text_count++; + } else if (ObMulModeNodeType::M_ELEMENT == child_node->type()) { + element_count++; + } + } + + if (OB_FAIL(ret)) { + } else if (element_count == 0) { + res.set_null(); + } else if (node_type == M_CONTENT) { + node_type = ObMulModeNodeType::M_UNPARSED; + } else if (element_count > 1) { + node_type = ObMulModeNodeType::M_UNPARSED; + } else if (element_count == 1 && text_count > 0) { + node_type = ObMulModeNodeType::M_UNPARSED; + } else { + node_type = ObMulModeNodeType::M_DOCUMENT; + } + + if (OB_FAIL(ret) || element_count == 0) { + } else if (OB_ISNULL(xml_doc = static_cast(xml_tree))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xml tree to xmldocument failed", K(ret)); + } else if (should_reparse) { + ObString plain_text; + if (OB_FAIL(xml_doc->print(buff, ObXmlFormatType::NO_FORMAT))) { + LOG_WARN("fail to print xml tree", K(ret)); + } else if (OB_FALSE_IT(plain_text.assign_ptr(buff.ptr(), buff.length()))) { + } else if (node_type == ObMulModeNodeType::M_DOCUMENT && + OB_FAIL(ObXmlParserUtils::parse_document_text(mem_ctx, plain_text, xml_doc))) { + if (ret == OB_ERR_PARSER_SYNTAX) { + ret = OB_ERR_XML_PARSE; + } + LOG_WARN("parsing document failed", K(ret), K(plain_text)); + } else if (node_type != ObMulModeNodeType::M_DOCUMENT && + OB_FAIL(ObXmlParserUtils::parse_content_text(mem_ctx, plain_text, xml_doc))) { + if (ret == OB_ERR_PARSER_SYNTAX) { + ret = OB_ERR_XML_PARSE; + } + LOG_WARN("parsing document failed", K(ret), K(plain_text)); + } else if (OB_FAIL(ObXMLExprHelper::pack_xml_res(expr, ctx, res, xml_doc, mem_ctx, node_type, plain_text))) { + LOG_WARN("failed to pack xml res", K(ret)); + } + } else if (ObMulModeNodeType::M_UNPARSED == node_type) { + if (OB_FAIL(xml_doc->print(buff, ObXmlFormatType::NO_FORMAT))) { + LOG_WARN("fail to print xml tree", K(ret)); + } else if (OB_FALSE_IT(plain_text.assign_ptr(buff.ptr(), buff.length()))) { + } else if (OB_FAIL(ObXMLExprHelper::pack_xml_res(expr, ctx, res, NULL, mem_ctx, node_type, plain_text))) { + 
LOG_WARN("failed to pack xml res", K(ret)); + } + } else if (OB_FAIL(ObXMLExprHelper::pack_xml_res(expr, ctx, res, xml_doc, mem_ctx, node_type, plain_text))) { + LOG_WARN("failed to pack xml res", K(ret)); + } + } + + return ret; +} + +int ObExprDeleteXml::delete_xml(ObPathExprIter &xpath_iter, bool &should_reparse) +{ + INIT_SUCC(ret); + ObIMulModeBase *node = NULL; + ObArray res_array; + + if (OB_FAIL(xpath_iter.open())) { + LOG_WARN("fail to open xpath iterator", K(ret)); + ObXMLExprHelper::replace_xpath_ret_code(ret); + } + + while (OB_SUCC(ret)) { + if (OB_FAIL(xpath_iter.get_next_node(node))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next xml node", K(ret)); + } + } else if (node->type() == ObMulModeNodeType::M_CONTENT) { + ret = OB_ERR_XML_NOT_SUPPORT_OPERATION; + LOG_USER_ERROR(OB_ERR_XML_NOT_SUPPORT_OPERATION, "fragment"); + LOG_WARN("XML node '' (type=fragment) does not support this operation", K(ret)); + } else if (node->type() == ObMulModeNodeType::M_DOCUMENT) { + ret = OB_ERR_XML_NOT_SUPPORT_OPERATION; + LOG_USER_ERROR(OB_ERR_XML_NOT_SUPPORT_OPERATION, "document"); + LOG_WARN("XML node '' (type=document) does not support this operation", K(ret)); + } else if (OB_FAIL(res_array.push_back(node))) { + LOG_WARN("fail to push xml node", K(ret)); + } + } + + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + } + + for (int i = 0; OB_SUCC(ret) && i < res_array.size(); i++) { + ObMulModeNodeType delete_type = M_MAX_TYPE; + ObXmlNode *delete_node = NULL; + if (OB_ISNULL(res_array[i])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xpath result node is null", K(ret)); + } else if (OB_ISNULL(delete_node = static_cast(res_array[i]))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xpath result node xmlnode is null", K(ret)); + } else { + delete_type = delete_node->type(); + switch (delete_type) { + case M_TEXT: + case M_COMMENT: + case M_CDATA: + case M_ELEMENT: + case M_INSTRUCT: { + if (OB_FAIL(delete_leaf_node(delete_node))) { + LOG_WARN("delete leaf node failed.", 
K(ret), K(delete_type)); + } + break; + } + case M_ATTRIBUTE: { + if (OB_FAIL(delete_attribute_node(delete_node))) { + LOG_WARN("delete attributes failed.", K(ret)); + } + break; + } + case M_NAMESPACE: { + should_reparse = true; + if (OB_FAIL(delete_namespace_node(delete_node))) { + LOG_WARN("delete namespace failed.", K(ret)); + } + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("unsupported xml node type", K(ret), K(delete_type)); + break; + } + } + } + } + + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = xpath_iter.close())) { + LOG_WARN("fail to close xpath iter", K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + + return ret; +} + +int ObExprDeleteXml::delete_namespace_node(ObXmlNode *delete_node) +{ + INIT_SUCC(ret); + ObString key; + int64_t pos = -1; + ObXmlNode *parent = NULL; + ObXmlElement *ele_node = NULL; + ObXmlAttribute *ns_node = NULL; + + if (OB_ISNULL(delete_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get delete node null.", K(ret)); + } else if (M_NAMESPACE != delete_node->type()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get delete node null.", K(ret)); + } else if (OB_ISNULL(ns_node = static_cast(delete_node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("delete node cast to attributes node null.", K(ret)); + } else if (OB_ISNULL(parent = ns_node->get_parent())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attribute parent node is NULL", K(ret)); + } else if (OB_FALSE_IT(key = ns_node->get_key())) { + } else if (parent->type() != M_ELEMENT) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("parent of namespace node is not an element node", K(ret), K(parent->type())); + } else if (FALSE_IT(ele_node = static_cast(parent))) { + } else if (OB_FAIL(ele_node->get_attribute_pos(ns_node->type(), key, pos))) { + LOG_WARN("fail to get attribute pos", K(ret), K(key), K(ns_node->type())); + } else if (OB_FAIL(ele_node->remove_namespace(pos))) { + LOG_WARN("remove namespace failed.", K(ret), K(pos)); + } + + return ret; +} + +int 
ObExprDeleteXml::delete_attribute_node(ObXmlNode *delete_node) +{ + INIT_SUCC(ret); + int64_t pos = -1; + ObXmlNode *parent = NULL; + ObXmlElement *ele_node = NULL; + ObString key; + + if (OB_ISNULL(delete_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get delete node null.", K(ret)); + } else if (OB_FALSE_IT(key = delete_node->get_key())) { + } else if (OB_ISNULL(parent = delete_node->get_parent())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xml node get parent is null", K(ret)); + } else if (parent->type() != M_ELEMENT) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("parent of attribute node is not an element node", K(ret), K(parent->type())); + } else if (OB_ISNULL(ele_node = static_cast(parent))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get parent cast to element node null.", K(ret)); + } else if (OB_FAIL(ele_node->get_attribute_pos(delete_node->type(), key, pos))) { + LOG_WARN("get attributes pos failed.", K(ret), K(key), K(delete_node->type())); + } else if (OB_FAIL(ele_node->remove_attribute(pos))) { + LOG_WARN("failed to remove attributes.", K(ret), K(pos)); + } + + return ret; +} + +int ObExprDeleteXml::delete_leaf_node(ObXmlNode *delete_node) +{ + INIT_SUCC(ret); + int64_t pos = -1; + ObXmlNode *parent = NULL; + + if (OB_ISNULL(delete_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get delete node null.", K(ret)); + } else if (OB_FALSE_IT(pos = delete_node->get_index())) { + } else if (OB_ISNULL(parent = delete_node->get_parent())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xml node get parent is null", K(ret)); + } else if (pos >= parent->count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get pos invalid.", K(ret), K(pos), K(parent->count())); + } else if (OB_FAIL(parent->remove(pos))) { + LOG_WARN("failed to remove text.", K(ret), K(pos)); + } + + return ret; +} + +int ObExprDeleteXml::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const +{ + UNUSED(expr_cg_ctx); + UNUSED(raw_expr); + rt_expr.eval_func_ = eval_delete_xml; + return 
OB_SUCCESS; +} + +} // end of sql +} // end of oceanbase \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_xml_delete_xml.h b/src/sql/engine/expr/ob_expr_xml_delete_xml.h new file mode 100644 index 0000000000..be90853432 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_xml_delete_xml.h @@ -0,0 +1,53 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file is for func deletexml. + */ + +#ifndef OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_XML_DELETE_XML_H +#define OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_XML_DELETE_XML_H + +#include "sql/engine/expr/ob_expr_operator.h" +#include "lib/xml/ob_xpath.h" + +namespace oceanbase { + +namespace sql +{ + +class ObExprDeleteXml : public ObFuncExprOperator +{ +public: + explicit ObExprDeleteXml(common::ObIAllocator &alloc); + virtual ~ObExprDeleteXml(); + + virtual int calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const override; + + static int eval_delete_xml(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) + const override; +private: + static int delete_xml(ObPathExprIter &xpath_iter, bool &should_reparse); + static int delete_leaf_node(ObXmlNode *delete_node); + static int delete_namespace_node(ObXmlNode *delete_node); + static int delete_attribute_node(ObXmlNode *delete_node); +private: + DISALLOW_COPY_AND_ASSIGN(ObExprDeleteXml); +}; + +} // sql +} // oceanbase + +#endif // 
OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_XML_DELETE_XML_H \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_xml_element.cpp b/src/sql/engine/expr/ob_expr_xml_element.cpp index 02fa0936a6..a4ab7ce894 100644 --- a/src/sql/engine/expr/ob_expr_xml_element.cpp +++ b/src/sql/engine/expr/ob_expr_xml_element.cpp @@ -14,10 +14,8 @@ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_xml_element.h" #include "sql/engine/ob_exec_context.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_util.h" #include "sql/engine/expr/ob_expr_xml_func_helper.h" -#endif using namespace oceanbase::common; using namespace oceanbase::sql; @@ -99,7 +97,6 @@ int ObExprXmlElement::calc_result_typeN(ObExprResType& type, return ret; } -#ifdef OB_BUILD_ORACLE_XML int ObExprXmlElement::eval_xml_element(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { INIT_SUCC(ret); @@ -437,7 +434,7 @@ int ObExprXmlElement::construct_element(ObMulModeMemCtx* mem_ctx, LOG_WARN("element node is NULL", K(ret)); } else if (OB_FAIL(element->init())) { LOG_WARN("element init failed", K(ret)); - } else if (FALSE_IT(element->set_key(name))) { + } else if (FALSE_IT(element->set_xml_key(name))) { } else if (OB_FAIL(element->alter_member_sort_policy(false))) { LOG_WARN("fail to sort child element", K(ret)); } else if (OB_FAIL(construct_attribute(mem_ctx, attr, element))) { @@ -453,7 +450,6 @@ int ObExprXmlElement::construct_element(ObMulModeMemCtx* mem_ctx, } return ret; } -#endif int ObExprXmlElement::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, diff --git a/src/sql/engine/expr/ob_expr_xml_element.h b/src/sql/engine/expr/ob_expr_xml_element.h index 88b444c6ee..aeba3399c8 100644 --- a/src/sql/engine/expr/ob_expr_xml_element.h +++ b/src/sql/engine/expr/ob_expr_xml_element.h @@ -19,9 +19,7 @@ #include "lib/json_type/ob_json_bin.h" // for ObJsonBin #include "sql/engine/expr/ob_expr_lob_utils.h" #include "lib/container/ob_vector.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_util.h" -#endif 
using namespace oceanbase::common; @@ -41,16 +39,11 @@ public: int64_t param_num, common::ObExprTypeCtx& type_ctx) const override; -#ifdef OB_BUILD_ORACLE_XML static int eval_xml_element(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); -#else - static int eval_xml_element(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { return OB_NOT_SUPPORTED; } -#endif virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override; private: -#ifdef OB_BUILD_ORACLE_XML static int get_keys_from_wrapper(ObIJsonBase *json_doc, ObIAllocator *allocator, ObString &str); @@ -70,7 +63,6 @@ private: static int construct_value_array(ObIAllocator &allocator, const ObString &value, ObVector &res_value); -#endif DISALLOW_COPY_AND_ASSIGN(ObExprXmlElement); }; diff --git a/src/sql/engine/expr/ob_expr_xml_func_helper.cpp b/src/sql/engine/expr/ob_expr_xml_func_helper.cpp index 2803ac5e38..eea19da2d6 100644 --- a/src/sql/engine/expr/ob_expr_xml_func_helper.cpp +++ b/src/sql/engine/expr/ob_expr_xml_func_helper.cpp @@ -15,17 +15,16 @@ #include "lib/ob_errno.h" #include "sql/engine/expr/ob_expr_xml_func_helper.h" #include "sql/engine/expr/ob_expr_lob_utils.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xpath.h" #include "lib/xml/ob_xml_bin.h" #include "lib/xml/ob_xml_util.h" #include "lib/xml/ob_xml_parser.h" -#endif // OB_BUILD_ORACLE_XML #include "sql/engine/expr/ob_expr_sql_udt_utils.h" #include "sql/session/ob_sql_session_info.h" #include "sql/engine/ob_exec_context.h" #include "sql/ob_result_set.h" #include "sql/ob_spi.h" +#include "lib/xml/ob_binary_aggregate.h" #ifdef OB_BUILD_ORACLE_PL #include "pl/sys_package/ob_sdo_geometry.h" #endif @@ -37,7 +36,6 @@ namespace oceanbase { namespace sql { -#ifdef OB_BUILD_ORACLE_XML uint64_t ObXMLExprHelper::get_tenant_id(ObSQLSessionInfo *session) { uint64_t tenant_id = 0; @@ -79,7 +77,7 @@ int ObXMLExprHelper::add_binary_to_element(ObMulModeMemCtx* mem_ctx, ObString bi LOG_WARN("fail to allocate buffer", 
K(ret)); } else if (OB_FAIL(bin.parse())) { LOG_WARN("fail to parse binary.", K(ret)); - } else if (OB_FAIL(bin.print_xml(*buffer, ObXmlFormatType::NO_FORMAT, 0, 0))) { + } else if (OB_FAIL(bin.print(*buffer, ObXmlFormatType::NO_FORMAT, 0, 0))) { LOG_WARN("fail to print xml", K(ret)); } else if (FALSE_IT(xml_text.assign_ptr(buffer->ptr(), buffer->length()))) { } else if (OB_FAIL(ObXmlParserUtils::parse_content_text(mem_ctx, xml_text, x_doc))) { @@ -130,6 +128,23 @@ int ObXMLExprHelper::add_binary_to_element(ObMulModeMemCtx* mem_ctx, ObString bi return ret; } +int ObXMLExprHelper::get_xml_base(ObMulModeMemCtx *mem_ctx, + ObDatum *xml_datum, + ObEvalCtx &ctx, + ObIMulModeBase *&xml_doc, + ObGetXmlBaseType base_flag) +{ + INIT_SUCC(ret); + ObMulModeNodeType node_type = M_DOCUMENT; + ObNodeMemType expect_type = ObNodeMemType::BINARY_TYPE; + ObCollationType cs_type = CS_TYPE_UTF8MB4_BIN; + + if (OB_FAIL(ObXMLExprHelper::get_xml_base(mem_ctx, xml_datum, cs_type, expect_type, xml_doc, node_type, base_flag))) { + LOG_WARN("fail to parse xml doc", K(ret)); + } + return ret; +} + int ObXMLExprHelper::get_xml_base(ObMulModeMemCtx *ctx, ObDatum *xml_datum, ObCollationType cs_type, @@ -137,7 +152,7 @@ int ObXMLExprHelper::get_xml_base(ObMulModeMemCtx *ctx, ObIMulModeBase *&node) { ObMulModeNodeType node_type = M_MAX_TYPE; - return get_xml_base(ctx, xml_datum, cs_type, expect_type, node, node_type, false); + return get_xml_base(ctx, xml_datum, cs_type, expect_type, node, node_type, ObGetXmlBaseType::OB_MAX); } int ObXMLExprHelper::get_xml_base(ObMulModeMemCtx *ctx, ObDatum *xml_datum, @@ -145,7 +160,7 @@ int ObXMLExprHelper::get_xml_base(ObMulModeMemCtx *ctx, ObNodeMemType expect_type, ObIMulModeBase *&node, ObMulModeNodeType &node_type, - bool is_reparse) + ObGetXmlBaseType base_flag) { int ret = OB_SUCCESS; // temporary use until xml binary ready @@ -169,9 +184,11 @@ int ObXMLExprHelper::get_xml_base(ObMulModeMemCtx *ctx, } } else if (OB_FAIL(ObXmlUtil::xml_bin_type(xml_text, 
node_type))) { LOG_WARN("failed to get bin header.", K(ret)); - } else if (is_reparse) { + } else if (ObGetXmlBaseType::OB_IS_REPARSE == base_flag) { ObStringBuffer *buff = nullptr; ParamPrint param_list; + ObNsSortedVector* ns_vec_point = nullptr; + ObNsSortedVector ns_vec; if (OB_ISNULL(buff = OB_NEWx(ObStringBuffer, ctx->allocator_, (ctx->allocator_)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("create obstrinbuffer failed", K(ret)); @@ -182,9 +199,11 @@ int ObXMLExprHelper::get_xml_base(ObMulModeMemCtx *ctx, LOG_WARN("fail to get xml base", K(ret)); } else if (node_type == M_UNPARESED_DOC && OB_FALSE_IT(node_type = M_DOCUMENT)) { } else if (node_type == M_UNPARSED) { - } else if (node_type == M_DOCUMENT && OB_FAIL(node->print_document(*buff, CS_TYPE_INVALID, ObXmlFormatType::NO_FORMAT, 0))) { + } else if (OB_FAIL(ObXmlUtil::init_print_ns(&(*ctx->allocator_), node, ns_vec, ns_vec_point))) { + LOG_WARN("fail to init ns vector by extend area", K(ret)); + } else if (node_type == M_DOCUMENT && OB_FAIL(node->print_document(*buff, CS_TYPE_INVALID, ObXmlFormatType::NO_FORMAT, 0, ns_vec_point))) { LOG_WARN("failed to convert xml binary to xml text", K(ret)); - } else if (node_type == M_CONTENT && OB_FAIL(node->print_content(*buff, false, false, ObXmlFormatType::NO_FORMAT, param_list))) { + } else if (node_type == M_CONTENT && OB_FAIL(node->print_content(*buff, false, false, ObXmlFormatType::NO_FORMAT, param_list, ns_vec_point))) { LOG_WARN("failed to convert xml binary to xml text", K(ret)); } else { xml_text.assign_ptr(buff->ptr(), buff->length()); @@ -200,7 +219,10 @@ int ObXMLExprHelper::get_xml_base(ObMulModeMemCtx *ctx, } else if (OB_FAIL(ObMulModeFactory::get_xml_base(ctx, xml_text, ObNodeMemType::BINARY_TYPE, expect_type, - node))) { + node, + M_DOCUMENT, + false, + ObGetXmlBaseType::OB_SHOULD_CHECK == base_flag))) { LOG_WARN("fail to get xml base", K(ret)); } @@ -225,6 +247,8 @@ int ObXMLExprHelper::try_to_parse_unparse_binary(ObMulModeMemCtx* mem_ctx, 
ObString unparse_str; ObXmlDocument *xml_doc = nullptr; ObStringBuffer *buff = nullptr; + ObNsSortedVector* ns_vec_point = nullptr; + ObNsSortedVector ns_vec; if (OB_ISNULL(input_node) || !(input_node->type() == M_DOCUMENT || input_node->type() == M_UNPARSED)) { ret = OB_ERR_UNEXPECTED; @@ -233,7 +257,9 @@ int ObXMLExprHelper::try_to_parse_unparse_binary(ObMulModeMemCtx* mem_ctx, ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("create obstrinbuffer failed", K(input_node)); } else { - if (OB_FAIL(input_node->print_document(*buff, cs_type, ObXmlFormatType::NO_FORMAT, 0))) { + if (OB_FAIL(ObXmlUtil::init_print_ns(mem_ctx->allocator_, input_node, ns_vec, ns_vec_point))) { + LOG_WARN("fail to init ns vector by extend area", K(ret)); + } else if (OB_FAIL(input_node->print_document(*buff, cs_type, ObXmlFormatType::NO_FORMAT, 0, ns_vec_point))) { LOG_WARN("fail to serialize unparse string", K(ret)); } else { unparse_str.assign_ptr(buff->ptr(), buff->length()); @@ -366,8 +392,8 @@ int ObXMLExprHelper::parse_namespace_str(ObString &ns_str, ObString &prefix, ObS LOG_WARN("not invalid xml namespace string", K(ret), K(ns_str), K(idx)); } } else { - ret = OB_ERR_INVALID_XPATH_EXPRESSION; - LOG_WARN("not invalid xml namespace string", K(ret), K(ns_str), K(idx)); + uri_start = str + idx; + uri_len = str_len - idx; } } else { ret = OB_ERR_INVALID_XPATH_EXPRESSION; @@ -420,6 +446,43 @@ int ObXMLExprHelper::construct_namespace_params(ObString &namespace_str, return ret; } +int ObXMLExprHelper::construct_namespace_params(ObIArray &namespace_arr, + ObString &default_ns, + void *&prefix_ns, + ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + ObPathVarObject *prefix_ns_ = NULL; + if (OB_ISNULL(prefix_ns_ = static_cast(allocator.alloc(sizeof(ObPathVarObject))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc ObPathVarObject", K(ret)); + } else { + prefix_ns_ = new (prefix_ns_) ObPathVarObject(allocator); + } + for (int64_t i = 0; i < namespace_arr.count() && OB_SUCC(ret); i 
+= 2) { + if ((i + 1) >= namespace_arr.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get value from namespace arr", K(ret)); + } else if (namespace_arr.at(i + 1).empty()) { + default_ns = namespace_arr.at(i); + } else { + ObString prefix_str; + if (namespace_arr.at(i).empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("uri is not allow to empty", K(ret)); + } else if (OB_FAIL(ob_write_string(allocator, namespace_arr.at(i + 1), prefix_str))) { // deep copy prefix, path only copy value + LOG_WARN("fail to wirte prefix string", K(ret), K(default_ns)); + } else if (OB_FAIL(add_ns_to_container_node(*prefix_ns_, prefix_str, namespace_arr.at(i), allocator))) { + LOG_WARN("fail to add prefix namespace node", K(ret), K(namespace_arr.at(i + 1)), K(namespace_arr.at(i))); + } + } + } // end for + if (OB_SUCC(ret)) { + prefix_ns = prefix_ns_; + } + return ret; +} + int ObXMLExprHelper::add_ns_to_container_node(ObPathVarObject &container, ObString &prefix, ObString &uri, @@ -512,6 +575,44 @@ int ObXMLExprHelper::get_xmltype_from_expr(const ObExpr *expr, return ret; } +int ObXMLExprHelper::get_xml_base_from_expr(const ObExpr *expr, + ObMulModeMemCtx *mem_ctx, + ObEvalCtx &ctx, + ObIMulModeBase *&node) +{ + INIT_SUCC(ret); + ObDatum *t_datum = NULL; + ObObjType val_type = expr->datum_meta_.type_; + ObString xml_text; + uint16_t sub_schema_id = expr->obj_meta_.get_subschema_id(); + if (OB_FAIL(expr->eval(ctx, t_datum))) { + LOG_WARN("eval xml arg failed", K(ret)); + } else if (!ob_is_xml_sql_type(val_type, sub_schema_id)) { + if (ob_is_string_type(val_type)) { + if (OB_FAIL(ObTextStringHelper::read_real_string_data(*mem_ctx->allocator_, *t_datum, expr->datum_meta_, expr->obj_meta_.has_lob_header(), xml_text))) { + LOG_WARN("fail to get real data.", K(ret), K(xml_text)); + } else if (xml_text.empty()) { + ret = OB_ERR_XQUERY_TYPE_MISMATCH; + LOG_WARN("node is NULL", K(ret)); + } else if (OB_FAIL(ObMulModeFactory::get_xml_base(mem_ctx, xml_text, + ObNodeMemType::TREE_TYPE, 
+ ObNodeMemType::BINARY_TYPE, + node, M_DOCUMENT, false, true))) { + LOG_WARN("fail to get xml base", K(ret)); + } + } else { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_OP, ob_obj_type_str(val_type), "xmltype"); + LOG_WARN("inconsistent datatypes", K(ret), K(ob_obj_type_str(val_type))); + } + } else if (t_datum->is_null()) { + ret = OB_ITER_END; + } else if (OB_FAIL(get_xml_base(mem_ctx, t_datum, ctx, node, ObGetXmlBaseType::OB_SHOULD_CHECK))) { + LOG_WARN("fail to get xml node", K(ret)); + } + return ret; +} + bool ObXMLExprHelper::is_xml_leaf_node(ObMulModeNodeType node_type) { return node_type == ObMulModeNodeType::M_ATTRIBUTE || @@ -574,7 +675,22 @@ int ObXMLExprHelper::check_xml_document_unparsed(ObMulModeMemCtx* mem_ctx, ObStr validity = false; } else { validity = true; - } + } + } + return ret; +} + +int ObXMLExprHelper::append_header_in_front(ObIAllocator &allocator, ObXmlDocument *&root, ObIMulModeBase *node) +{ + INIT_SUCC(ret); + ObXmlDocument *xml_root = NULL; + if (OB_ISNULL(xml_root = OB_NEWx(ObXmlDocument, (&allocator), ObMulModeNodeType::M_CONTENT, (root->get_mem_ctx())))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to create xml text node", K(ret)); + } else if (OB_FAIL(xml_root->append(node))) { + LOG_WARN("fail to append node value to content", K(ret)); + } else if (OB_FAIL(root->append(xml_root))) { + LOG_WARN("fail to append node value to content", K(ret)); } return ret; } @@ -588,6 +704,8 @@ int ObXMLExprHelper::parse_xml_document_unparsed(ObMulModeMemCtx* mem_ctx, ObStr ObIMulModeBase* tree = nullptr; ObXmlParser parser(mem_ctx); ObStringBuffer buff(mem_ctx->allocator_); + ObNsSortedVector* ns_vec_point = nullptr; + ObNsSortedVector ns_vec; if (OB_FAIL(ObXmlUtil::xml_bin_type(binary_str, node_type))) { LOG_WARN("get xml bin type failed", K(ret)); } else if (node_type == ObMulModeNodeType::M_UNPARESED_DOC) { @@ -596,7 +714,9 @@ int ObXMLExprHelper::parse_xml_document_unparsed(ObMulModeMemCtx* 
mem_ctx, ObStr } else if (FALSE_IT(tree = doc)) { } else if (OB_FAIL(bin.parse_tree(tree))) { LOG_WARN("parse tree failed", K(ret)); - } else if (OB_FAIL(bin.print_document(buff, CS_TYPE_UTF8MB4_GENERAL_CI, ObXmlFormatType::NO_FORMAT))) { + } else if (OB_FAIL(ObXmlUtil::init_print_ns(mem_ctx->allocator_, &bin, ns_vec, ns_vec_point))) { + LOG_WARN("fail to init ns vector by extend area", K(ret)); + } else if (OB_FAIL(bin.print_document(buff, CS_TYPE_UTF8MB4_GENERAL_CI, ObXmlFormatType::NO_FORMAT, 2, ns_vec_point))) { LOG_WARN("print document failed"); } else if (OB_FAIL(parser.parse_document(buff.string()))) { ret = OB_ERR_XML_PARSE; @@ -620,6 +740,8 @@ int ObXMLExprHelper::content_unparsed_binary_check_doc(ObMulModeMemCtx* mem_ctx, ObStringBuffer buff(mem_ctx->allocator_); ObXmlDocument* new_doc = nullptr; ParamPrint param_list; // unused + ObNsSortedVector* ns_vec_point = nullptr; + ObNsSortedVector ns_vec; if (OB_FAIL(ObXmlUtil::xml_bin_type(binary_str, node_type))) { LOG_WARN("get xml bin type failed", K(ret)); } else if (node_type == ObMulModeNodeType::M_UNPARSED) { @@ -628,7 +750,9 @@ int ObXMLExprHelper::content_unparsed_binary_check_doc(ObMulModeMemCtx* mem_ctx, } else if (FALSE_IT(tree = doc)) { } else if (OB_FAIL(bin.parse_tree(tree))) { LOG_WARN("parse tree failed", K(ret)); - } else if (OB_FAIL(bin.print_content(buff, false, false, ObXmlFormatType::NO_FORMAT, param_list))) { + } else if (bin.check_extend() && OB_FAIL(ObXmlUtil::init_print_ns(mem_ctx->allocator_, &bin, ns_vec, ns_vec_point))) { + LOG_WARN("fail to init ns vector by extend area", K(ret)); + } else if (OB_FAIL(bin.print_content(buff, false, false, ObXmlFormatType::NO_FORMAT, param_list, ns_vec_point))) { LOG_WARN("print document failed"); } else if (OB_FAIL(parser.parse_document(buff.string()))) { // try to parse content intto document, if it fails, leave the content unchanged @@ -642,6 +766,246 @@ int ObXMLExprHelper::content_unparsed_binary_check_doc(ObMulModeMemCtx* mem_ctx, return ret; 
} +// is_root is symbol of scan node seek +int ObXMLExprHelper::check_xpath_valid(ObPathExprIter &xpath_iter, bool is_root) +{ + INIT_SUCC(ret); + ObPathNodeAxis first_axis; + ObSeekType first_type; + // check axis + if (OB_FAIL(xpath_iter.get_first_axis(first_axis))) { + LOG_WARN("fail to get first node axis", K(ret)); + } else if (OB_FAIL(xpath_iter.get_first_seektype(first_type))) { + LOG_WARN("fail to get first seek type", K(ret)); + } else { + switch (first_axis) { + case ObPathNodeAxis::ANCESTOR: { + if (is_root) { + ret = OB_ERR_XPATH_INVALID_NODE; // ORA-19276: XPST0005 - XPath step specifies an invalid element/attribute name: + LOG_USER_ERROR(OB_ERR_TOO_MANY_PREFIX_DECLARE, xpath_iter.get_path_str().length(), xpath_iter.get_path_str().ptr()); + } else if (xpath_iter.get_path_str()[0] == '/') { + break; // '/' in first will not report error + } else { + ret = OB_ERR_XQUERY_UNSUPPORTED; // ORA-19110: unsupported XQuery expression + LOG_WARN("xquery unsupported", K(ret)); + } + break; + } + case ObPathNodeAxis::SELF: + case ObPathNodeAxis::ANCESTOR_OR_SELF: { + if (is_root && first_type != ObSeekType::TEXT && first_type != ObSeekType::NODES) { + ret = OB_ERR_XPATH_INVALID_NODE; // ORA-19276: XPST0005 - XPath step specifies an invalid element/attribute name: + LOG_USER_ERROR(OB_ERR_TOO_MANY_PREFIX_DECLARE, xpath_iter.get_path_str().length(), xpath_iter.get_path_str().ptr()); + } else if (is_root) { + } else if (xpath_iter.get_path_str()[0] == '.' || xpath_iter.get_path_str()[0] == '/') { + // '.' 
or '/' in first will not report error + } else { + ret = OB_ERR_XQUERY_UNSUPPORTED; // ORA-19110: unsupported XQuery expression + LOG_WARN("xquery unsupported", K(ret)); + } + break; + } + case ObPathNodeAxis::PARENT: { + if (is_root) { + ret = OB_ERR_XPATH_NO_NODE; // ORA-19277: XPST0005 - XPath step specifies an item type matching no node: + LOG_USER_ERROR(OB_ERR_TOO_MANY_PREFIX_DECLARE, xpath_iter.get_path_str().length(), xpath_iter.get_path_str().ptr()); + } else { + ret = OB_ERR_XQUERY_UNSUPPORTED; // ORA-19110: unsupported XQuery expression + LOG_WARN("xquery unsupported", K(ret)); + } + break; + } + case ObPathNodeAxis::ATTRIBUTE: { + if (is_root) { + ret = OB_ERR_XPATH_INVALID_NODE; // ORA-19276: XPST0005 - XPath step specifies an invalid element/attribute name: + LOG_USER_ERROR(OB_ERR_TOO_MANY_PREFIX_DECLARE, xpath_iter.get_path_str().length(), xpath_iter.get_path_str().ptr()); + } + break; + } + default: { + break; + } + } + } + if (OB_SUCC(ret)) { + switch (first_type) { + case ObSeekType::TEXT: { + if (is_root && first_axis == ObPathNodeAxis::CHILD) { + ret = OB_ERR_XPATH_NO_NODE; // ORA-19277: XPST0005 - XPath step specifies an item type matching no node: + LOG_USER_ERROR(OB_ERR_TOO_MANY_PREFIX_DECLARE, xpath_iter.get_path_str().length(), xpath_iter.get_path_str().ptr()); + } + break; + } + default: { + break; + } + } + } + return ret; +} + +int ObXMLExprHelper::get_xpath_result(ObPathExprIter &xpath_iter, ObIMulModeBase *&xml_res, ObMulModeMemCtx *mem_ctx, bool add_ns) +{ + int ret = OB_SUCCESS; + ObStringBuffer buff(mem_ctx->allocator_); + ObXmlDocument *root = NULL; + ObIMulModeBase *node = NULL; + int64_t append_node_num = 0; + ObString blob_locator; + ObStringBuffer res_buf(mem_ctx->allocator_); + ObMulModeNodeType node_type = M_MAX_TYPE; + if (!xpath_iter.is_first_init()) { + } else if (OB_FAIL(xpath_iter.open())) { + ret = OB_ERR_PARSE_XQUERY_EXPR; + LOG_USER_ERROR(OB_ERR_PARSE_XQUERY_EXPR, xpath_iter.get_path_str().length(), 
xpath_iter.get_path_str().ptr()); + LOG_WARN("fail to open xpath iterator", K(ret)); + // ObXMLExprHelper::replace_xpath_ret_code(ret); + } else if (OB_FAIL(check_xpath_valid(xpath_iter, false))) { + LOG_WARN("check xpath valid failed", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObXMLExprHelper::binary_agg_xpath_result(xpath_iter, node_type, mem_ctx, res_buf, append_node_num, add_ns))) { + LOG_WARN("agg xpath failed", K(ret)); + } else if (OB_FAIL(ObMulModeFactory::get_xml_base(mem_ctx, res_buf.string(), + ObNodeMemType::BINARY_TYPE, + ObNodeMemType::BINARY_TYPE, + xml_res))) { + LOG_WARN("fail to get xml node", K(ret)); + } + + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + } + return ret; +} + +int ObXMLExprHelper::binary_agg_xpath_result(ObPathExprIter &xpath_iter, + ObMulModeNodeType &node_type, + ObMulModeMemCtx* mem_ctx, + ObStringBuffer &res, + int64_t &append_node_num, + bool add_ns) +{ + INIT_SUCC(ret); + ObIMulModeBase *node = NULL; + int element_count = 0; + int text_count = 0; + ObString version; + ObString encoding; + uint16_t standalone; + ObXmlBin extend; + ObIMulModeBase* last_parent = nullptr; + bool first_is_doc = false; + common::hash::ObHashMap ns_map; + xpath_iter.set_add_ns(add_ns); + ObBinAggSerializer bin_agg(mem_ctx->allocator_, ObBinAggType::AGG_XML, static_cast(M_CONTENT)); + bin_agg.close_merge_text(); + if (add_ns && OB_FAIL(ns_map.create(10, lib::ObMemAttr(MTL_ID(), "XMLModule")))) { + LOG_WARN("ns map create failed", K(ret)); + } + while (OB_SUCC(ret)) { + ObIMulModeBase* tmp = nullptr; + if (OB_FAIL(xpath_iter.get_next_node(node))) { + if (ret != OB_ITER_END) { + ret = OB_ERR_PARSE_XQUERY_EXPR; + LOG_USER_ERROR(OB_ERR_PARSE_XQUERY_EXPR, xpath_iter.get_path_str().length(), xpath_iter.get_path_str().ptr()); + LOG_WARN("fail to get next xml node", K(ret)); + } + } else if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xpath result node is null", K(ret)); + } else if (node->is_tree() && 
OB_FAIL(ObMulModeFactory::transform(mem_ctx, node, BINARY_TYPE, node))) { + LOG_WARN("fail to transform to tree", K(ret)); + } else { + ObXmlBin *bin = nullptr; + if (OB_ISNULL(bin = static_cast(node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get bin failed", K(ret)); + } else if (bin->meta_.len_ == 0) { + // do nothing + } else if (add_ns && bin->check_extend()) { + bool conflict = false; + // check key conflict + if (OB_FAIL(bin->get_extend(extend))) { + LOG_WARN("fail to get extend", K(ret)); + } else if (OB_FAIL(ObXmlUtil::check_ns_conflict(xpath_iter.get_cur_res_parent(), last_parent, &extend, ns_map, conflict))) { + LOG_WARN("fail to check conflict", K(ret)); + } else if (conflict) { + // if conflict, merge bin + if (OB_FAIL(bin->merge_extend(extend))) { + LOG_WARN("fail to merge extend", K(ret)); + } else { + bin = &extend; + } + } else if (OB_FAIL(bin->remove_extend())) { // if not conflict, erase extend + LOG_WARN("fail to remove extend", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(bin_agg.append_key_and_value(bin))) { + LOG_WARN("failed to append binary", K(ret)); + } else { + ObMulModeNodeType type = node->type(); + if (append_node_num == 0 && type == ObMulModeNodeType::M_DOCUMENT) { + version = node->get_version(); + encoding = node->get_encoding(); + standalone = node->get_standalone(); + first_is_doc = version.empty() ? 
false : true; + } + if (type == ObMulModeNodeType::M_ELEMENT || type == ObMulModeNodeType::M_DOCUMENT) { + element_count++; + } else if (type == ObMulModeNodeType::M_TEXT || type == ObMulModeNodeType::M_CDATA) { + text_count++; + } else if (type == ObMulModeNodeType::M_CONTENT) { + append_node_num += bin->count() - 1; + } + append_node_num++; + } + } + } + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + if (element_count > 1 || element_count == 0) { + node_type = ObMulModeNodeType::M_CONTENT; + } else if (element_count == 1 && text_count > 0) { + node_type = ObMulModeNodeType::M_CONTENT; + } else if (append_node_num == 0) { + // do nothing + } else { + node_type = ObMulModeNodeType::M_DOCUMENT; + } + bin_agg.set_header_type(node_type); + if (first_is_doc && append_node_num == 1) { + bin_agg.set_xml_decl(version, encoding, standalone); + } + if (OB_FAIL(bin_agg.serialize())) { + LOG_WARN("failed to serialize binary.", K(ret)); + } else if (add_ns && ns_map.size() > 0 && OB_FAIL(ObXmlUtil::ns_to_extend(mem_ctx, ns_map, bin_agg.get_buffer()))) { + LOG_WARN("failed to serialize extend.", K(ret)); + } else{ + res.append(bin_agg.get_buffer()->string()); + } + } + ns_map.clear(); + return ret; +} + +int ObXMLExprHelper::cast_to_res(ObIAllocator &allocator, ObString &xml_content, const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) +{ + int ret = OB_SUCCESS; + ObObj src_obj; + if (xml_content.empty()) { + res.set_null(); + } else { + src_obj.set_string(ObVarcharType, xml_content); + src_obj.set_collation_type(CS_TYPE_UTF8MB4_BIN); + if (OB_FAIL(cast_to_res(allocator, src_obj, expr, ctx, res))) { + LOG_WARN("fail to cast to res", K(ret)); + } + } + return ret; +} + int ObXMLExprHelper::check_element_validity(ObMulModeMemCtx* mem_ctx, ObXmlElement *in_ele, ObXmlElement *&out_ele, bool &validity) { INIT_SUCC(ret); ObXmlParser parser(mem_ctx); @@ -671,6 +1035,85 @@ int ObXMLExprHelper::check_element_validity(ObMulModeMemCtx* mem_ctx, ObXmlEleme return ret; } +int 
ObXMLExprHelper::cast_to_res(ObIAllocator &allocator, + ObObj &src_obj, + const ObExpr &expr, + ObEvalCtx &ctx, + ObDatum &res, + bool xt_need_acc_check) +{ + int ret = OB_SUCCESS; + ObCastMode def_cm = CM_NONE; + ObSQLSessionInfo *session = NULL; + ObObj dst_obj, buf_obj; + const ObObj *res_obj = NULL; + ObAccuracy out_acc; + if (src_obj.is_null()) { + res.set_null(); + } else { + ObSolidifiedVarsGetter helper(expr, ctx, ctx.exec_ctx_.get_my_session()); + ObDataTypeCastParams dtc_params; + // to type + if (OB_ISNULL(session = ctx.exec_ctx_.get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sessioninfo is NULL"); + } else if (OB_FAIL(ObSQLUtils::get_default_cast_mode(session->get_stmt_type(), + session, def_cm))) { + LOG_WARN("get_default_cast_mode failed", K(ret)); + } else if (OB_FAIL(helper.get_dtc_params(dtc_params))) { + LOG_WARN("get dtc params failed", K(ret)); + } else { + ObObjType obj_type = expr.datum_meta_.type_; + ObCollationType cs_type = expr.datum_meta_.cs_type_; + ObPhysicalPlanCtx *phy_plan_ctx = ctx.exec_ctx_.get_physical_plan_ctx(); + ObCastCtx cast_ctx(&allocator, &dtc_params, get_cur_time(phy_plan_ctx), def_cm, + cs_type, NULL, NULL); + if (OB_FAIL(ObObjCaster::to_type(obj_type, cs_type, cast_ctx, src_obj, dst_obj))) { + LOG_WARN("failed to cast object to ", K(ret), K(src_obj), K(obj_type)); + } else if (FALSE_IT(get_accuracy_from_expr(expr, out_acc))) { + } else if (FALSE_IT(res_obj = &dst_obj)) { + } else if (OB_FAIL(obj_accuracy_check(cast_ctx, out_acc, cs_type, dst_obj, buf_obj, res_obj))) { + if (!xt_need_acc_check && (ob_is_varchar_or_char(obj_type, cs_type) || ob_is_nchar(obj_type)) && ret == OB_ERR_DATA_TOO_LONG) { + ObLengthSemantics ls = lib::is_oracle_mode() ? + expr.datum_meta_.length_semantics_ : LS_CHAR; + const char* str = dst_obj.get_string_ptr(); + int32_t str_len_byte = dst_obj.get_string_len(); + int64_t char_len = 0; + int32_t trunc_len_byte = 0; + trunc_len_byte = (ls == LS_BYTE ? 
+ ObCharset::max_bytes_charpos(cs_type, str, str_len_byte, + expr.max_length_, char_len): + ObCharset::charpos(cs_type, str, str_len_byte, expr.max_length_)); + if (trunc_len_byte == 0) { + (const_cast(res_obj))->set_null(); + } else { + (const_cast(res_obj))->set_common_value(ObString(trunc_len_byte, str)); + } + ret = OB_SUCCESS; + } else { + LOG_WARN("accuracy check failed", K(ret), K(out_acc), K(res_obj)); + } + } else if (OB_FAIL(ObSPIService::spi_pad_char_or_varchar(session, obj_type, out_acc, &allocator, const_cast(res_obj)))) { + LOG_WARN("fail to pad char", K(ret), K(*res_obj)); + } + + if (OB_SUCC(ret)) { + if (OB_NOT_NULL(res_obj)) { + res.from_obj(*res_obj); + ObExprStrResAlloc res_alloc(expr, ctx); + if (OB_FAIL(res.deep_copy(res, res_alloc))) { + LOG_WARN("fail to deep copy for res datum", K(ret), KPC(res_obj), K(res)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("res obj is NULL", K(ret)); + } + } + } + } + return ret; +} + int ObXMLExprHelper::check_doc_validity(ObMulModeMemCtx* mem_ctx, ObXmlDocument *&doc, bool &validity) { INIT_SUCC(ret); ObXmlParser parser(mem_ctx); @@ -688,7 +1131,7 @@ int ObXMLExprHelper::check_doc_validity(ObMulModeMemCtx* mem_ctx, ObXmlDocument } return ret; } -#endif // OB_BUILD_ORACLE_XML + // not all udts sql types based on lobs, so not handle xml in process_lob_locator_results int ObXMLExprHelper::process_sql_udt_results(ObObj& value, sql::ObResultSet &result) { @@ -723,7 +1166,6 @@ int ObXMLExprHelper::process_sql_udt_results(common::ObObj& value, ret = OB_NOT_SUPPORTED; OB_LOG(WARN, "not supported udt type", K(ret), K(value.get_type()), K(value.get_udt_subschema_id())); -#ifdef OB_BUILD_ORACLE_XML } else if (value.is_xml_sql_type()) { bool is_client_support_binary_xml = false; // client receive xml as json, like json if (value.is_null() || value.is_nop_value()) { @@ -747,6 +1189,8 @@ int ObXMLExprHelper::process_sql_udt_results(common::ObObj& value, param_list.indent = 2; ObIMulModeBase *node = nullptr; 
ObMulModeMemCtx* ctx = nullptr; + ObNsSortedVector* ns_vec_point = nullptr; + ObNsSortedVector ns_vec; if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(allocator, ctx))) { LOG_WARN("fail to create tree memory context", K(ret)); @@ -761,6 +1205,8 @@ int ObXMLExprHelper::process_sql_udt_results(common::ObObj& value, ObNodeMemType::BINARY_TYPE, node))) { LOG_WARN("fail to get xml base", K(ret), K(data.length())); + } else if (OB_FAIL(ObXmlUtil::init_print_ns(allocator, node, ns_vec, ns_vec_point))) { + LOG_WARN("fail to init ns vector by extend area", K(ret)); } ObCollationType cs_type = session_info->get_local_collation_connection(); @@ -771,11 +1217,11 @@ int ObXMLExprHelper::process_sql_udt_results(common::ObObj& value, LOG_WARN("fail to construct buffer", K(ret)); } else if ((ObMulModeNodeType::M_DOCUMENT == node_type || ObMulModeNodeType::M_UNPARESED_DOC == node_type) // /Although it is judged here that node_type=M_UNPARESED_DOC, it can actually go here, and the label has been replaced with DOCUMENT - && OB_FAIL(node->print_document(*jbuf, cs_type, ObXmlFormatType::WITH_FORMAT))) { + && OB_FAIL(node->print_document(*jbuf, cs_type, ObXmlFormatType::WITH_FORMAT, 2, ns_vec_point))) { // default size value of print_document is 2 LOG_WARN("print document failed", K(ret)); } else if ((ObMulModeNodeType::M_CONTENT == node_type || ObMulModeNodeType::M_UNPARSED == node_type) && - OB_FAIL(node->print_content(*jbuf, false, false, ObXmlFormatType::WITH_FORMAT, param_list))) { + OB_FAIL(node->print_content(*jbuf, false, false, ObXmlFormatType::WITH_FORMAT, param_list, ns_vec_point))) { LOG_WARN("print content failed", K(ret)); } else { data.assign_ptr(jbuf->ptr(), jbuf->length()); @@ -796,7 +1242,6 @@ int ObXMLExprHelper::process_sql_udt_results(common::ObObj& value, } } } -#endif // OB_BUILD_ORACLE_XML } else if (value.is_geometry()) { if (is_ps_protocol) { #ifdef OB_BUILD_ORACLE_PL @@ -866,5 +1311,197 @@ int ObXMLExprHelper::process_sql_udt_results(common::ObObj& value, 
return ret; } +int ObXMLExprHelper::extract_xml_text_node(ObMulModeMemCtx* mem_ctx, ObIMulModeBase *xml_doc, ObString &res) +{ + int ret = OB_SUCCESS; + ObStringBuffer buff(mem_ctx->allocator_); + ObPathExprIter xpath_iter(mem_ctx->allocator_); + ObString xpath_str = ObString::make_string("//node()"); + ObString default_ns; // unused + ObIMulModeBase *xml_node = NULL; + bool is_xml_document = false; + bool is_head_comment = true; + if (OB_ISNULL(xml_doc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xml doc node is NULL", K(ret)); + } else if (FALSE_IT(is_xml_document = xml_doc->type() == M_DOCUMENT)) { + } else if (OB_FAIL(xpath_iter.init(mem_ctx, xpath_str, default_ns, xml_doc, NULL, false))) { + LOG_WARN("fail to init xpath iterator", K(xpath_str), K(default_ns), K(ret)); + } else if (OB_FAIL(xpath_iter.open())) { + LOG_WARN("fail to open xpath iterator", K(ret)); + } + + while (OB_SUCC(ret)) { + ObString content; + if (OB_FAIL(xpath_iter.get_next_node(xml_node))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next xml node", K(ret)); + } + } else if (OB_ISNULL(xml_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("xpath result node is null", K(ret)); + } else { + ObMulModeNodeType node_type = xml_node->type(); + if (node_type != M_TEXT && + node_type != M_CDATA && + node_type != M_COMMENT && + node_type != M_INSTRUCT) { + is_head_comment = false; + } else if ((node_type == M_COMMENT || node_type == M_INSTRUCT) && + (is_xml_document || (!is_xml_document && !is_head_comment))) { + /* filter the comment node */ + } else if (OB_FAIL(xml_node->get_value(content))) { + LOG_WARN("fail to get text node content", K(ret)); + } else if (OB_FAIL(buff.append(content))) { + LOG_WARN("fail to append text node content", K(ret), K(content)); + } + } + } + + if (ret == OB_ITER_END) { + res.assign_ptr(buff.ptr(), buff.length()); + ret = OB_SUCCESS; + } + + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = xpath_iter.close())) { + LOG_WARN("fail to close xpath iter", 
K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + return ret; +} + +// update the new node default ns to empty when the parent node has default ns +int ObXMLExprHelper::update_new_nodes_ns(ObIAllocator &allocator, ObXmlNode *parent, ObXmlNode *update_node) +{ + int ret = OB_SUCCESS; + ObXmlAttribute *empty_ns = NULL; + ObXmlAttribute *default_ns = NULL; + ObXmlAttribute *update_node_default_ns = NULL; + if (OB_ISNULL(parent) || OB_ISNULL(update_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node is NULL", K(ret), K(parent), K(update_node)); + } else if (OB_FAIL(get_valid_default_ns_from_parent(parent, default_ns))) { + LOG_WARN("unexpected error in find default ns from parent", K(ret)); + } else if (OB_NOT_NULL(default_ns) && !default_ns->get_value().empty()) { + // need to update the new node default ns with empty default ns + if (OB_FAIL(get_valid_default_ns_from_parent(update_node, update_node_default_ns))) { + LOG_WARN("unexpected error in find default ns from parent", K(ret)); + } else if (OB_ISNULL(update_node_default_ns) || update_node_default_ns->get_value().empty()) { + if (OB_ISNULL(empty_ns = OB_NEWx(ObXmlAttribute, (&allocator), ObMulModeNodeType::M_NAMESPACE, parent->get_mem_ctx()))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else { + empty_ns->set_xml_key(ObXmlConstants::XMLNS_STRING); + empty_ns->set_value(ObString::make_empty_string()); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(set_ns_recrusively(update_node, empty_ns))) { + LOG_WARN("fail to set empty default ns recrusively", K(ret)); + } + } + } + return ret; +} + +// found valid default ns from down to top +int ObXMLExprHelper::get_valid_default_ns_from_parent(ObXmlNode *cur_node, ObXmlAttribute* &default_ns) +{ + int ret = OB_SUCCESS; + ObXmlNode* t_node = NULL; + bool is_found = false; + if (OB_ISNULL(cur_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update node is NULL", K(ret)); + } else if (!ObXMLExprHelper::is_xml_element_node(cur_node->type())) { + 
t_node = cur_node->get_parent(); + } else { + t_node = cur_node; + } + + while(!is_found && OB_SUCC(ret) && OB_NOT_NULL(t_node)) { + ObXmlElement *t_element = static_cast(t_node); + ObArray attr_list; + if (OB_FAIL(t_element->get_namespace_list(attr_list))) { + LOG_WARN("fail to get namespace list", K(ret)); + } + for (int i = 0; !is_found && OB_SUCC(ret) && i < attr_list.size(); i ++) { + ObXmlAttribute *attr = static_cast(attr_list.at(i)); + if (attr->get_key().compare(ObXmlConstants::XMLNS_STRING) == 0) { + is_found = true; + default_ns = attr; + } + } + t_node = t_node->get_parent(); + } + return ret; +} + +int ObXMLExprHelper::set_ns_recrusively(ObXmlNode *update_node, ObXmlAttribute *ns) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(update_node) || OB_ISNULL(ns)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update node is NULL", K(ret), K(update_node), K(ns)); + } else if (!ObXMLExprHelper::is_xml_element_node(update_node->type())) { + // no need to set default ns + } else { + bool is_stop = false; + ObXmlElement *ele_node = static_cast(update_node); + ObString key = ns->get_key(); + if (ele_node->type() != M_ELEMENT) { + // skip + } else if (key.compare(ObXmlConstants::XMLNS_STRING) == 0) { + // update default ns + if (ele_node->get_prefix().empty()) { + // this condition mean: has no ns || has non-empty default ns + is_stop = true; + ObXmlAttribute *default_ns = NULL; + if (OB_FAIL(get_valid_default_ns_from_parent(update_node, default_ns))) { + LOG_WARN("get default ns failed.", K(ret)); + } else if (OB_ISNULL(default_ns) || default_ns->get_value().empty()) { + ele_node->add_attribute(ns, false, 0); + ele_node->set_ns(ns); + } else { /* has non-empty default ns, skip and stop find */ } + } + } else { // has prefix + ObXmlAttribute *tmp_ns = NULL; + if (ele_node->get_ns() == ns || + ele_node->has_attribute_with_ns(ns) || + OB_NOT_NULL(tmp_ns = ele_node->get_ns_by_name(key))) { + // match condition below will stop recrusive + // element use this prefix ns || 
attributes of element use this prefix ns || this prefix in attributes + is_stop = true; + if (OB_NOT_NULL(tmp_ns)) { // if the prefix not in attributes + } else if (OB_FAIL(ele_node->add_attribute(ns, false, 0))) { + LOG_WARN("fail to add namespace node", K(ret), K(key)); + } + } + } + + if (!is_stop) { + // find its child node recrusivle when no need to set default ns + for (int64_t i = 0; OB_SUCC(ret) && i < ele_node->size(); i++) { + if (OB_FAIL(SMART_CALL(set_ns_recrusively(ele_node->at(i), ns)))) { + LOG_WARN("fail set default ns in origin tree recursively", K(ret)); + } + } // end for + } // end is_stop + } + return ret; +} + +void ObXMLExprHelper::get_accuracy_from_expr(const ObExpr &expr, ObAccuracy &accuracy) +{ + accuracy.set_length(expr.max_length_); + accuracy.set_scale(expr.datum_meta_.scale_); + const ObObjTypeClass &dst_tc = ob_obj_type_class(expr.datum_meta_.type_); + if (ObStringTC == dst_tc || ObTextTC == dst_tc) { + accuracy.set_length_semantics(expr.datum_meta_.length_semantics_); + } else { + accuracy.set_precision(expr.datum_meta_.precision_); + } +} + } // sql } // oceanbase \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_xml_func_helper.h b/src/sql/engine/expr/ob_expr_xml_func_helper.h index 2ee9767d34..8e58e565dc 100644 --- a/src/sql/engine/expr/ob_expr_xml_func_helper.h +++ b/src/sql/engine/expr/ob_expr_xml_func_helper.h @@ -17,12 +17,10 @@ #include "sql/engine/expr/ob_expr_util.h" #include "sql/engine/expr/ob_expr_lob_utils.h" #include "sql/engine/expr/ob_expr_result_type_util.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_parser.h" #include "lib/xml/ob_xpath.h" #include "lib/xml/ob_xml_tree.h" #include "lib/xml/ob_xml_util.h" -#endif // OB_BUILD_ORACLE_XML using namespace oceanbase::common; @@ -31,15 +29,26 @@ namespace oceanbase namespace sql { +enum ObGetXmlBaseType { + OB_IS_REPARSE, + OB_SHOULD_CHECK, + OB_MAX +}; + class ObXMLExprHelper final { public: -#ifdef OB_BUILD_ORACLE_XML static int 
add_binary_to_element(ObMulModeMemCtx* mem_ctx, ObString bianry_value, ObXmlElement &element); + static int get_xml_base(ObMulModeMemCtx *mem_ctx, + ObDatum *xml_datum, + ObEvalCtx &ctx, + ObIMulModeBase *&xml_doc, + ObGetXmlBaseType base_flag); static int get_xml_base(ObMulModeMemCtx* mem_ctx, ObDatum *xml_datum, ObCollationType cs_type, ObNodeMemType expect_type, ObIMulModeBase *&node); static int get_xml_base(ObMulModeMemCtx* mem_ctx, ObDatum *xml_datum, ObCollationType cs_type, - ObNodeMemType expect_type, ObIMulModeBase *&node, ObMulModeNodeType &node_type, bool is_reparse = false); + ObNodeMemType expect_type, ObIMulModeBase *&node, ObMulModeNodeType &node_type, + ObGetXmlBaseType base_flag = ObGetXmlBaseType::OB_MAX); static int try_to_parse_unparse_binary(ObMulModeMemCtx* mem_ctx, ObCollationType cs_type, ObIMulModeBase *input_node, ObNodeMemType expect_type, ObIMulModeBase *&res_node); @@ -53,6 +62,13 @@ public: ObString &default_ns, ObPathVarObject &prefix_ns, ObIAllocator &allocator); + static int construct_namespace_params(ObIArray &namespace_arr, + ObString &default_ns, + void *&prefix_ns, + ObIAllocator &allocator); + static int get_xpath_result(ObPathExprIter &xpath_iter, ObIMulModeBase *&xml_res, ObMulModeMemCtx *mem_ctx, bool add_ns = false); + static int check_xpath_valid(ObPathExprIter &xpath_iter, bool is_root); + static int parse_namespace_str(ObString &ns_str, ObString &prefix, ObString &uri); // set string result static int set_string_result(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res, ObString &res_str); @@ -65,6 +81,16 @@ public: ObString &res, ObIAllocator &allocator); static int parse_xml_str(ObMulModeMemCtx *ctx, const ObString &xml_text, ObXmlDocument *&xml_doc); + static int get_xml_base_from_expr(const ObExpr *expr, + ObMulModeMemCtx *mem_ctx, + ObEvalCtx &ctx, + ObIMulModeBase *&node); + static int binary_agg_xpath_result(ObPathExprIter &xpath_iter, + ObMulModeNodeType &node_type, + ObMulModeMemCtx* mem_ctx, + ObStringBuffer 
&res, + int64_t &append_node_num, + bool add_ns); // classify xml node type static bool is_xml_leaf_node(ObMulModeNodeType node_type); @@ -79,7 +105,6 @@ public: static int content_unparsed_binary_check_doc(ObMulModeMemCtx* mem_ctx, ObString binary_str, ObString &res_str); static int check_element_validity(ObMulModeMemCtx* mem_ctx, ObXmlElement *in_ele, ObXmlElement *&out_ele, bool &validity); static int check_doc_validity(ObMulModeMemCtx* mem_ctx, ObXmlDocument *&doc, bool &validity); -#endif // OB_BUILD_ORACLE_XML static int process_sql_udt_results(common::ObObj& value, sql::ObResultSet &result); static int process_sql_udt_results(common::ObObj& value, common::ObIAllocator *allocator, @@ -89,13 +114,21 @@ public: const ColumnsFieldIArray *fields = NULL, ObSchemaGetterGuard *schema_guard = NULL); static uint64_t get_tenant_id(ObSQLSessionInfo *session); -#ifdef OB_BUILD_ORACLE_XML + static int append_header_in_front(ObIAllocator &allocator, ObXmlDocument *&root, ObIMulModeBase *node); + static int cast_to_res(ObIAllocator &allocator, ObString &xml_content, const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + static int cast_to_res(ObIAllocator &allocator, ObObj &src_obj, const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res, bool xt_need_acc_check = false); + + static int extract_xml_text_node(ObMulModeMemCtx* mem_ctx, ObIMulModeBase *xml_doc, ObString &res); + static void get_accuracy_from_expr(const ObExpr &expr, ObAccuracy &accuracy); + static int update_new_nodes_ns(ObIAllocator &allocator, ObXmlNode *parent, ObXmlNode *update_node); + static int get_valid_default_ns_from_parent(ObXmlNode *cur_node, ObXmlAttribute* &default_ns); + static int set_ns_recrusively(ObXmlNode *update_node, ObXmlAttribute *ns); + private: static int add_ns_to_container_node(ObPathVarObject &container, ObString &prefix, ObString &uri, ObIAllocator &allocator); -#endif // OB_BUILD_ORACLE_XML }; } // sql } // oceanbase diff --git a/src/sql/engine/expr/ob_expr_xml_sequence.cpp 
b/src/sql/engine/expr/ob_expr_xml_sequence.cpp new file mode 100644 index 0000000000..67d6feffbc --- /dev/null +++ b/src/sql/engine/expr/ob_expr_xml_sequence.cpp @@ -0,0 +1,227 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file is for func xmlsequence. + */ + +#include "ob_expr_xml_sequence.h" +#include "ob_expr_lob_utils.h" +#include "sql/engine/expr/ob_expr_xml_func_helper.h" +#include "pl/ob_pl_resolver.h" +#include "pl/ob_pl_package.h" +#include "lib/utility/utility.h" +#define USING_LOG_PREFIX SQL_ENG + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace sql +{ + +OB_SERIALIZE_MEMBER(ObExprXmlSequence::ObSequenceExtraInfo, + type_, not_null_, elem_type_, capacity_, udt_id_); + +int ObExprXmlSequence::ObSequenceExtraInfo::deep_copy(common::ObIAllocator &allocator, + const ObExprOperatorType type, + ObIExprExtraInfo *&copied_info) const +{ + int ret = OB_SUCCESS; + OZ(ObExprExtraInfoFactory::alloc(allocator, type, copied_info)); + ObSequenceExtraInfo &other = *static_cast(copied_info); + if (OB_SUCC(ret)) { + other = *this; + } + return ret; +} + +ObExprXmlSequence::ObExprXmlSequence(common::ObIAllocator &alloc) + : ObFuncExprOperator(alloc, T_FUN_SYS_XMLSEQUENCE, N_XMLSEQUENCE, 1, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) +{ +} + +ObExprXmlSequence::~ObExprXmlSequence() {} + +int ObExprXmlSequence::calc_result_type1(ObExprResType &type, + ObExprResType &text, + common::ObExprTypeCtx &type_ctx) const +{ + INIT_SUCC(ret); + if 
(!ob_is_xml_pl_type(text.get_type(), text.get_udt_id()) && !ob_is_xml_sql_type(text.get_type(), text.get_subschema_id())) { + ret = OB_ERR_PARAM_INVALID; + LOG_WARN("get type invaid.", K(ret), K(text)); + } else { + type.get_calc_meta().set_sql_udt(ObXMLSqlType); + type.set_type(ObExtendType); + type.set_udt_id(XmlSequenceUdtID); + OX (type.set_extend_type(pl::PL_VARRAY_TYPE)); + } + return ret; +} + +#ifdef OB_BUILD_ORACLE_PL +int ObExprXmlSequence::eval_xml_sequence(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) +{ + INIT_SUCC(ret); + ObIAllocator &allocator = ctx.exec_ctx_.get_allocator(); + pl::ObPLCollection *coll = NULL; + ObSQLSessionInfo *session = ctx.exec_ctx_.get_my_session(); + ObExecContext &exec_ctx = ctx.exec_ctx_; + ObDatum *xml_datum = NULL; + ObMulModeNodeType node_type = M_MAX_TYPE; + ObIMulModeBase *xml_tree = NULL; + ObMulModeMemCtx* mem_ctx = nullptr; + ObSequenceExtraInfo *info = nullptr; + if (OB_ISNULL(info = OB_NEWx(ObSequenceExtraInfo, (&allocator), allocator, T_FUN_SYS_XMLSEQUENCE))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(&allocator, mem_ctx))) { + LOG_WARN("fail to create tree memory context", K(ret)); + } else if (expr.arg_cnt_ < 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid arg_cnt_", K(ret), K(expr.arg_cnt_)); + } else if (OB_FAIL(ObXMLExprHelper::get_xmltype_from_expr(expr.args_[0], ctx, xml_datum))) { + LOG_WARN("fail to get xmltype value", K(ret)); + } else if (OB_FAIL(ObXMLExprHelper::get_xml_base(mem_ctx, xml_datum, ObCollationType::CS_TYPE_INVALID, ObNodeMemType::TREE_TYPE, xml_tree, node_type, ObGetXmlBaseType::OB_IS_REPARSE))) { + LOG_WARN("fail to parse xml doc", K(ret)); + } else { + info->type_ = info->type_ = pl::PL_VARRAY_TYPE; + info->not_null_ = true; + info->capacity_ = xml_tree->count(); + info->udt_id_ = XmlSequenceUdtID; + info->elem_type_.set_obj_type(ObExtendType); + } + + if (OB_FAIL(ret)) { + } else 
if (OB_ISNULL(coll = static_cast(allocator.alloc(sizeof(pl::ObPLVArray) + 8)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory for pl collection", K(ret), K(coll)); + } else { + coll = new(coll)pl::ObPLVArray(info->udt_id_); + static_cast(coll)->set_capacity(info->capacity_); + } + + OZ(expr.eval_param_value(ctx)); + + if (OB_SUCC(ret)) { + const pl::ObUserDefinedType *type = NULL; + const pl::ObCollectionType *collection_type = NULL; + pl::ObElemDesc elem_desc; + pl::ObPLPackageGuard package_guard(session->get_effective_tenant_id()); + pl::ObPLResolveCtx resolve_ctx(allocator, + *session, + *(exec_ctx.get_sql_ctx()->schema_guard_), + package_guard, + *(exec_ctx.get_sql_proxy()), + false); + OZ (package_guard.init()); + pl::ObPLINS *ns = NULL; + if (NULL == session->get_pl_context()) { + ns = &resolve_ctx; + } else { + ns = session->get_pl_context()->get_current_ctx(); + } + + CK (OB_NOT_NULL(ns)); + OZ (ns->get_user_type(info->udt_id_, type)); + OV (OB_NOT_NULL(type), OB_ERR_UNEXPECTED, K(info->udt_id_)); + CK (type->is_collection_type()); + CK (OB_NOT_NULL(collection_type = static_cast(type))); + OX (elem_desc.set_obj_type(common::ObExtendType)); + OX (elem_desc.set_pl_type(collection_type->get_element_type().get_type())); + OX (elem_desc.set_field_count(1)); + OX (elem_desc.set_udt_id(collection_type->get_element_type().get_user_type_id())); + + OX (coll->set_element_desc(elem_desc)); + OX (coll->set_not_null(info->not_null_)); + + OZ (ObSPIService::spi_set_collection(session->get_effective_tenant_id(), + ns, + allocator, + *coll, + xml_tree->size())); + + if (OB_SUCC(ret)) { + int64_t array_count = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < xml_tree->count(); i++) { + ObIMulModeBase *ele = xml_tree->at(i); + ObObj ele_obj; + ObXmlDocument *doc = nullptr; + ObXmlElement *xml_node = nullptr; + ObString blob_locator; + ObString binary_str; + if (OB_ISNULL(coll->get_data())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get coll data null", 
K(ret)); + } else if (M_ELEMENT != ele->type()) { + // do nothing + } else if (OB_ISNULL(xml_node = static_cast(ele))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get ele null.", K(ret)); + } else if (OB_ISNULL(doc = OB_NEWx(ObXmlDocument, mem_ctx->allocator_, ObMulModeNodeType::M_DOCUMENT, mem_ctx))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to create document", K(ret)); + } else if (OB_FAIL(doc->add_element(xml_node))) { + } else if (OB_FAIL(doc->get_raw_binary(binary_str, mem_ctx->allocator_))) { + LOG_WARN("get raw binary failed", K(ret)); + } else { + ObTextStringResult text_result(ObLongTextType, true, &allocator); + if (OB_FAIL(text_result.init(binary_str.length()))) { + LOG_WARN("failed to init text result.", K(ret)); + } else if (OB_FAIL(text_result.append(binary_str))) { + LOG_WARN("failed to append binary str.", K(ret), K(binary_str)); + } else { + text_result.get_result_buffer(blob_locator); + ele_obj.meta_ = expr.args_[0]->obj_meta_; + ele_obj.meta_.set_subschema_id(ObXMLSqlType); + ele_obj.set_string(ObUserDefinedSQLType, blob_locator); + OZ(deep_copy_obj(*coll->get_allocator(), ele_obj, static_cast(coll->get_data())[array_count])); + array_count++; + } + } + } + } + + ObObj result; + result.set_extend(reinterpret_cast(coll), coll->get_type()); + OZ(res.from_obj(result, expr.obj_datum_map_)); + + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(coll->get_allocator())) { + int tmp_ret = OB_SUCCESS; + if (OB_ISNULL(exec_ctx.get_pl_ctx())) { + tmp_ret = exec_ctx.init_pl_ctx(); + } + if (OB_SUCCESS == tmp_ret && OB_NOT_NULL(exec_ctx.get_pl_ctx())) { + tmp_ret = exec_ctx.get_pl_ctx()->add(result); + } + if (OB_SUCCESS != tmp_ret) { + LOG_ERROR("fail to collect pl collection allocator, may be exist memory issue", K(tmp_ret)); + } + ret = OB_SUCCESS == ret ? 
tmp_ret : ret; + } + + } + + return ret; +} +#endif + +int ObExprXmlSequence::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const +{ + UNUSED(expr_cg_ctx); + UNUSED(raw_expr); + rt_expr.eval_func_ = eval_xml_sequence; + return OB_SUCCESS; +} + +}; + +}; \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_xml_sequence.h b/src/sql/engine/expr/ob_expr_xml_sequence.h new file mode 100644 index 0000000000..f1a8e2f54e --- /dev/null +++ b/src/sql/engine/expr/ob_expr_xml_sequence.h @@ -0,0 +1,81 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file is for func xmlsequence. 
+ */ + +#ifndef OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_XML_SEQUENCE_H +#define OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_XML_SEQUENCE_H + +#include "sql/engine/expr/ob_expr_operator.h" +#include "pl/ob_pl_type.h" +#include "pl/ob_pl_user_type.h" + +namespace oceanbase { + +namespace sql +{ + +enum XmlSequenceUdtType +{ + XmlSequenceUdtID = 300026 +}; + +class ObExprXmlSequence : public ObFuncExprOperator +{ +public: + explicit ObExprXmlSequence(common::ObIAllocator &alloc); + virtual ~ObExprXmlSequence(); + + virtual int calc_result_type1(ObExprResType &type, + ObExprResType &text, + common::ObExprTypeCtx &type_ctx) const override; + +#ifdef OB_BUILD_ORACLE_PL + static int eval_xml_sequence(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); +#else + static int eval_xml_sequence(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { return OB_NOT_SUPPORTED; } +#endif + virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) + const override; + struct ObSequenceExtraInfo : public ObIExprExtraInfo + { + OB_UNIS_VERSION(1); + public: + ObSequenceExtraInfo(common::ObIAllocator &alloc, ObExprOperatorType type) + : ObIExprExtraInfo(alloc, type), + type_(pl::ObPLType::PL_INVALID_TYPE), + not_null_(false), + elem_type_(), + capacity_(common::OB_INVALID_SIZE), + udt_id_(common::OB_INVALID_ID) + { + } + virtual int deep_copy(common::ObIAllocator &allocator, + const ObExprOperatorType type, + ObIExprExtraInfo *&copied_info) const override; + pl::ObPLType type_; + bool not_null_; + common::ObDataType elem_type_; + int64_t capacity_; + uint64_t udt_id_; + }; + +private: + DISALLOW_COPY_AND_ASSIGN(ObExprXmlSequence); +}; + + +} +} + +#endif diff --git a/src/sql/engine/expr/ob_expr_xml_serialize.cpp b/src/sql/engine/expr/ob_expr_xml_serialize.cpp index d045700dbd..315e84a0ad 100644 --- a/src/sql/engine/expr/ob_expr_xml_serialize.cpp +++ b/src/sql/engine/expr/ob_expr_xml_serialize.cpp @@ -13,11 +13,9 @@ #define USING_LOG_PREFIX SQL_ENG #include 
"ob_expr_xml_serialize.h" -#ifdef OB_BUILD_ORACLE_XML #include "sql/engine/expr/ob_expr_xml_func_helper.h" #include "lib/xml/ob_xml_parser.h" #include "lib/xml/ob_xml_util.h" -#endif #include "sql/session/ob_sql_session_info.h" #include "sql/engine/ob_exec_context.h" @@ -116,7 +114,6 @@ int ObExprXmlSerialize::get_dest_type(const ObExprResType as_type, ObExprResType } -#ifdef OB_BUILD_ORACLE_XML int ObExprXmlSerialize::eval_xml_serialize(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { int ret = OB_SUCCESS; @@ -280,8 +277,12 @@ int ObExprXmlSerialize::print_format_xml_text(ObIAllocator &allocator, print_params.encode = ObString(ObXmlUtil::get_charset_name(ctx.exec_ctx_.get_my_session()->get_local_collation_connection())); } } + ObNsSortedVector* ns_vec_point = nullptr; + ObNsSortedVector ns_vec; - if (OB_FAIL(xml_node->print_content(buff, with_encoding, with_version, format_flag, print_params))) { + if (OB_FAIL(ObXmlUtil::init_print_ns(&allocator, xml_node, ns_vec, ns_vec_point))) { + LOG_WARN("fail to init ns vector by extend area", K(ret)); + } else if (OB_FAIL(xml_node->print_content(buff, with_encoding, with_version, format_flag, print_params, ns_vec_point))) { LOG_WARN("fail to print xml content", K(ret), K(with_encoding), K(with_version), K(format_flag)); } if (OB_SUCC(ret)) { @@ -408,7 +409,11 @@ int ObExprXmlSerialize::get_xml_base_by_doc_type(ObMulModeMemCtx* mem_ctx, ObString check_xml; ObStringBuffer buff(mem_ctx->allocator_); ObXmlDocument *xml_doc = nullptr; - if (OB_FAIL(node->print_document(buff, CS_TYPE_INVALID, ObXmlFormatType::NO_FORMAT))) { + ObNsSortedVector* ns_vec_point = nullptr; + ObNsSortedVector ns_vec; + if (OB_FAIL(ObXmlUtil::init_print_ns(mem_ctx->allocator_, node, ns_vec, ns_vec_point))) { + LOG_WARN("fail to init ns vector by extend area", K(ret)); + } else if (OB_FAIL(node->print_document(buff, CS_TYPE_INVALID, ObXmlFormatType::NO_FORMAT, 2, ns_vec_point))) { LOG_WARN("fail to serialize unparse string", K(ret)); } else { 
check_xml.assign_ptr(buff.ptr(), buff.length()); @@ -439,7 +444,6 @@ int ObExprXmlSerialize::get_xml_base_by_doc_type(ObMulModeMemCtx* mem_ctx, } return ret; } -#endif int ObExprXmlSerialize::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, diff --git a/src/sql/engine/expr/ob_expr_xml_serialize.h b/src/sql/engine/expr/ob_expr_xml_serialize.h index 6b9f0064e8..e5dada7c2b 100644 --- a/src/sql/engine/expr/ob_expr_xml_serialize.h +++ b/src/sql/engine/expr/ob_expr_xml_serialize.h @@ -15,10 +15,8 @@ #define OCEANBASE_SQL_OB_EXPR_XML_SERIALIZE_H_ #include "sql/engine/expr/ob_expr_operator.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_multi_mode_interface.h" #include "lib/xml/ob_xml_tree.h" -#endif using namespace oceanbase::common; namespace oceanbase @@ -35,16 +33,11 @@ public: int64_t param_num, common::ObExprTypeCtx& type_ctx) const override; -#ifdef OB_BUILD_ORACLE_XML static int eval_xml_serialize(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); -#else - static int eval_xml_serialize(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { return OB_NOT_SUPPORTED; } -#endif virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override; int get_dest_type(const ObExprResType as_type, ObExprResType &dst_type) const; -#ifdef OB_BUILD_ORACLE_XML private: static bool is_supported_return_type(ObObjType val_type, ObCollationType cs_type); @@ -100,7 +93,6 @@ private: const static int64_t OB_XML_DEFAULTS_IMPLICIT = 0; const static int64_t OB_XML_HIDE_DEFAULTS = 1; const static int64_t OB_XML_SHOW_DEFAULTS = 2; -#endif private: DISALLOW_COPY_AND_ASSIGN(ObExprXmlSerialize); }; diff --git a/src/sql/engine/expr/ob_expr_xmlcast.cpp b/src/sql/engine/expr/ob_expr_xmlcast.cpp index a98a3af610..171345dd9b 100644 --- a/src/sql/engine/expr/ob_expr_xmlcast.cpp +++ b/src/sql/engine/expr/ob_expr_xmlcast.cpp @@ -15,13 +15,11 @@ #include "share/object/ob_obj_cast.h" #include "sql/engine/ob_exec_context.h" #include 
"sql/session/ob_sql_session_info.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_parser.h" #include "lib/xml/ob_xml_tree.h" #include "lib/xml/ob_xml_util.h" #include "lib/xml/ob_xpath.h" #include "sql/engine/expr/ob_expr_xml_func_helper.h" -#endif #include "sql/session/ob_sql_session_info.h" #include "sql/engine/ob_exec_context.h" #include "sql/ob_spi.h" @@ -147,7 +145,6 @@ int ObExprXmlcast::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, return OB_SUCCESS; } -#ifdef OB_BUILD_ORACLE_XML int ObExprXmlcast::eval_xmlcast(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { int ret = OB_SUCCESS; @@ -175,163 +172,14 @@ int ObExprXmlcast::eval_xmlcast(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res LOG_WARN("fail to get xml str", K(ret)); } else if (OB_FAIL(ObXMLExprHelper::get_xml_base(mem_ctx, xml_datum, cs_type, ObNodeMemType::BINARY_TYPE, xml_doc))) { LOG_WARN("fail to parse xml doc", K(ret)); - } else if (OB_FAIL(extract_xml_text_node(mem_ctx, xml_doc, xml_res_str))) { + } else if (OB_FAIL(ObXMLExprHelper::extract_xml_text_node(mem_ctx, xml_doc, xml_res_str))) { LOG_WARN("fail to extract xml text node", K(ret), K(xml_res_str)); - } else if (OB_FAIL(cast_to_res(allocator, xml_res_str, expr, ctx, res))) { + } else if (OB_FAIL(ObXMLExprHelper::cast_to_res(allocator, xml_res_str, expr, ctx, res))) { LOG_WARN("fail to cast to res", K(ret), K(xml_res_str)); } return ret; } -int ObExprXmlcast::cast_to_res(ObIAllocator &allocator, ObString &xml_content, const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) -{ - int ret = OB_SUCCESS; - ObCastMode def_cm = CM_NONE; - ObSQLSessionInfo *session = NULL; - ObObj src_obj,dst_obj, buf_obj; - const ObObj *res_obj = NULL; - ObAccuracy out_acc; - if (xml_content.empty()) { - res.set_null(); - } else { - src_obj.set_string(ObVarcharType, xml_content); - src_obj.set_collation_type(CS_TYPE_UTF8MB4_BIN); - ObSolidifiedVarsGetter helper(expr, ctx, ctx.exec_ctx_.get_my_session()); - ObDataTypeCastParams dtc_params; - // to 
type - if (OB_ISNULL(session = ctx.exec_ctx_.get_my_session())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("sessioninfo is NULL"); - } else if (OB_FAIL(ObSQLUtils::get_default_cast_mode(session->get_stmt_type(), - session, def_cm))) { - LOG_WARN("get_default_cast_mode failed", K(ret)); - } else if (OB_FAIL(helper.get_dtc_params(dtc_params))) { - LOG_WARN("get dtc params failed", K(ret)); - } else { - ObObjType obj_type = expr.datum_meta_.type_; - ObCollationType cs_type = expr.datum_meta_.cs_type_; - ObPhysicalPlanCtx *phy_plan_ctx = ctx.exec_ctx_.get_physical_plan_ctx(); - ObCastCtx cast_ctx(&allocator, &dtc_params, get_cur_time(phy_plan_ctx), def_cm, - cs_type, NULL, NULL); - if (OB_FAIL(ObObjCaster::to_type(obj_type, cs_type, cast_ctx, src_obj, dst_obj))) { - LOG_WARN("failed to cast object to ", K(ret), K(src_obj), K(obj_type)); - } else if (FALSE_IT(get_accuracy_from_expr(expr, out_acc))) { - } else if (FALSE_IT(res_obj = &dst_obj)) { - } else if (OB_FAIL(obj_accuracy_check(cast_ctx, out_acc, cs_type, dst_obj, buf_obj, res_obj))) { - if ((ob_is_varchar_or_char(obj_type, cs_type) || ob_is_nchar(obj_type)) && ret == OB_ERR_DATA_TOO_LONG) { - ObLengthSemantics ls = lib::is_oracle_mode() ? - expr.datum_meta_.length_semantics_ : LS_CHAR; - const char* str = dst_obj.get_string_ptr(); - int32_t str_len_byte = dst_obj.get_string_len(); - int64_t char_len = 0; - int32_t trunc_len_byte = 0; - trunc_len_byte = (ls == LS_BYTE ? 
- ObCharset::max_bytes_charpos(cs_type, str, str_len_byte, - expr.max_length_, char_len): - ObCharset::charpos(cs_type, str, str_len_byte, expr.max_length_)); - if (trunc_len_byte == 0) { - (const_cast(res_obj))->set_null(); - } else { - (const_cast(res_obj))->set_common_value(ObString(trunc_len_byte, str)); - } - ret = OB_SUCCESS; - } else { - LOG_WARN("accuracy check failed", K(ret), K(out_acc), K(res_obj)); - } - } else if (OB_FAIL(ObSPIService::spi_pad_char_or_varchar(session, obj_type, out_acc, &allocator, const_cast(res_obj)))) { - LOG_WARN("fail to pad char", K(ret), K(*res_obj)); - } - - if (OB_SUCC(ret)) { - if (OB_NOT_NULL(res_obj)) { - res.from_obj(*res_obj); - ObExprStrResAlloc res_alloc(expr, ctx); - if (OB_FAIL(res.deep_copy(res, res_alloc))) { - LOG_WARN("fail to deep copy for res datum", K(ret), KPC(res_obj), K(res)); - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("res obj is NULL", K(ret)); - } - } - } - } - return ret; -} - - -int ObExprXmlcast::extract_xml_text_node(ObMulModeMemCtx* mem_ctx, ObIMulModeBase *xml_doc, ObString &res) -{ - int ret = OB_SUCCESS; - ObStringBuffer buff(mem_ctx->allocator_); - ObPathExprIter xpath_iter(mem_ctx->allocator_); - ObString xpath_str = ObString::make_string("//node()"); - ObString default_ns; // unused - ObIMulModeBase *xml_node = NULL; - bool is_xml_document = false; - bool is_head_comment = true; - if (OB_ISNULL(xml_doc)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("xml doc node is NULL", K(ret)); - } else if (FALSE_IT(is_xml_document = xml_doc->type() == M_DOCUMENT)) { - } else if (OB_FAIL(xpath_iter.init(mem_ctx, xpath_str, default_ns, xml_doc, NULL))) { - LOG_WARN("fail to init xpath iterator", K(xpath_str), K(default_ns), K(ret)); - } else if (OB_FAIL(xpath_iter.open())) { - LOG_WARN("fail to open xpath iterator", K(ret)); - } - - while (OB_SUCC(ret)) { - ObString content; - if (OB_FAIL(xpath_iter.get_next_node(xml_node))) { - if (ret != OB_ITER_END) { - LOG_WARN("fail to get next xml node", K(ret)); - 
} - } else if (OB_ISNULL(xml_node)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("xpath result node is null", K(ret)); - } else { - ObMulModeNodeType node_type = xml_node->type(); - if (node_type != M_TEXT && - node_type != M_CDATA && - node_type != M_COMMENT && - node_type != M_INSTRUCT) { - is_head_comment = false; - } else if ((node_type == M_COMMENT || node_type == M_INSTRUCT) && - (is_xml_document || (!is_xml_document && !is_head_comment))) { - /* filter the comment node */ - } else if (OB_FAIL(xml_node->get_value(content))) { - LOG_WARN("fail to get text node content", K(ret)); - } else if (OB_FAIL(buff.append(content))) { - LOG_WARN("fail to append text node content", K(ret), K(content)); - } - } - } - - if (ret == OB_ITER_END) { - res.assign_ptr(buff.ptr(), buff.length()); - ret = OB_SUCCESS; - } - - int tmp_ret = OB_SUCCESS; - if (OB_SUCCESS != (tmp_ret = xpath_iter.close())) { - LOG_WARN("fail to close xpath iter", K(tmp_ret)); - ret = COVER_SUCC(tmp_ret); - } - return ret; -} - -void ObExprXmlcast::get_accuracy_from_expr(const ObExpr &expr, ObAccuracy &accuracy) -{ - accuracy.set_length(expr.max_length_); - accuracy.set_scale(expr.datum_meta_.scale_); - const ObObjTypeClass &dst_tc = ob_obj_type_class(expr.datum_meta_.type_); - if (ObStringTC == dst_tc || ObTextTC == dst_tc) { - accuracy.set_length_semantics(expr.datum_meta_.length_semantics_); - } else { - accuracy.set_precision(expr.datum_meta_.precision_); - } -} - DEF_SET_LOCAL_SESSION_VARS(ObExprXmlcast, raw_expr) { int ret = OB_SUCCESS; SET_LOCAL_SYSVAR_CAPACITY(5); @@ -342,6 +190,6 @@ DEF_SET_LOCAL_SESSION_VARS(ObExprXmlcast, raw_expr) { EXPR_ADD_LOCAL_SYSVAR(SYS_VAR_COLLATION_CONNECTION); return ret; } -#endif + } // sql } // oceanbase \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_xmlcast.h b/src/sql/engine/expr/ob_expr_xmlcast.h index d66f3867e9..bd3b8356ad 100644 --- a/src/sql/engine/expr/ob_expr_xmlcast.h +++ b/src/sql/engine/expr/ob_expr_xmlcast.h @@ -15,9 +15,7 @@ #define 
OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_XMLCAST_H #include "sql/engine/expr/ob_expr_operator.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_multi_mode_interface.h" -#endif namespace oceanbase { @@ -34,26 +32,18 @@ class ObExprXmlcast : public ObFuncExprOperator ObExprResType &type2, common::ObExprTypeCtx &type_ctx) const; -#ifdef OB_BUILD_ORACLE_XML static int eval_xmlcast(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); -#else - static int eval_xmlcast(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { return OB_NOT_SUPPORTED; } -#endif virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override; -#ifdef OB_BUILD_ORACLE_XML DECLARE_SET_LOCAL_SESSION_VARS; -#endif private: int set_dest_type(ObExprResType ¶m_type, ObExprResType &dst_type, ObExprTypeCtx &type_ctx) const; -#ifdef OB_BUILD_ORACLE_XML static int extract_xml_text_node(ObMulModeMemCtx* mem_ctx, ObIMulModeBase *xml_doc, ObString &res); static int cast_to_res(ObIAllocator &allocator, ObString &xml_content, const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); static void get_accuracy_from_expr(const ObExpr &expr, ObAccuracy &acc); -#endif private: DISALLOW_COPY_AND_ASSIGN(ObExprXmlcast); }; diff --git a/src/sql/engine/expr/ob_expr_xmlparse.cpp b/src/sql/engine/expr/ob_expr_xmlparse.cpp index 6910997051..0b6588c3a1 100644 --- a/src/sql/engine/expr/ob_expr_xmlparse.cpp +++ b/src/sql/engine/expr/ob_expr_xmlparse.cpp @@ -13,11 +13,9 @@ #include "ob_expr_xmlparse.h" #include "sql/engine/ob_exec_context.h" -#ifdef OB_BUILD_ORACLE_XML #include "sql/engine/expr/ob_expr_xml_func_helper.h" #include "lib/xml/ob_xml_tree.h" #include "lib/xml/ob_xml_util.h" -#endif #define USING_LOG_PREFIX SQL_ENG @@ -86,7 +84,6 @@ int ObExprXmlparse::calc_result_typeN(ObExprResType &type, return ret; } -#ifdef OB_BUILD_ORACLE_XML int ObExprXmlparse::eval_xmlparse(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { INIT_SUCC(ret); @@ -186,7 +183,6 @@ int ObExprXmlparse::eval_xmlparse(const 
ObExpr &expr, ObEvalCtx &ctx, ObDatum &r return ret; } -#endif int ObExprXmlparse::get_clause_opt(const ObExpr &expr, ObEvalCtx &ctx, diff --git a/src/sql/engine/expr/ob_expr_xmlparse.h b/src/sql/engine/expr/ob_expr_xmlparse.h index 9d74164036..31129bf1aa 100644 --- a/src/sql/engine/expr/ob_expr_xmlparse.h +++ b/src/sql/engine/expr/ob_expr_xmlparse.h @@ -33,11 +33,7 @@ class ObExprXmlparse : public ObFuncExprOperator int64_t param_num, common::ObExprTypeCtx& type_ctx) const override; -#ifdef OB_BUILD_ORACLE_XML static int eval_xmlparse(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); -#else - static int eval_xmlparse(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { return OB_NOT_SUPPORTED; } -#endif virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) diff --git a/src/sql/engine/expr/ob_infix_expression.cpp b/src/sql/engine/expr/ob_infix_expression.cpp index 1ac686c874..87622d8174 100644 --- a/src/sql/engine/expr/ob_infix_expression.cpp +++ b/src/sql/engine/expr/ob_infix_expression.cpp @@ -19,6 +19,7 @@ #include "sql/engine/expr/ob_expr_operator_factory.h" #include "sql/engine/expr/ob_expr_regexp.h" #include "share/ob_unique_index_row_transformer.h" +#include "share/ob_json_access_utils.h" #include "sql/engine/expr/ob_sql_expression.h" #include "lib/json_type/ob_json_tree.h" #include "lib/json_type/ob_json_base.h" @@ -362,7 +363,7 @@ int ObInfixExpression::calc_row(common::ObExprCtx &expr_ctx, const common::ObNew ObJsonBoolean j_bool(tmp->get_bool()); ObIJsonBase *j_base = &j_bool; ObString raw_bin; // - if (OB_FAIL(j_base->get_raw_binary(raw_bin, expr_ctx.calc_buf_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, expr_ctx.calc_buf_))) { LOG_WARN("get result binary failed", K(ret), K(*j_base)); } else { // bool type convert to json bool, need to know outside has lob header or not @@ -399,7 +400,7 @@ int ObInfixExpression::calc_row(common::ObExprCtx &expr_ctx, const common::ObNew ObJsonBoolean 
j_bool(stack->get_bool()); ObIJsonBase *j_base = &j_bool; ObString raw_bin; - if (OB_FAIL(j_base->get_raw_binary(raw_bin, expr_ctx.calc_buf_))) { + if (OB_FAIL(ObJsonWrapper::get_raw_binary(j_base, raw_bin, expr_ctx.calc_buf_))) { LOG_WARN("get result binary failed", K(ret), K(*j_base)); } else { // bool type convert to json bool, need to know outside has lob header or not diff --git a/src/sql/engine/expr/ob_json_param_type.h b/src/sql/engine/expr/ob_json_param_type.h index 9e9256fb6d..69e7e17276 100644 --- a/src/sql/engine/expr/ob_json_param_type.h +++ b/src/sql/engine/expr/ob_json_param_type.h @@ -110,19 +110,11 @@ typedef enum JsnValueClause { JSN_VAL_ASCII, // 4 JSN_VAL_EMPTY, // 5 JSN_VAL_EMPTY_DEF, // 6 - JSN_VAL_EMPTY_DEF_PRE, // 7 - JSN_VAL_ERROR, // 8 - JSN_VAL_ERROR_DEF, // 9 - JSN_VAL_ERROR_DEF_PRE, // 10 - JSN_VAL_MISMATCH // 11 + JSN_VAL_ERROR, // 7 + JSN_VAL_ERROR_DEF, // 8 + JSN_VAL_MISMATCH // 9 } JsnValueClause; -typedef enum JsnValueMisMatch { - JSN_VALUE_MISMATCH_ERROR, // 0 - JSN_VALUE_MISMATCH_NULL, // 1 - JSN_VALUE_MISMATCH_IGNORE, // 2 - JSN_VALUE_MISMATCH_IMPLICIT // 3 -} JsnValueMisMatch; typedef enum JsnValueOpt { JSN_VAL_TRUNC_OPT, // 0 diff --git a/src/sql/ob_sql_define.h b/src/sql/ob_sql_define.h index ef27455462..d0afb29c96 100644 --- a/src/sql/ob_sql_define.h +++ b/src/sql/ob_sql_define.h @@ -120,7 +120,10 @@ enum JtColType { COL_TYPE_QUERY, // 3 COL_TYPE_VALUE, // 4 NESTED_COL_TYPE, // 5 - COL_TYPE_QUERY_JSON_COL = 6, + COL_TYPE_QUERY_JSON_COL, // 6 + COL_TYPE_VAL_EXTRACT_XML, // 7 + COL_TYPE_XMLTYPE_XML, // 8 + COL_TYPE_ORDINALITY_XML = 9, }; enum ObNameTypeClass diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index 332ab0f957..62911c1b27 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -5907,6 +5907,8 @@ int JsonTablePath::assign(const JsonTablePath &other, common::ObIAllocator *allo int ret = OB_SUCCESS; if (OB_FAIL(Path::assign(other, allocator))) 
{ LOG_WARN("failed to deep copy path", K(ret)); + } else if (OB_FAIL(column_param_default_exprs_.assign(other.column_param_default_exprs_))) { + LOG_WARN("fail to assgin default expr", K(ret)); } else { table_id_ = other.table_id_; value_expr_ = other.value_expr_; @@ -7689,6 +7691,8 @@ int ObJoinOrder::generate_json_table_paths() json_path->parent_ = this; ObSEArray nl_params; ObRawExpr* json_table_expr = NULL; + ObRawExpr* default_expr = NULL; + ObArray column_items; // magic number ? todo refine this output_rows_ = 199; output_row_size_ = 199; @@ -7709,6 +7713,37 @@ int ObJoinOrder::generate_json_table_paths() } else { json_path->value_expr_ = json_table_expr; } + // deal non_const default value + if (OB_FAIL(ret)) { + } else if (OB_FAIL(stmt->get_column_items(table_id_, column_items))) { + LOG_WARN("fail to get column item", K(ret)); + } else if (OB_FAIL(json_path->column_param_default_exprs_.reserve(column_items.count()))) { + LOG_WARN("fail to init column default map", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < column_items.count(); i++) { + ColumnItem& col_item = column_items.at(i); + ObColumnDefault col_val(col_item.column_id_); + default_expr = col_item.default_value_expr_; + if (OB_FAIL(generate_json_table_default_val(json_path->nl_params_, + json_path->subquery_exprs_, + default_expr))) { // default error + LOG_WARN("fail to check default error value", K(ret)); + } else { + col_val.default_error_expr_ = default_expr; + } + default_expr = col_item.default_empty_expr_; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(generate_json_table_default_val(json_path->nl_params_, + json_path->subquery_exprs_, + default_expr))) { // default empty + LOG_WARN("fail to check default empty value", K(ret)); + } else { + col_val.default_empty_expr_ = default_expr; + } + if (OB_SUCC(ret) && OB_FAIL(json_path->column_param_default_exprs_.push_back(col_val))) { + LOG_WARN("fail to append col default into array", K(ret), K(col_val)); + } + } if (OB_SUCC(ret)) { if 
(OB_FAIL(json_path->estimate_cost())) { LOG_WARN("failed to estimate cost", K(ret)); @@ -7722,6 +7757,45 @@ int ObJoinOrder::generate_json_table_paths() return ret; } +int ObJoinOrder::generate_json_table_default_val(ObIArray &nl_param, + ObIArray &subquery_exprs, + ObRawExpr*& default_expr) +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(default_expr)) { + ObArray t_nl_param; + const ObDMLStmt *stmt = NULL; + ObLogPlan *plan = get_plan(); + ObSEArray old_func_exprs; + ObSEArray new_func_exprs; + ObExecParamRawExpr *param = nullptr; + if (OB_ISNULL(plan) || OB_ISNULL(stmt = plan->get_stmt())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("NULL pointer error", K(plan), K(ret)); + } else if (OB_FAIL(old_func_exprs.push_back(default_expr))) { + LOG_WARN("failed to push back function table expr", K(ret)); + } else if (OB_FAIL(extract_params_for_inner_path(default_expr->get_relation_ids(), + t_nl_param, + subquery_exprs, + old_func_exprs, + new_func_exprs))) { + LOG_WARN("failed to extract params", K(ret)); + } else if (OB_UNLIKELY(new_func_exprs.count() != 1) || + OB_ISNULL(new_func_exprs.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("new function table expr is invalid", K(ret), K(new_func_exprs)); + } else { + default_expr = new_func_exprs.at(0); + for (int64_t i = 0; OB_SUCC(ret) && i < t_nl_param.count(); i++) { + if (OB_FAIL(nl_param.push_back(t_nl_param.at(i)))) { + LOG_WARN("fail to push nl param", K(ret)); + } + } + } + } + return ret; +} + int ObJoinOrder::generate_function_table_paths() { int ret = OB_SUCCESS; diff --git a/src/sql/optimizer/ob_join_order.h b/src/sql/optimizer/ob_join_order.h index eaadd05d8e..6b0d5a928d 100644 --- a/src/sql/optimizer/ob_join_order.h +++ b/src/sql/optimizer/ob_join_order.h @@ -1092,8 +1092,9 @@ struct EstimateCostInfo { JsonTablePath() : Path(NULL), table_id_(OB_INVALID_ID), - value_expr_(NULL) {} - virtual ~JsonTablePath() { } + value_expr_(NULL), + column_param_default_exprs_() {} + virtual ~JsonTablePath() {} int assign(const 
JsonTablePath &other, common::ObIAllocator *allocator); virtual int estimate_cost() override; virtual int get_name_internal(char *buf, const int64_t buf_len, int64_t &pos) const @@ -1107,6 +1108,7 @@ struct EstimateCostInfo { public: uint64_t table_id_; ObRawExpr* value_expr_; + common::ObSEArray column_param_default_exprs_; private: DISALLOW_COPY_AND_ASSIGN(JsonTablePath); }; @@ -1405,6 +1407,9 @@ struct NullAwareAntiJoinInfo { int param_json_table_expr(ObRawExpr* &json_table_expr, ObIArray &nl_params, ObIArray &subquery_exprs); + int generate_json_table_default_val(ObIArray &nl_param, + ObIArray &subquery_exprs, + ObRawExpr*& default_expr); /** * 为本节点增加一条路径,代价竞争过程在这里实现 * @param path diff --git a/src/sql/optimizer/ob_log_json_table.cpp b/src/sql/optimizer/ob_log_json_table.cpp index 2b661fd09b..2399cfdfe2 100644 --- a/src/sql/optimizer/ob_log_json_table.cpp +++ b/src/sql/optimizer/ob_log_json_table.cpp @@ -81,16 +81,14 @@ int ObLogJsonTable::get_op_exprs(ObIArray &all_exprs) } else if (NULL != value_expr_ && OB_FAIL(all_exprs.push_back(value_expr_))) { LOG_WARN("failed to push back expr", K(ret)); } else { - for (int64_t i = 0; OB_SUCC(ret) && i < stmt->get_column_size(); ++i) { - const ColumnItem *col_item = stmt->get_column_item(i); - if (col_item->table_id_ == table_id_) { - if (OB_NOT_NULL(col_item->default_value_expr_) - && OB_FAIL(all_exprs.push_back(col_item->default_value_expr_))) { - LOG_WARN("failed to push back expr", K(ret)); - } else if (OB_NOT_NULL(col_item->default_empty_expr_) - && OB_FAIL(all_exprs.push_back(col_item->default_empty_expr_))) { - LOG_WARN("failed to push back expr", K(ret)); - } + // add default value into all exprs + for (int64_t i = 0; OB_SUCC(ret) && i < column_param_default_exprs_.count(); i ++) { + if (OB_NOT_NULL(column_param_default_exprs_.at(i).default_error_expr_) + && OB_FAIL(all_exprs.push_back(column_param_default_exprs_.at(i).default_error_expr_))) { + LOG_WARN("push error expr to array failed", K(ret)); + } else if 
(OB_NOT_NULL(column_param_default_exprs_.at(i).default_empty_expr_) + && OB_FAIL(all_exprs.push_back(column_param_default_exprs_.at(i).default_empty_expr_))) { + LOG_WARN("push empty expr to array failed", K(ret)); } } } @@ -138,5 +136,60 @@ uint64_t ObLogJsonTable::hash(uint64_t seed) const return seed; } +int ObLogJsonTable::set_namespace_arr(ObIArray &namespace_arr) +{ + int ret = OB_SUCCESS; + for (size_t i = 0; OB_SUCC(ret) && i < namespace_arr.count(); i++) { + if (OB_FAIL(namespace_arr_.push_back(namespace_arr.at(i)))) { + LOG_WARN("fail to push ns to arr", K(ret), K(i)); + } + } + return ret; +} + +int ObLogJsonTable::get_namespace_arr(ObIArray &namespace_arr) +{ + int ret = OB_SUCCESS; + for (size_t i = 0; OB_SUCC(ret) && i < namespace_arr_.count(); i++) { + if (OB_FAIL(namespace_arr.push_back(namespace_arr_.at(i)))) { + LOG_WARN("fail to push ns to arr", K(ret), K(i)); + } + } + return ret; +} + +int ObLogJsonTable::set_column_param_default_arr(ObIArray &column_param_default_exprs) +{ + int ret = OB_SUCCESS; + for (size_t i = 0; OB_SUCC(ret) && i < column_param_default_exprs.count(); i++) { + if (OB_FAIL(column_param_default_exprs_.push_back(column_param_default_exprs.at(i)))) { + LOG_WARN("fail to push ns to arr", K(ret), K(i)); + } + } + return ret; +} + +int ObLogJsonTable::get_column_param_default_arr(ObIArray &column_param_default_exprs) +{ + int ret = OB_SUCCESS; + for (size_t i = 0; OB_SUCC(ret) && i < column_param_default_exprs_.count(); i++) { + if (OB_FAIL(column_param_default_exprs.push_back(column_param_default_exprs_.at(i)))) { + LOG_WARN("fail to push ns to arr", K(ret), K(i)); + } + } + return ret; +} + +ObColumnDefault* ObLogJsonTable::get_column_param_default_val(int64_t index) +{ + ObColumnDefault* val = NULL; + for (size_t i = 0; i < column_param_default_exprs_.count(); i++) { + if (index == column_param_default_exprs_.at(i).column_id_) { + val = &column_param_default_exprs_.at(i); + } + } + return val; +} + } // namespace sql }// 
namespace oceanbase diff --git a/src/sql/optimizer/ob_log_json_table.h b/src/sql/optimizer/ob_log_json_table.h index 80c0ccf5d5..d78a6c61e8 100644 --- a/src/sql/optimizer/ob_log_json_table.h +++ b/src/sql/optimizer/ob_log_json_table.h @@ -28,7 +28,9 @@ public: value_expr_(NULL), table_name_(), access_exprs_(), - all_cols_def_() {} + all_cols_def_(), + table_type_(MulModeTableType::INVALID_TABLE_TYPE), + namespace_arr_() {} virtual ~ObLogJsonTable() {} void add_values_expr(ObRawExpr* expr) { value_expr_ = expr; } @@ -49,6 +51,15 @@ public: common::ObIArray& get_origin_cols_def() { return all_cols_def_; } virtual int get_plan_item_info(PlanText &plan_text, ObSqlPlanItem &plan_item) override; + inline void set_table_type(MulModeTableType table_type) { table_type_ = table_type; } + inline MulModeTableType get_table_type() { return table_type_; } + int set_namespace_arr(ObIArray &namespace_arr); + int get_namespace_arr(ObIArray &namespace_arr); + int64_t get_ns_size() { return namespace_arr_.count(); } + int64_t get_column_param_default_size() { return column_param_default_exprs_.count(); } + int set_column_param_default_arr(ObIArray &column_param_default_exprs); + int get_column_param_default_arr(ObIArray &column_param_default_exprs); + ObColumnDefault* get_column_param_default_val(int64_t index); private: uint64_t table_id_; ObRawExpr* value_expr_; @@ -56,6 +67,12 @@ private: common::ObSEArray access_exprs_; common::ObSEArray all_cols_def_; + // table func type + MulModeTableType table_type_; + // xml table param + common::ObSEArray namespace_arr_; + // default value array + common::ObSEArray column_param_default_exprs_; DISALLOW_COPY_AND_ASSIGN(ObLogJsonTable); }; diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index e15596b4c0..00b98a9fa1 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -2713,6 +2713,46 @@ int ObLogPlan::init_width_estimation_info(const ObDMLStmt *stmt) return ret; } +int 
ObLogPlan::init_default_val_json(ObRelIds& depend_table_set, + ObRawExpr*& default_expr) +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(default_expr)) { + if (default_expr->get_relation_ids().is_empty()) { + //do nothing + } else if (OB_FAIL(depend_table_set.add_members(default_expr->get_relation_ids()))) { + LOG_WARN("failed to assign table ids", K(ret)); + } + } + return ret; +} + +int ObLogPlan::init_json_table_column_depend_info(ObRelIds& depend_table_set, + TableItem* json_table, + const ObDMLStmt *stmt) +{ + int ret = OB_SUCCESS; + ColumnItem* column_item = NULL; + common::ObArray stmt_column_items; + if (OB_ISNULL(stmt) || OB_ISNULL(json_table)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect null stmt", K(ret)); + } else if (OB_FAIL(stmt->get_column_items(json_table->table_id_, stmt_column_items))) { + LOG_WARN("fail to get column_items", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < stmt_column_items.count(); i++) { + if (json_table->table_id_ != stmt_column_items.at(i).table_id_) { + } else if (OB_NOT_NULL(stmt_column_items.at(i).default_value_expr_) + && OB_FAIL(init_default_val_json(depend_table_set, stmt_column_items.at(i).default_value_expr_))) { + LOG_WARN("fail to init error default value depend info", K(ret)); + } else if (OB_NOT_NULL(stmt_column_items.at(i).default_empty_expr_) + && OB_FAIL(init_default_val_json(depend_table_set, stmt_column_items.at(i).default_empty_expr_))) { + LOG_WARN("fail to init error default value depend info", K(ret)); + } + } + return ret; +} + int ObLogPlan::init_json_table_depend_info(const ObIArray &table_items) { int ret = OB_SUCCESS; @@ -2736,6 +2776,8 @@ int ObLogPlan::init_json_table_depend_info(const ObIArray &table_ite //do thing } else if (OB_FAIL(info.depend_table_set_.add_members(table->json_table_def_->doc_expr_->get_relation_ids()))) { LOG_WARN("failed to assign table ids", K(ret)); + } else if (OB_FAIL(init_json_table_column_depend_info(info.depend_table_set_, table, stmt))) { // deal column items 
default value + LOG_WARN("fail to init json table default value depend info", K(ret)); } else if (OB_FALSE_IT(info.table_idx_ = stmt->get_table_bit_index(table->table_id_))) { } else if (OB_FAIL(table_depend_infos_.push_back(info))) { LOG_WARN("failed to push back info", K(ret)); @@ -4174,7 +4216,12 @@ int ObLogPlan::allocate_json_table_path(JsonTablePath *json_table_path, LOG_WARN("failed to compute property", K(ret)); } else if (OB_FAIL(op->pick_out_startup_filters())) { LOG_WARN("failed to pick out startup filters", K(ret)); + } else if (OB_FAIL(op->set_namespace_arr(tbl_def->namespace_arr_))) { + LOG_WARN("fail to get ns array from table def", K(ret)); + } else if (OB_FAIL(op->set_column_param_default_arr(json_table_path->column_param_default_exprs_))) { + LOG_WARN("fail to get default array from table def", K(ret)); } else { + op->set_table_type(tbl_def->table_type_); out_access_path_op = op; } } diff --git a/src/sql/optimizer/ob_log_plan.h b/src/sql/optimizer/ob_log_plan.h index cf2ccc76f5..e81c93296c 100644 --- a/src/sql/optimizer/ob_log_plan.h +++ b/src/sql/optimizer/ob_log_plan.h @@ -1529,7 +1529,12 @@ protected: int init_function_table_depend_info(const ObIArray &table_items); int init_json_table_depend_info(const ObIArray &table_items); - + // init json_table non_const default value + int init_json_table_column_depend_info(ObRelIds& depend_table_set, + TableItem* json_table, + const ObDMLStmt *stmt); + int init_default_val_json(ObRelIds& depend_table_set, + ObRawExpr*& default_expr); int check_need_bushy_tree(common::ObIArray &join_rels, const int64_t join_level, bool &need); diff --git a/src/sql/optimizer/ob_logical_operator.h b/src/sql/optimizer/ob_logical_operator.h index f55d04a3dc..37362c5f17 100644 --- a/src/sql/optimizer/ob_logical_operator.h +++ b/src/sql/optimizer/ob_logical_operator.h @@ -1960,6 +1960,33 @@ int ObLogicalOperator::init_all_traverse_ctx(Allocator &alloc) return ret; } +// json table default value struct +struct ObColumnDefault +{ 
+public: + ObColumnDefault() + : column_id_(common::OB_NOT_EXIST_COLUMN_ID), + default_error_expr_(nullptr), + default_empty_expr_(nullptr) + {} + ObColumnDefault(int64_t column_id) + : column_id_(column_id), + default_error_expr_(nullptr), + default_empty_expr_(nullptr) + {} + void reset() + { + column_id_ = common::OB_NOT_EXIST_COLUMN_ID; + default_error_expr_ = nullptr; + default_empty_expr_ = nullptr; + } + + TO_STRING_KV(K_(column_id), KPC_(default_error_expr), KPC_(default_empty_expr)); + int64_t column_id_; + ObRawExpr* default_error_expr_; + ObRawExpr* default_empty_expr_; +}; + } // end of namespace sql } // end of namespace oceanbase diff --git a/src/sql/parser/non_reserved_keywords_mysql_mode.c b/src/sql/parser/non_reserved_keywords_mysql_mode.c index 12c98c9b05..84adfdb25e 100644 --- a/src/sql/parser/non_reserved_keywords_mysql_mode.c +++ b/src/sql/parser/non_reserved_keywords_mysql_mode.c @@ -842,6 +842,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"stop", STOP}, {"storage", STORAGE}, {"storage_format_version", STORAGE_FORMAT_VERSION}, + {"store", STORE}, {"stored", STORED}, {"storing", STORING}, {"string", STRING}, diff --git a/src/sql/parser/sql_parser_mysql_mode.y b/src/sql/parser/sql_parser_mysql_mode.y index bf5b12aa86..f50ac789e1 100644 --- a/src/sql/parser/sql_parser_mysql_mode.y +++ b/src/sql/parser/sql_parser_mysql_mode.y @@ -375,7 +375,7 @@ END_P SET_VAR DELIMITER %type sql_stmt stmt_list stmt opt_end_p %type select_stmt update_stmt delete_stmt %type insert_stmt single_table_insert values_clause dml_table_name -%type create_table_stmt create_table_like_stmt opt_table_option_list table_option_list table_option table_option_list_space_seperated create_function_stmt drop_function_stmt parallel_option +%type create_table_stmt create_table_like_stmt opt_table_option_list table_option_list table_option table_option_list_space_seperated create_function_stmt drop_function_stmt parallel_option lob_storage_clause 
lob_storage_parameter lob_storage_parameters lob_chunk_size %type opt_force %type create_sequence_stmt alter_sequence_stmt drop_sequence_stmt opt_sequence_option_list sequence_option_list sequence_option simple_num %type create_database_stmt drop_database_stmt alter_database_stmt use_database_stmt @@ -6540,6 +6540,10 @@ not NULLX merge_nodes(opt_skip_index_type_list, result, T_COL_SKIP_INDEX_LIST, $3); malloc_non_terminal_node($$, result->malloc_pool_, T_COL_SKIP_INDEX, 1, opt_skip_index_type_list); } +| lob_chunk_size +{ + $$ = $1; +} ; opt_column_default_value_list: @@ -6881,6 +6885,10 @@ TABLE_MODE opt_equal_mark STRING_VALUE (void)($2); /* make bison mute*/ malloc_non_terminal_node($$, result->malloc_pool_, T_LOB_INROW_THRESHOLD, 1, $3); } +| lob_storage_clause +{ + $$ = $1; +} ; parallel_option: @@ -16169,6 +16177,18 @@ ADD COLUMN column_definition { merge_nodes($$, result, T_COLUMN_ADD, $3); } +| ADD COLUMN '(' column_definition_list ')' lob_storage_clause +{ + ParseNode *column_node = NULL; + merge_nodes(column_node, result, T_COLUMN_ADD, $4); + malloc_non_terminal_node($$, result->malloc_pool_, T_COLUMN_ADD_WITH_LOB_PARAMS, 2, column_node, $6); +} +| ADD '(' column_definition_list ')' lob_storage_clause +{ + ParseNode *column_node = NULL; + merge_nodes(column_node, result, T_COLUMN_ADD, $3); + malloc_non_terminal_node($$, result->malloc_pool_, T_COLUMN_ADD_WITH_LOB_PARAMS, 2, column_node, $5); +} | DROP column_definition_ref opt_drop_behavior { malloc_non_terminal_node($$, result->malloc_pool_, T_COLUMN_DROP, 1, $2); @@ -19903,6 +19923,48 @@ MIN_MAX } ; +lob_chunk_size: +CHUNK INTNUM +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_LOB_CHUNK_SIZE, 1, $2); +} +| +CHUNK STRING_VALUE +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_CONSTR_LOB_CHUNK_SIZE, 1, $2); +} +; + +lob_storage_parameter: +lob_chunk_size +{ + $$ = $1; +} + +lob_storage_parameters: +lob_storage_parameter +{ + $$ = $1; +} +| +lob_storage_parameters lob_storage_parameter +{ 
+ malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 2, $1, $2); +} +; + +lob_storage_clause: +JSON '(' column_name ')' STORE AS '(' lob_storage_parameters ')' +{ + ParseNode *type_node = NULL; + malloc_terminal_node(type_node, result->malloc_pool_, T_INT); + type_node->value_ = T_JSON; + ParseNode *params = NULL; + merge_nodes(params, result, T_ARGUMENT_LIST, $8); + malloc_non_terminal_node($$, result->malloc_pool_, T_LOB_STORAGE_CLAUSE, 3, type_node, $3, params); +} +; + unreserved_keyword: unreserved_keyword_normal { $$=$1;} | unreserved_keyword_special { $$=$1;} @@ -20527,6 +20589,7 @@ ACCOUNT | STOP | STORAGE | STORAGE_FORMAT_VERSION +| STORE | STORING | STRONG | STRING diff --git a/src/sql/plan_cache/ob_sql_parameterization.cpp b/src/sql/plan_cache/ob_sql_parameterization.cpp index b2d49524ac..8ee7471731 100644 --- a/src/sql/plan_cache/ob_sql_parameterization.cpp +++ b/src/sql/plan_cache/ob_sql_parameterization.cpp @@ -1842,6 +1842,13 @@ int ObSqlParameterization::mark_tree(ParseNode *tree ,SqlInfo &sql_info) sql_info.ps_need_parameterized_ = false; } else if ((0 == func_name.case_compare("json_extract"))) { sql_info.ps_need_parameterized_ = false; + } else if ((0 == func_name.case_compare("json_schema_valid")) + || (0 == func_name.case_compare("json_schema_validation_report"))) { + const int64_t ARGS_NUMBER_TWO = 2; + bool mark_arr[ARGS_NUMBER_TWO] = {1, 0}; + if (OB_FAIL(mark_args(node[1], mark_arr, ARGS_NUMBER_TWO, sql_info))) { + SQL_PC_LOG(WARN, "fail to mark arg", K(ret)); + } } } } else if (T_OP_LIKE == tree->type_) { @@ -1861,13 +1868,13 @@ int ObSqlParameterization::mark_tree(ParseNode *tree ,SqlInfo &sql_info) } } else { /*do nothing*/ } } else if(T_FUN_SYS_JSON_VALUE == tree->type_) { - if (9 != tree->num_child_) { + if (10 != tree->num_child_) { ret = OB_INVALID_ARGUMENT; SQL_PC_LOG(WARN, "invalid json value expr argument", K(ret), K(tree->num_child_)); } else { - const int64_t ARGS_NUMBER_NINE = 9; - bool mark_arr[ARGS_NUMBER_NINE] = 
{0, 1, 1, 1, 1, 1, 1, 1, 1}; - if (OB_FAIL(mark_args(tree, mark_arr, ARGS_NUMBER_NINE, sql_info))) { + const int64_t ARGS_NUMBER_TEN = 10; + bool mark_arr[ARGS_NUMBER_TEN] = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + if (OB_FAIL(mark_args(tree, mark_arr, ARGS_NUMBER_TEN, sql_info))) { SQL_PC_LOG(WARN, "fail to mark substr arg", K(ret)); } } @@ -1951,12 +1958,23 @@ int ObSqlParameterization::mark_tree(ParseNode *tree ,SqlInfo &sql_info) } else if (T_JSON_TABLE_EXPRESSION == tree->type_) { if (5 != tree->num_child_) { ret = OB_INVALID_ARGUMENT; - SQL_PC_LOG(WARN, "invalid json mergepatch expr argument", K(ret), K(tree->num_child_)); + SQL_PC_LOG(WARN, "invalid json table expr argument", K(ret), K(tree->num_child_)); } else { const int64_t ARGS_NUMBER_FIVE = 5; bool mark_arr[ARGS_NUMBER_FIVE] = {0, 1, 1, 1, 1}; if (OB_FAIL(mark_args(tree, mark_arr, ARGS_NUMBER_FIVE, sql_info))) { - SQL_PC_LOG(WARN, "fail to mark json mergepatch arg", K(ret)); + SQL_PC_LOG(WARN, "fail to mark json table arg", K(ret)); + } + } + } else if (T_XML_TABLE_EXPRESSION == tree->type_) { + if (6 != tree->num_child_) { + ret = OB_INVALID_ARGUMENT; + SQL_PC_LOG(WARN, "invalid xml table expr argument", K(ret), K(tree->num_child_)); + } else { + const int64_t ARGS_NUMBER_SIX = 6; + bool mark_arr[ARGS_NUMBER_SIX] = {1, 1, 0, 1, 1, 1}; // because of namespace deal in resolve, so can not parameter + if (OB_FAIL(mark_args(tree, mark_arr, ARGS_NUMBER_SIX, sql_info))) { + SQL_PC_LOG(WARN, "fail to mark xml table arg", K(ret)); } } } else if (T_FUN_SYS_TREAT == tree->type_) { diff --git a/src/sql/printer/ob_dml_stmt_printer.cpp b/src/sql/printer/ob_dml_stmt_printer.cpp index 19d76baa3a..bf2aa9de5e 100644 --- a/src/sql/printer/ob_dml_stmt_printer.cpp +++ b/src/sql/printer/ob_dml_stmt_printer.cpp @@ -446,11 +446,27 @@ int ObDMLStmtPrinter::print_table(const TableItem *table_item, break; } case TableItem::JSON_TABLE: { - DATA_PRINTF("JSON_TABLE("); - OZ (expr_printer_.do_print(table_item->json_table_def_->doc_expr_, 
T_FROM_SCOPE)); - OZ (print_json_table(table_item)); - DATA_PRINTF(")"); - DATA_PRINTF(" %.*s", LEN_AND_PTR(table_item->alias_name_)); + switch (table_item->json_table_def_->table_type_) { + case MulModeTableType::OB_ORA_JSON_TABLE_TYPE : { + DATA_PRINTF("JSON_TABLE("); + OZ (expr_printer_.do_print(table_item->json_table_def_->doc_expr_, T_FROM_SCOPE)); + OZ (print_json_table(table_item)); + DATA_PRINTF(")"); + DATA_PRINTF(" %.*s", LEN_AND_PTR(table_item->alias_name_)); + break; + } + case MulModeTableType::OB_ORA_XML_TABLE_TYPE : { + DATA_PRINTF("XMLTABLE("); + OZ (print_xml_table(table_item)); + DATA_PRINTF(")"); + DATA_PRINTF(" %.*s", LEN_AND_PTR(table_item->alias_name_)); + break; + } + default : { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected table function type"); + } + } break; } case TableItem::TEMP_TABLE: { @@ -677,6 +693,11 @@ int ObDMLStmtPrinter::print_json_return_type(int64_t value, ObDataType data_type } break; } + case T_EXTEND: { + ret = OB_ERR_INVALID_CAST_UDT; + LOG_WARN("invalid CAST to a type that is not a nested table or VARRAY", K(ret)); + break; + } default: { ret = OB_ERR_UNEXPECTED; LOG_WARN("unknown cast type", K(ret), K(cast_type)); @@ -1033,23 +1054,50 @@ int ObDMLStmtPrinter::print_mysql_json_return_type(int64_t value, ObDataType dat break; } case T_GEOMETRY: { - int32_t flag = parse_node.int32_values_[1]; - if (flag == 0) { - DATA_PRINTF("GEOMETRY "); - } else if (flag == 1) { - DATA_PRINTF("POINT "); - } else if (flag == 2) { - DATA_PRINTF("LINESTRING "); - } else if (flag == 3) { - DATA_PRINTF("POLYGON "); - } else if (flag == 4) { - DATA_PRINTF("MULTIPOINT "); - } else if (flag == 5) { - DATA_PRINTF("MULTILINESTRING "); - } else if (flag == 6) { - DATA_PRINTF("MULTIPOLYGON "); - } else if (flag == 7) { - DATA_PRINTF("GEOMETRYCOLLECTION "); + ObGeoType geo_type = static_cast(parse_node.int32_values_[1]); + switch (geo_type) { + case ObGeoType::GEOMETRY: { + DATA_PRINTF("geometry"); + break; + } + case ObGeoType::POINT: { + 
DATA_PRINTF("point"); + break; + } + case ObGeoType::LINESTRING: { + DATA_PRINTF("linestring"); + break; + } + case ObGeoType::POLYGON: { + DATA_PRINTF("polygon"); + break; + } + case ObGeoType::MULTIPOINT: { + DATA_PRINTF("multipoint"); + break; + } + case ObGeoType::MULTILINESTRING: { + DATA_PRINTF("multilinestring"); + break; + } + case ObGeoType::MULTIPOLYGON: { + DATA_PRINTF("multipolygon"); + break; + } + case ObGeoType::GEOMETRYCOLLECTION: { + DATA_PRINTF("geometrycollection"); + break; + } + case ObGeoType::GEOTYPEMAX: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid cast geo sub type", K(ret), K(cast_type), K(geo_type)); + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unknown cast geo sub type", K(ret), K(cast_type), K(geo_type)); + break; + } } break; } @@ -1179,7 +1227,8 @@ int ObDMLStmtPrinter::print_json_table_nested_column(const TableItem *table_item } if (OB_FAIL(ret)) { - } else if (col_info.col_type_ == static_cast(COL_TYPE_ORDINALITY)) { + } else if (col_info.col_type_ == static_cast(COL_TYPE_ORDINALITY) + || col_info.col_type_ == static_cast(COL_TYPE_ORDINALITY_XML)) { DATA_PRINTF(" for ordinality"); } else if (col_info.col_type_ == static_cast(COL_TYPE_EXISTS)) { // to print returning type @@ -1348,6 +1397,33 @@ int ObDMLStmtPrinter::print_json_table_nested_column(const TableItem *table_item } else if (col_info.on_mismatch_type_ == 2) { DATA_PRINTF(" (type error)"); } + } else if (col_info.col_type_ == static_cast(COL_TYPE_VAL_EXTRACT_XML)) { + OZ (print_json_return_type(col_info.res_type_, col_info.data_type_)); + if (OB_SUCC(ret) && col_info.path_.length() > 0) { + DATA_PRINTF(" path \'%.*s\'", LEN_AND_PTR(col_info.path_)); + } + if (OB_SUCC(ret) && col_info.on_empty_ == 2) { + DATA_PRINTF(" default "); + if (OB_SUCC(ret) + && OB_FAIL(expr_printer_.do_print(cur_def->empty_expr_, T_NONE_SCOPE))) { + LOG_WARN("fail to print default value col", K(ret)); + } + } + } else if (col_info.col_type_ == static_cast(COL_TYPE_XMLTYPE_XML)) 
{ + DATA_PRINTF(" XMLTYPE"); + if (OB_SUCC(ret) && col_info.truncate_) { + DATA_PRINTF(" ( SEQUENCE ) BY REF"); + } + if (OB_SUCC(ret) && col_info.path_.length() > 0) { + DATA_PRINTF(" path \'%.*s\'", LEN_AND_PTR(col_info.path_)); + } + if (OB_SUCC(ret) && col_info.on_empty_ == 2) { + DATA_PRINTF(" default "); + if (OB_SUCC(ret) + && OB_FAIL(expr_printer_.do_print(cur_def->empty_expr_, T_NONE_SCOPE))) { + LOG_WARN("fail to print default value col", K(ret)); + } + } } } } @@ -1366,6 +1442,55 @@ int ObDMLStmtPrinter::print_json_table_nested_column(const TableItem *table_item return ret; } +int ObDMLStmtPrinter::print_xml_namespace(const TableItem *table_item) +{ + INIT_SUCC(ret); + DATA_PRINTF("XMLNAMESPACES( "); + bool is_default = false; + for (int64_t i = 0; OB_SUCC(ret) && i < table_item->json_table_def_->namespace_arr_.count(); i ++) { + if (i % 2 == 0) { // first value is uri + if (i > 0) { + DATA_PRINTF(", "); + } + if (table_item->json_table_def_->namespace_arr_.at(i + 1).empty()) { + DATA_PRINTF("DEFAULT \'%.*s\' ", LEN_AND_PTR(table_item->json_table_def_->namespace_arr_.at(i))); + } else { + DATA_PRINTF("\'%.*s\' AS ", LEN_AND_PTR(table_item->json_table_def_->namespace_arr_.at(i))); + } + } else if (!table_item->json_table_def_->namespace_arr_.at(i).empty()) { + DATA_PRINTF("%.*s ", LEN_AND_PTR(table_item->json_table_def_->namespace_arr_.at(i))); + } + } + DATA_PRINTF("),"); + return ret; +} +int ObDMLStmtPrinter::print_xml_table(const TableItem *table_item) +{ + int ret = OB_SUCCESS; + ObJsonTableDef* tbl_def = table_item->json_table_def_; + ObArenaAllocator alloc; + ObDmlJtColDef* root_def = nullptr; + if (OB_FAIL(build_json_table_nested_tree(table_item, &alloc, root_def))) { + LOG_WARN("fail to build column tree.", K(ret)); + } else if (table_item->json_table_def_->namespace_arr_.count() > 0 && OB_FAIL(print_xml_namespace(table_item))) { + LOG_WARN("fail to print xml ns", K(ret)); + } else if (root_def->col_base_info_.path_.length() > 0) { + 
DATA_PRINTF(" \'%.*s\'", LEN_AND_PTR(root_def->col_base_info_.path_)); + } + DATA_PRINTF(" PASSING "); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(expr_printer_.do_print(table_item->json_table_def_->doc_expr_, T_FROM_SCOPE))) { + LOG_WARN("fail to print xml doc", K(ret)); + } else if (root_def->col_base_info_.allow_scalar_) { + DATA_PRINTF(" RETURNING SEQUENCE BY REF"); + } + DATA_PRINTF(" COLUMNS "); + OZ (print_json_table_nested_column(table_item, *root_def)); + + return ret; +} + + int ObDMLStmtPrinter::print_json_table(const TableItem *table_item) { int ret = OB_SUCCESS; diff --git a/src/sql/printer/ob_dml_stmt_printer.h b/src/sql/printer/ob_dml_stmt_printer.h index 2eef1f0227..fc696a7e4f 100644 --- a/src/sql/printer/ob_dml_stmt_printer.h +++ b/src/sql/printer/ob_dml_stmt_printer.h @@ -109,6 +109,7 @@ public: int print_fetch(); int print_returning(); int print_json_table(const TableItem *table_item); + int print_xml_table(const TableItem *table_item); int print_table(const TableItem *table_item, bool no_print_alias = false); int print_table_with_subquery(const TableItem *table_item); @@ -147,6 +148,8 @@ private: int print_binary_charset_collation(int64_t value, ObDataType data_type); int get_json_table_column_if_exists(int32_t id, ObDmlJtColDef* root, ObDmlJtColDef*& col); int build_json_table_nested_tree(const TableItem* table_item, ObIAllocator* allocator, ObDmlJtColDef*& root); + // add xml table namespace + int print_xml_namespace(const TableItem *table_item); // disallow copy DISALLOW_COPY_AND_ASSIGN(ObDMLStmtPrinter); diff --git a/src/sql/printer/ob_raw_expr_printer.cpp b/src/sql/printer/ob_raw_expr_printer.cpp index be58a08e3d..6d88df941c 100644 --- a/src/sql/printer/ob_raw_expr_printer.cpp +++ b/src/sql/printer/ob_raw_expr_printer.cpp @@ -20,6 +20,7 @@ #include "lib/worker.h" #include "pl/ob_pl_user_type.h" #include "pl/ob_pl_stmt.h" +#include "sql/engine/expr/ob_json_param_type.h" #include "lib/geo/ob_sdo_geo_object.h" namespace oceanbase @@ -1827,17 
+1828,17 @@ int ObRawExprPrinter::print_json_value(ObSysFunRawExpr *expr) { INIT_SUCC(ret); if (OB_SUCC(ret)) { - if (OB_FAIL(print_json_return_type(expr->get_param_expr(2)))) { + if (OB_FAIL(print_json_return_type(expr->get_param_expr(JsnValueClause::JSN_VAL_RET)))) { LOG_WARN("fail to print cast_type", K(ret)); } } if (OB_SUCC(ret)) { - if (!static_cast(expr->get_param_expr(3))->get_value().is_int()) { + if (!static_cast(expr->get_param_expr(JsnValueClause::JSN_VAL_TRUNC))->get_value().is_int()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("truncate value isn't int value"); } else { - int64_t type = static_cast(expr->get_param_expr(3))->get_value().get_int(); + int64_t type = static_cast(expr->get_param_expr(JsnValueClause::JSN_VAL_TRUNC))->get_value().get_int(); switch (type) { case 0: break; @@ -1851,13 +1852,12 @@ int ObRawExprPrinter::print_json_value(ObSysFunRawExpr *expr) } } } - if (OB_SUCC(ret)) { - if (!static_cast(expr->get_param_expr(4))->get_value().is_int()) { + if (!static_cast(expr->get_param_expr(JsnValueClause::JSN_VAL_ASCII))->get_value().is_int()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ascii value isn't int value"); } else { - int64_t type = static_cast(expr->get_param_expr(4))->get_value().get_int(); + int64_t type = static_cast(expr->get_param_expr(JsnValueClause::JSN_VAL_ASCII))->get_value().get_int(); switch (type) { case 0: DATA_PRINTF(""); @@ -1872,53 +1872,55 @@ int ObRawExprPrinter::print_json_value(ObSysFunRawExpr *expr) } } } + // print empty type type 5, default value 6. 
if (OB_SUCC(ret)) { - if (!static_cast(expr->get_param_expr(5))->get_value().is_int()) { + if (!static_cast(expr->get_param_expr(JsnValueClause::JSN_VAL_EMPTY))->get_value().is_int()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("type value isn't int value"); } else { - int64_t type = static_cast(expr->get_param_expr(5))->get_value().get_int(); + int64_t type = static_cast(expr->get_param_expr(JsnValueClause::JSN_VAL_EMPTY))->get_value().get_int(); switch (type) { - case 0: + case JsnValueType::JSN_VALUE_ERROR: DATA_PRINTF(" error"); break; - case 1: - case 3: + case JsnValueType::JSN_VALUE_NULL: + case JsnValueType::JSN_VALUE_IMPLICIT: if (lib::is_mysql_mode() || type == 1) { DATA_PRINTF(" null"); } break; - case 2: + case JsnValueType::JSN_VALUE_DEFAULT: DATA_PRINTF(" default "); - PRINT_EXPR(expr->get_param_expr(6)); + PRINT_EXPR(expr->get_param_expr(JSN_VAL_EMPTY_DEF)); break; default: ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid type value.", K(type)); break; } - if (OB_SUCC(ret) && (lib::is_mysql_mode() || type < 3)) { + if (OB_SUCC(ret) && (lib::is_mysql_mode() || type < JsnValueType::JSN_VALUE_IMPLICIT)) { DATA_PRINTF(" on empty"); } } } + // print error type type 7, default value 8. 
if (OB_SUCC(ret)) { - if (!static_cast(expr->get_param_expr(8))->get_value().is_int()) { + if (!static_cast(expr->get_param_expr(JsnValueClause::JSN_VAL_ERROR))->get_value().is_int()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("type value isn't int value"); } else { - int64_t type = static_cast(expr->get_param_expr(8))->get_value().get_int(); + int64_t type = static_cast(expr->get_param_expr(JsnValueClause::JSN_VAL_ERROR))->get_value().get_int(); switch (type) { - case 0: + case JsnValueType::JSN_VALUE_ERROR: DATA_PRINTF(" error"); break; - case 1: - case 3: + case JsnValueType::JSN_VALUE_NULL: + case JsnValueType::JSN_VALUE_IMPLICIT: DATA_PRINTF(" null"); break; - case 2: + case JsnValueType::JSN_VALUE_DEFAULT: DATA_PRINTF(" default "); - PRINT_EXPR(expr->get_param_expr(9)); + PRINT_EXPR(expr->get_param_expr(JSN_VAL_ERROR_DEF)); break; default: ret = OB_ERR_UNEXPECTED; @@ -1933,7 +1935,7 @@ int ObRawExprPrinter::print_json_value(ObSysFunRawExpr *expr) if (lib::is_oracle_mode()) { if (OB_SUCC(ret)) { bool not_first_node = false; - for (size_t i = 11; OB_SUCC(ret) && i < expr->get_param_count(); i++) { + for (size_t i = JsnValueClause::JSN_VAL_MISMATCH; OB_SUCC(ret) && i < expr->get_param_count(); i++) { if (!static_cast(expr->get_param_expr(i))->get_value().is_int()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("type value isn't int value"); @@ -2452,7 +2454,7 @@ int ObRawExprPrinter::print_json_expr(ObSysFunRawExpr *expr) switch (expr->get_expr_type()) { case T_FUN_SYS_JSON_VALUE: { // if json value only have one mismatch clause, the size of parameter is 13 - const int8_t JSN_VAL_WITH_ONE_MISMATCH = 13; + const int8_t JSN_VAL_WITH_ONE_MISMATCH = 11; // json value parameter count more than 13, because mismatch is multi-val and default value. 
// json_value(expr(0), expr(1) returning cast_type truncate ascii xxx on empty(default value) xxx on error(default value) xxx on mismatch (xxx)) if (OB_UNLIKELY(expr->get_param_count() < JSN_VAL_WITH_ONE_MISMATCH)) { diff --git a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp index 31a9cfe757..8af7393023 100644 --- a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp @@ -25,10 +25,8 @@ #include "share/ob_index_builder_util.h" #include "sql/engine/expr/ob_expr_sql_udt_utils.h" #include "sql/engine/expr/ob_expr_lob_utils.h" -#ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_parser.h" #include "lib/xml/ob_xml_util.h" -#endif namespace oceanbase { @@ -1183,6 +1181,14 @@ int ObAlterTableResolver::resolve_column_options(const ParseNode &node, } break; } + case T_COLUMN_ADD_WITH_LOB_PARAMS: { + if (OB_FAIL(resolve_add_column(*column_node->children_[0]))) { + SQL_RESV_LOG(WARN, "Resolve column option error!", K(ret)); + } else if (OB_FAIL(resolve_lob_storage_parameters(column_node->children_[1]))) { + SQL_RESV_LOG(WARN, "Resolve lob storage parameters error!", K(ret)); + } + break; + } default:{ ret = OB_ERR_UNEXPECTED; SQL_RESV_LOG(WARN, "Unknown column option type!", @@ -4762,7 +4768,6 @@ int ObAlterTableResolver::add_udt_hidden_column(ObAlterTableStmt *alter_table_st LOG_WARN("fail to calc xmltype default value expr", K(ret)); } else { // calc result is 1. string type (not lob) or 2. 
xmltype binary (need to remove lob header) -#ifdef OB_BUILD_ORACLE_XML ObString res_string; ObObj xml_default; xml_default.set_null(); @@ -4812,10 +4817,6 @@ int ObAlterTableResolver::add_udt_hidden_column(ObAlterTableStmt *alter_table_st } else if (OB_FAIL(hidden_blob.set_orig_default_value(xml_default))) { LOG_WARN("fail to set orig default value", K(xml_default), K(ret)); } -#else - ret = OB_NOT_SUPPORTED; - LOG_WARN("xml type not supported in opensource version", K(ret), K(orig_default_value)); -#endif } if (OB_FAIL(ret)) { diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.cpp b/src/sql/resolver/ddl/ob_ddl_resolver.cpp index 766d787593..ab45ac3090 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.cpp +++ b/src/sql/resolver/ddl/ob_ddl_resolver.cpp @@ -2274,6 +2274,10 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool ret = resolve_lob_inrow_threshold(option_node, is_index_option); break; } + case T_LOB_STORAGE_CLAUSE : { + ret = resolve_lob_storage_parameters(option_node); + break; + } default: { /* won't be here */ ret = OB_ERR_UNEXPECTED; @@ -3531,6 +3535,13 @@ int ObDDLResolver::resolve_normal_column_attribute(ObColumnSchemaV2 &column, } break; } + case T_LOB_CHUNK_SIZE: + case T_CONSTR_LOB_CHUNK_SIZE: { + if (OB_FAIL(resolve_lob_chunk_size(column, *attr_node))) { + SQL_RESV_LOG(WARN, "fail to resolve lob chunk size", K(ret), K(attr_node->type_)); + } + break; + } default: // won't be here ret = OB_ERR_PARSER_SYNTAX; SQL_RESV_LOG(WARN, "Wrong column attribute", K(ret), K(attr_node->type_)); @@ -3858,6 +3869,201 @@ int ObDDLResolver::resolve_srid_node(share::schema::ObColumnSchemaV2 &column, return ret; } +int ObDDLResolver::resolve_lob_storage_parameters(const ParseNode *node) +{ + int ret = OB_SUCCESS; + uint64_t tenant_data_version = 0; + ObColumnSchemaV2 *column_schema = nullptr; + ObString column_name; + ObObjType type = ObObjType::ObNullType; + if (OB_ISNULL(schema_checker_) || OB_ISNULL(stmt_) || + 
OB_ISNULL(session_info_) || OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "unexpected null value", K(ret), K(schema_checker_), + K(stmt_), K(session_info_), K(node)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(session_info_->get_effective_tenant_id(), tenant_data_version))) { + SQL_RESV_LOG(WARN, "get tenant data version failed", KR(ret)); + } else if (! ((DATA_VERSION_4_2_2_0 <= tenant_data_version && tenant_data_version < DATA_VERSION_4_3_0_0) || tenant_data_version >= DATA_VERSION_4_3_1_0)) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "chunk size attribute not support in current version", KR(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "chunk size attribute not support in current version"); + } else if (is_oracle_mode()) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "lob chunk size column attribute is not supported in oracle mode", KR(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "chunk size column attribute in oracle mode"); + } else if (T_LOB_STORAGE_CLAUSE != node->type_) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "invalid argument", KR(ret), K(node->type_)); + } else if (node->num_child_ != 3) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "num_child_ not correct", K(ret), K(node->num_child_)); + } else if (OB_ISNULL(node->children_[0])) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "type node is null", K(ret), K(node)); + } else if (OB_FALSE_IT(type = static_cast(node->children_[0]->value_))) { + } else if (! 
ob_is_json(type)) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "type is not support", K(ret), K(type), K(node)); + } else if (OB_ISNULL(node->children_[1])) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "column name node is null", K(ret), K(node)); + } else if (OB_FALSE_IT(column_name.assign_ptr(node->children_[1]->str_value_, node->children_[1]->str_len_))) { + } else if (OB_ISNULL(node->children_[2])) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "param node is null", K(ret), K(node)); + } else if (node->children_[2]->num_child_ <= 0) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "param node is empty", K(ret), K(node), K(node->children_[2]->num_child_)); + } else if (stmt::T_CREATE_TABLE == stmt_->get_stmt_type()) { + ObCreateTableStmt *create_table_stmt = static_cast(stmt_); + column_schema = const_cast(create_table_stmt->get_column_schema(column_name)); + } else if (stmt::T_ALTER_TABLE == stmt_->get_stmt_type()) { + ObAlterTableStmt *alter_table_stmt = static_cast(stmt_); + ObTableSchema &tbl_schema = alter_table_stmt->get_alter_table_schema(); + for (ObTableSchema::const_column_iterator iter = tbl_schema.column_begin(); + iter != tbl_schema.column_end() && nullptr == column_schema; ++iter) { + ObColumnSchemaV2 &column = (**iter); + if (column.get_column_name_str().case_compare(column_name) == 0) { + const AlterColumnSchema &alter_col_schema = static_cast(column); + if (alter_col_schema.alter_type_ != OB_DDL_ADD_COLUMN) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "lob chunk size column attribute is not supported modify", + KR(ret), K(alter_col_schema)); + } else { + column_schema = &column; + } + } + } + } else { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "not supported statement for lob storage parameter", K(ret), K(stmt_->get_stmt_type())); + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(column_schema)) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "column not found", K(ret), K(type), K(column_name)); + } else if 
(column_schema->get_data_type() != type) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "column type node match", K(ret), K(type), K(*column_schema)); + } else { + for (int64_t i = 0; i < node->children_[2]->num_child_ && OB_SUCC(ret); i ++) { + if (OB_FAIL(resolve_lob_storage_parameter(*column_schema, *node->children_[2]->children_[i]))) { + SQL_RESV_LOG(WARN, "resolve_lob_storage_parameter fail", K(ret), K(type), K(*column_schema)); + } + } + } + return ret; +} +int ObDDLResolver::resolve_lob_storage_parameter(share::schema::ObColumnSchemaV2 &column, const ParseNode ¶m_node) +{ + int ret = OB_SUCCESS; + switch (param_node.type_) { + case T_LOB_CHUNK_SIZE: + case T_CONSTR_LOB_CHUNK_SIZE: { + if (OB_FAIL(resolve_lob_chunk_size(column, param_node))) { + SQL_RESV_LOG(WARN, "fail to resolve lob meta size", K(ret)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "UnKnown type", K(ret), K(param_node.type_)); + break; + } + } + return ret; +} +int ObDDLResolver::resolve_lob_chunk_size(const ParseNode &size_node, int64_t &lob_chunk_size) +{ + int ret = OB_SUCCESS; + const char *str = nullptr; + int64_t len = 0; + int64_t value = 0; + bool valid = false; + ObString unit; + if (T_CONSTR_LOB_CHUNK_SIZE != size_node.type_ && T_LOB_CHUNK_SIZE != size_node.type_) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "invalid argument", KR(ret), K(size_node.type_)); + } else if (size_node.num_child_ != 1 + || OB_ISNULL(size_node.children_[0])) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "invalid node", KR(ret)); + } else if (T_LOB_CHUNK_SIZE == size_node.type_) { + lob_chunk_size = size_node.children_[0]->value_ * 1024; + } else if (OB_ISNULL(str = size_node.children_[0]->str_value_)) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "size node value is null", KR(ret), K(size_node.type_)); + } else { + int64_t i = 0; + len = size_node.children_[0]->str_len_; + // calc integer part + for (; i < len; ++i) { + char c = str[i]; + if 
(isdigit(c)) { + value = value * 10 + (c - '0'); + } else { + break; + } + } + // cacl unit part, only support kb + unit.assign_ptr(str + i, len - i); + if (i >= len) { + // if no unit, use kb as unit + valid = true; + value <<= 10; + } else if (0 == unit.case_compare("kb") + || 0 == unit.case_compare("k")) { + value <<= 10; + valid = true; + } + if (valid) { + lob_chunk_size = value; + } else { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "size input is invalid", KR(ret), K(ObString(len, str))); + } + } + if (OB_FAIL(ret)) { + } else if (lob_chunk_size < OB_MIN_LOB_CHUNK_SIZE || lob_chunk_size > OB_MAX_LOB_CHUNK_SIZE) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "lob meta size invalid", KR(ret), K(lob_chunk_size)); + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "invalid CHUNK LOB storage option value"); + } + return ret; +} +int ObDDLResolver::resolve_lob_chunk_size( + share::schema::ObColumnSchemaV2 &column, + const ParseNode &lob_chunk_size_node) +{ + int ret = OB_SUCCESS; + int64_t lob_chunk_size = 0; + uint64_t tenant_id = session_info_->get_effective_tenant_id(); + uint64_t tenant_data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + SQL_RESV_LOG(WARN, "get tenant data version failed", KR(ret)); + } else if (! ((DATA_VERSION_4_2_2_0 <= tenant_data_version && tenant_data_version < DATA_VERSION_4_3_0_0) || tenant_data_version >= DATA_VERSION_4_3_1_0)) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "lob chunk size column attribute is not supported in oracle mode", + KR(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "chunk size attribute not support in current version"); + } else if (is_oracle_mode()) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "lob chunk size column attribute is not supported in oracle mode", + KR(ret), K(lob_chunk_size_node.type_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "chunk size column attribute in oracle mode"); + } else if (! 
column.is_json()) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "lob chunk size column attribute is only supported for json", + KR(ret), K(lob_chunk_size_node.type_), K(column)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "chunk size column attribute only supported for json, others"); + } else if (OB_FAIL(resolve_lob_chunk_size(lob_chunk_size_node, lob_chunk_size))) { + SQL_RESV_LOG(WARN, "resolve size node fail", KR(ret)); + } else { + column.set_lob_chunk_size(lob_chunk_size); + } + return ret; +} + int ObDDLResolver::resolve_lob_inrow_threshold(const ParseNode *option_node, const bool is_index_option) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.h b/src/sql/resolver/ddl/ob_ddl_resolver.h index 001a12cb21..2cf6fa3772 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.h +++ b/src/sql/resolver/ddl/ob_ddl_resolver.h @@ -564,6 +564,11 @@ protected: int check_skip_index(share::schema::ObTableSchema &table_schema); int resolve_lob_inrow_threshold(const ParseNode *option_node, const bool is_index_option); + int resolve_lob_storage_parameters(const ParseNode *node); + int resolve_lob_storage_parameter(share::schema::ObColumnSchemaV2 &column, const ParseNode ¶m_node); + int resolve_lob_chunk_size(const ParseNode &size_node, int64_t &lob_chunk_size); + int resolve_lob_chunk_size(share::schema::ObColumnSchemaV2 &column, const ParseNode &lob_chunk_size_node); + /* int resolve_generated_column_definition( share::schema::ObColumnSchemaV2 &column, diff --git a/src/sql/resolver/dml/ob_del_upd_resolver.cpp b/src/sql/resolver/dml/ob_del_upd_resolver.cpp index 6a70619871..2a16588acd 100644 --- a/src/sql/resolver/dml/ob_del_upd_resolver.cpp +++ b/src/sql/resolver/dml/ob_del_upd_resolver.cpp @@ -234,6 +234,10 @@ int ObDelUpdResolver::resolve_assignments(const ParseNode &parse_node, } } } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(resolve_json_partial_update_flag(table_assigns, scope))) { + LOG_WARN("resolve_json_partial_update_flag fail", K(ret)); + } } return 
ret; } @@ -911,7 +915,7 @@ int ObDelUpdResolver::set_base_table_for_updatable_view(TableItem &table_item, ret = dml->is_insert_stmt() ? OB_ERR_NON_INSERTABLE_TABLE : OB_ERR_NON_UPDATABLE_TABLE; LOG_WARN("view is not updatable", K(ret)); } else if (new_table_item->is_json_table()) { - ret = OB_ERR_NON_INSERTABLE_TABLE; + ret = is_mysql_mode() ? OB_ERR_NON_INSERTABLE_TABLE : OB_ERR_VIRTUAL_COL_NOT_ALLOWED; LOG_WARN("json table can not be insert", K(ret)); } else { ret = OB_ERR_UNEXPECTED; @@ -4637,5 +4641,68 @@ int ObDelUpdResolver::check_need_match_all_params(const common::ObIArray &table_assigns, ObStmtScope scope) +{ + INIT_SUCC(ret); + if (T_UPDATE_SCOPE == scope) { + bool need_partial_update = false; + if (OB_ISNULL(session_info_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session is NULL", K(ret)); + } else { + ObString option = session_info_->get_log_row_value_option(); + need_partial_update = option.case_compare(OB_LOG_ROW_VALUE_PARTIAL_JSON) == 0 + || option.case_compare(OB_LOG_ROW_VALUE_PARTIAL_ALL) == 0; + } + for (int64_t i = 0; OB_SUCC(ret) && need_partial_update && i < table_assigns.count(); ++i) { + ObTableAssignment &table_assign = table_assigns.at(i); + for (int64_t j = 0; OB_SUCC(ret) && j < table_assign.assignments_.count(); ++j) { + ObAssignment &assign = table_assign.assignments_.at(j); + bool allow_json_partial_update = false; + if (OB_FAIL(mark_json_partial_update_flag(assign.column_expr_, assign.expr_, 0, allow_json_partial_update))) { + LOG_WARN("mark_json_partial_update_flag fail", K(ret), K(table_assign), K(assign)); + } + } + } + } + return ret; +} +int ObDelUpdResolver::mark_json_partial_update_flag(const ObColumnRefRawExpr *ref_expr, ObRawExpr *expr, int depth, bool &allow_json_partial_update) +{ + INIT_SUCC(ret); + ObItemType expr_type = expr->get_expr_type(); + if (OB_ISNULL(ref_expr) || OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ref_expr or expr is NULL", K(ret), KP(ref_expr), KP(expr)); + } else if (! 
ob_is_json(ref_expr->get_data_type())) { + } else if (expr->is_column_ref_expr()) { + allow_json_partial_update = ObRawExprUtils::is_same_column_ref(ref_expr, expr); + } else if (T_FUN_COLUMN_CONV == expr_type) { + if (0 == depth && ob_is_json(expr->get_data_type()) && ob_is_json(expr->get_param_expr(4)->get_data_type())) { + ObRawExpr *json_expr = expr->get_param_expr(4); + if (OB_FAIL(mark_json_partial_update_flag(ref_expr, json_expr, depth + 1, allow_json_partial_update))) { + LOG_WARN("mark_json_partial_update_flag fail", K(ret), KP(ref_expr), KP(expr), KP(json_expr)); + } else if (allow_json_partial_update) { + json_expr->set_extra(OB_JSON_PARTIAL_UPDATE_LAST_EXPR | json_expr->get_extra()); + } + } + } else if (expr_type != T_FUN_SYS_JSON_REPLACE + && expr_type != T_FUN_SYS_JSON_SET + && expr_type != T_FUN_SYS_JSON_REMOVE) { + } else if (OB_FAIL(mark_json_partial_update_flag(ref_expr, expr->get_param_expr(0), depth + 1, allow_json_partial_update))) { + LOG_WARN("mark fail", K(ret)); + } else if (allow_json_partial_update) { + expr->set_extra(OB_JSON_PARTIAL_UPDATE_ALLOW | expr->get_extra()); + if (depth == 0) { + expr->set_extra(OB_JSON_PARTIAL_UPDATE_LAST_EXPR | expr->get_extra()); + } + if (expr->get_param_expr(0)->is_column_ref_expr()) { + expr->set_extra(OB_JSON_PARTIAL_UPDATE_FIRST_EXPR | expr->get_extra()); + } + } + return ret; +} + } /* namespace sql */ } /* namespace oceanbase */ diff --git a/src/sql/resolver/dml/ob_del_upd_resolver.h b/src/sql/resolver/dml/ob_del_upd_resolver.h index c85129d188..840f6a4651 100644 --- a/src/sql/resolver/dml/ob_del_upd_resolver.h +++ b/src/sql/resolver/dml/ob_del_upd_resolver.h @@ -256,6 +256,8 @@ protected: int add_default_sequence_id_to_stmt(const uint64_t table_id); int recursive_search_sequence_expr(const ObRawExpr *default_expr); int check_need_match_all_params(const common::ObIArray &value_desc, bool &need_match); + int resolve_json_partial_update_flag(ObIArray &table_assigns, ObStmtScope scope); + int 
mark_json_partial_update_flag(const ObColumnRefRawExpr *ref_expr, ObRawExpr *expr, int depth, bool &allow_json_partial_update); private: common::hash::ObPlacementHashSet insert_column_ids_; bool is_column_specify_; diff --git a/src/sql/resolver/dml/ob_dml_resolver.cpp b/src/sql/resolver/dml/ob_dml_resolver.cpp index b21e8fcb1d..7b543ef0b7 100755 --- a/src/sql/resolver/dml/ob_dml_resolver.cpp +++ b/src/sql/resolver/dml/ob_dml_resolver.cpp @@ -61,6 +61,7 @@ #include "share/stat/ob_opt_ds_stat.h" #include "lib/udt/ob_udt_type.h" #include "sql/resolver/dml/ob_insert_resolver.h" +#include "lib/xml/ob_path_parser.h" namespace oceanbase { @@ -2039,7 +2040,8 @@ static int check_is_pl_jsontype(const oceanbase::pl::ObUserDefinedType *user_typ if (OB_ISNULL(user_type)) { } else if (user_type->get_type() == oceanbase::pl::PL_OPAQUE_TYPE) { if (user_type->get_name().compare("JSON_OBJECT_T") == 0 - || user_type->get_name().compare("JSON_ELEMENT_T") == 0) { + || user_type->get_name().compare("JSON_ELEMENT_T") == 0 + || user_type->get_name().compare("JSON_ARRAY_T") == 0) { ret = OB_ERR_PL_JSONTYPE_USAGE; LOG_WARN("invalid pl json type userage in pl/sql", K(ret), K(user_type->get_type()), K(user_type->get_user_type_id())); @@ -4128,11 +4130,13 @@ int ObDMLResolver::resolve_table(const ParseNode &parse_tree, OZ (resolve_function_table_item(*table_node, table_item)); break; } - case T_JSON_TABLE_EXPRESSION: { + case T_JSON_TABLE_EXPRESSION: + case T_XML_TABLE_EXPRESSION: { if (OB_ISNULL(session_info_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret)); - } else if (lib::is_mysql_mode() && GET_MIN_CLUSTER_VERSION() < DATA_VERSION_4_2_1_0) { + } else if (lib::is_mysql_mode() && T_JSON_TABLE_EXPRESSION == table_node->type_ + && GET_MIN_CLUSTER_VERSION() < DATA_VERSION_4_2_1_0) { ret = OB_INVALID_ARGUMENT; LOG_WARN("json table in mysql mode not support before 4.2.1", K(ret), K(GET_MIN_CLUSTER_VERSION())); } @@ -5013,6 +5017,55 @@ int 
ObDMLResolver::resolve_generate_table_item(ObSelectStmt *ref_query, return ret; } +int ObDMLResolver::resolve_str_const(const ParseNode &parse_tree, ObString& path_str) +{ + INIT_SUCC(ret); + ObObjParam val; + char *buf = NULL; + ObString literal_prefix; + bool is_paramlize = false; + ObExprInfo parents_expr_info; + const ObLengthSemantics default_length_semantics = (OB_NOT_NULL(session_info_) ? session_info_->get_actual_nls_length_semantics() : LS_BYTE); + const ObSQLSessionInfo *session_info = session_info_; + int64_t server_collation = CS_TYPE_INVALID; + ObCollationType nation_collation = OB_NOT_NULL(session_info_) ? session_info_->get_nls_collation_nation() : CS_TYPE_INVALID; + ObCollationType collation_connection = CS_TYPE_INVALID; + ObCharsetType character_set_connection = CHARSET_INVALID; + if (OB_ISNULL(params_.expr_factory_) || OB_ISNULL(params_.session_info_)) { + ret = OB_NOT_INIT; + LOG_WARN("resolve status is invalid", K_(params_.expr_factory), K_(params_.session_info)); + } else if (OB_FAIL(params_.session_info_->get_collation_connection(collation_connection))) { + LOG_WARN("fail to get collation_connection", K(ret)); + } else if (OB_FAIL(params_.session_info_->get_character_set_connection(character_set_connection))) { + LOG_WARN("fail to get character_set_connection", K(ret)); + } else if (lib::is_oracle_mode() && nullptr == session_info) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session info is null", K(ret)); + } else if (lib::is_oracle_mode() && OB_FAIL( + session_info->get_sys_variable(share::SYS_VAR_COLLATION_SERVER, server_collation))) { + LOG_WARN("get sys variables failed", K(ret)); + } else if (OB_FAIL(ObResolverUtils::resolve_const(&parse_tree, + // stmt_type is only used in oracle mode + lib::is_oracle_mode() ? 
session_info->get_stmt_type() : stmt::T_NONE, + params_.expr_factory_->get_allocator(), + collation_connection, nation_collation, TZ_INFO(params_.session_info_), + val, is_paramlize, + literal_prefix, + default_length_semantics, + static_cast(server_collation), + &parents_expr_info, + session_info->get_sql_mode(), + nullptr != params_.secondary_namespace_))) { + LOG_WARN("failed to resolve const", K(ret)); + } else if (OB_ISNULL(buf = static_cast(allocator_->alloc(val.get_string().length())))) { // deep copy str value + ret = common::OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory", K(ret), K(buf)); + } else { + MEMCPY(buf, val.get_string().ptr(), val.get_string().length()); + path_str.assign_ptr(buf, val.get_string().length()); + } + return ret; +} int ObDMLResolver::resolve_json_table_item(const ParseNode &parse_tree, TableItem *&tbl_item) { @@ -5025,17 +5078,37 @@ int ObDMLResolver::resolve_json_table_item(const ParseNode &parse_tree, TableIte int32_t id = 0; ObRawExpr *error_expr = NULL; ObRawExpr *empty_expr = NULL; + ParseNode *doc_node = NULL; + ParseNode *path_node = NULL; + ParseNode *alias_node = NULL; + ParseNode *on_err_seq_node = NULL; + ParseNode *chil_col_node = NULL; + ParseNode *namespace_node = NULL; + + if (T_JSON_TABLE_EXPRESSION == parse_tree.type_ && 5 == parse_tree.num_child_) { + doc_node = parse_tree.children_[0]; + path_node = parse_tree.children_[1]; + alias_node = parse_tree.children_[4]; + on_err_seq_node = parse_tree.children_[2]; + // namespace_node = parse_tree.children_[5]; + chil_col_node =parse_tree.children_[3]; + } else if (T_XML_TABLE_EXPRESSION == parse_tree.type_ && 6 == parse_tree.num_child_ && OB_NOT_NULL(parse_tree.children_[0])) { + doc_node = parse_tree.children_[2]; + path_node = parse_tree.children_[1]; + alias_node = parse_tree.children_[5]; + on_err_seq_node = parse_tree.children_[3]; + namespace_node = parse_tree.children_[0]; + chil_col_node =parse_tree.children_[4]; + } else { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("table type not support ot param num mismatch", K(parse_tree.type_), K(parse_tree.num_child_)); + } - CK (OB_LIKELY(T_JSON_TABLE_EXPRESSION == parse_tree.type_)); - CK (OB_LIKELY(5 == parse_tree.num_child_)); CK (OB_NOT_NULL(parse_tree.children_[0])); CK (OB_NOT_NULL(parse_tree.children_[1])); CK (OB_NOT_NULL(parse_tree.children_[2])); CK (OB_NOT_NULL(parse_tree.children_[3])); - ParseNode *doc_node = parse_tree.children_[0]; - ParseNode *path_node = parse_tree.children_[1]; - // json document node if (OB_FAIL(ret)) { } else if ((OB_ISNULL(stmt) || OB_ISNULL(allocator_))) { @@ -5060,6 +5133,14 @@ int ObDMLResolver::resolve_json_table_item(const ParseNode &parse_tree, TableIte } else { table_def = static_cast(new (table_buf) ObJsonTableDef()); table_def->doc_expr_ = json_doc_expr; + if (T_JSON_TABLE_EXPRESSION == parse_tree.type_) { + table_def->table_type_ = MulModeTableType::OB_ORA_JSON_TABLE_TYPE; + } else if (T_XML_TABLE_EXPRESSION == parse_tree.type_) { + table_def->table_type_ = MulModeTableType::OB_ORA_XML_TABLE_TYPE; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unknow table function", K(ret), K(parse_tree.type_)); + } } } @@ -5067,7 +5148,6 @@ int ObDMLResolver::resolve_json_table_item(const ParseNode &parse_tree, TableIte ObDmlJtColDef* root_col_def = NULL; if (OB_SUCC(ret)) { ObString alias_name; - ParseNode *alias_node = parse_tree.children_[4]; if (lib::is_mysql_mode() && OB_ISNULL(alias_node)) { ret = OB_ERR_TABLE_WITHOUT_ALIAS; LOG_WARN("table function need alias", K(ret)); @@ -5104,25 +5184,31 @@ int ObDMLResolver::resolve_json_table_item(const ParseNode &parse_tree, TableIte ObString path_str; if (path_node->type_ == T_NULL) { path_str = ObString("$"); - } else { - path_str.assign_ptr(path_node->str_value_, path_node->str_len_); + } else if (OB_FAIL(resolve_str_const(*path_node, path_str))) { + LOG_WARN("fail to resolve json path", K(ret)); + } + if (OB_SUCC(ret) + && table_def->table_type_ == 
MulModeTableType::OB_ORA_XML_TABLE_TYPE) { // xmltable check xpath + if (path_str.length() == 0 || path_node->type_ == T_NULL) { + ret = OB_ERR_LACK_XQUERY_LITERAL; + LOG_WARN("xmltable need xquery literal", K(ret)); + } } ObIAllocator& alloc_ref = *allocator_; - if (OB_FAIL(ob_write_string(alloc_ref, path_str, root_col_def->col_base_info_.path_))) { + if (OB_SUCC(ret) && OB_FAIL(ob_write_string(alloc_ref, path_str, root_col_def->col_base_info_.path_))) { LOG_WARN("failed to write string.", K(ret), K(path_str.length())); } } // error node process - if (OB_SUCC(ret)) { - ParseNode *on_err_node = parse_tree.children_[2]; - if (on_err_node->num_child_ != 2) { + if (OB_SUCC(ret) && T_JSON_TABLE_EXPRESSION == parse_tree.type_) { + if (on_err_seq_node->num_child_ != 2) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("failed to resolve json table error node count not correct", K(ret), K(on_err_node->num_child_)); + LOG_WARN("failed to resolve json table error node count not correct", K(ret), K(on_err_seq_node->num_child_)); } else { - ParseNode *error_node = on_err_node->children_[0]; - ParseNode *empty_node = on_err_node->children_[1]; + ParseNode *error_node = on_err_seq_node->children_[0]; + ParseNode *empty_node = on_err_seq_node->children_[1]; if (OB_ISNULL(error_node) || OB_ISNULL(empty_node)) { ret = OB_ERR_UNEXPECTED; @@ -5170,18 +5256,82 @@ int ObDMLResolver::resolve_json_table_item(const ParseNode &parse_tree, TableIte } } } + // xmltable sequence & namespace node + if (OB_SUCC(ret) && T_XML_TABLE_EXPRESSION == parse_tree.type_) { + root_col_def->col_base_info_.allow_scalar_ = on_err_seq_node->value_; + root_col_def->col_base_info_.on_empty_ = 1; + root_col_def->col_base_info_.on_error_ = 0; + if (OB_FAIL(resolve_xml_namespaces(namespace_node, table_def))) { + LOG_WARN("fail to resolve xml namespace", K(ret)); + } + } // column node process if (OB_FAIL(ret)) { } else if (OB_FAIL(json_table_infos_.push_back(root_col_def))) { LOG_WARN("failed to push back column info", 
K(ret)); - } else if (OB_FAIL(resolve_json_table_column_item(*parse_tree.children_[3], item, + } else if (OB_FAIL(resolve_json_table_column_item(*chil_col_node, item, root_col_def, -1, id, cur_column_id))) { LOG_WARN("failed to resovle json table column item", K(ret)); } return ret; } +int ObDMLResolver::resolve_xml_namespaces(const ParseNode *namespace_node, ObJsonTableDef*& table_def) +{ + INIT_SUCC(ret); + ObString t_str; + common::hash::ObHashSet key_ns; + bool has_default = false; + int64_t bucket_num = (namespace_node->num_child_ / 2) + 1; + if (namespace_node->type_ == T_NULL) { // ns is null + } else if (namespace_node->num_child_ > 0 && OB_FAIL(key_ns.create(bucket_num))) { + LOG_WARN("init hash failed", K(ret), K(bucket_num)); + } else { + for (int i = 0; OB_SUCC(ret) && i < namespace_node->num_child_; i++) { + if (i % 2 == 1 && namespace_node->children_[i]->type_ == T_NULL && namespace_node->children_[i]->value_ == 1) { + if (has_default) { + ret = OB_ERR_DUP_DEF_NAMESPACE; + ObString str_def(namespace_node->children_[i]->text_len_, namespace_node->children_[i]->raw_text_); + LOG_USER_ERROR(OB_ERR_DUP_DEF_NAMESPACE, str_def.ptr()); + LOG_WARN("can not input one more default ns", K(ret)); + } else { + t_str.assign_ptr("", 0); + has_default = true; + } + } else if (i % 2 == 1 && namespace_node->children_[i]->text_len_ == 0) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_WARN("can not input without ns prefix", K(ret)); + } else { + if (i % 2 == 0) { // drop single quote + size_t path_len = namespace_node->children_[i]->text_len_; + char* str_buf = static_cast(allocator_->alloc(path_len - 2)); + if (OB_ISNULL(str_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(path_len)); + } else { + MEMCPY(str_buf, namespace_node->children_[i]->raw_text_ + 1, path_len - 2); + t_str.assign_ptr(str_buf, path_len - 2); + } + } else { + t_str.assign_ptr(namespace_node->children_[i]->raw_text_, 
namespace_node->children_[i]->text_len_); + } + } + if (OB_FAIL(ret)) { + } else if (i % 2 == 1 && OB_HASH_EXIST == key_ns.exist_refactored(t_str)) { + ret = OB_ERR_TOO_MANY_PREFIX_DECLARE; + LOG_USER_ERROR(OB_ERR_TOO_MANY_PREFIX_DECLARE, t_str.length(), t_str.ptr()); + LOG_WARN("duplicate key", K(ret)); + } else if (i % 2 == 1 && OB_FAIL(key_ns.set_refactored(t_str, 0))) { + LOG_WARN("store key to vector failed", K(ret), K(key_ns.size())); + } else if (OB_FAIL(table_def->namespace_arr_.push_back(t_str))) { + LOG_WARN("failed push string in namespace", K(ret), K(t_str), K(i)); + } + } + } + return ret; +} + int ObDMLResolver::resolve_function_table_item(const ParseNode &parse_tree, TableItem *&tbl_item) { @@ -9249,7 +9399,8 @@ int ObDMLResolver::resolve_function_table_column_item(const TableItem &table_ite int ObDMLResolver::json_table_make_json_path(const ParseNode &parse_tree, ObIAllocator* allocator, - ObString& path_str) + ObString& path_str, + MulModeTableType table_type) { int ret = OB_SUCCESS; if (OB_ISNULL(allocator)) { @@ -9297,21 +9448,121 @@ int ObDMLResolver::json_table_make_json_path(const ParseNode &parse_tree, } } } else { - char* str_buf = static_cast(allocator->alloc(parse_tree.text_len_ + 3)); + if (table_type == MulModeTableType::OB_ORA_JSON_TABLE_TYPE) { + char* str_buf = static_cast(allocator->alloc(parse_tree.text_len_ + 3)); + if (OB_ISNULL(str_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(parse_tree.str_len_)); + } else { + MEMCPY(str_buf, "$.", 2); + str_buf[parse_tree.text_len_ + 2] = '\0'; + if (parse_tree.text_len_ > 0 + && (parse_tree.raw_text_[0] == '\'' && parse_tree.raw_text_[parse_tree.text_len_-1] == '\'')) { + MEMCPY(str_buf + 2, parse_tree.raw_text_ + 1, parse_tree.text_len_ - 1); + str_buf[parse_tree.text_len_] = '\0'; + } else { + MEMCPY(str_buf + 2, parse_tree.raw_text_, parse_tree.text_len_); + } + path_str.assign_ptr(str_buf, strlen(str_buf)); + } + } else if (table_type == 
MulModeTableType::OB_ORA_XML_TABLE_TYPE) { + if (parse_tree.str_len_ == 0) { + ret = OB_ERR_LACK_XQUERY_LITERAL; + LOG_WARN("xmltable need xquery literal", K(ret)); + } else { + char* str_buf = static_cast(allocator->alloc(parse_tree.text_len_)); + if (OB_ISNULL(str_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(parse_tree.str_len_)); + } else { + MEMCPY(str_buf, parse_tree.raw_text_, parse_tree.text_len_); + for (int64_t i = 0; i < parse_tree.text_len_; i ++) { + str_buf[i] = toupper(str_buf[i]); + } + path_str.assign_ptr(str_buf, parse_tree.text_len_); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("INVALID table function", K(ret), K(table_type)); + } + } + return ret; +} + +int ObDMLResolver::resolve_table_func_path(ObIAllocator* allocator, + ObString& path_str, + MulModeTableType table_type) +{ + INIT_SUCC(ret); + if (table_type == OB_ORA_XML_TABLE_TYPE) { // if path not begin with '/', then add '/' in front + ObString root_path = json_table_infos_.at(0)->col_base_info_.path_; // root path + ObString header; + int len = root_path.length() - 1; + if (path_str[0] != '/') { + while(len >= 0 && root_path[len] != '/'){ + len --; + } + char* str_buf = static_cast(allocator->alloc(root_path.length() - len + 1 + path_str.length())); + if (OB_ISNULL(str_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(root_path.length() - len + 1 + path_str.length())); + } else { + str_buf[0] = '/'; + MEMCPY(str_buf + 1, root_path.ptr() + (len + 1), root_path.length() - (len + 1)); + str_buf[root_path.length() - len] = '/'; + MEMCPY(str_buf + root_path.length() - len + 1, path_str.ptr(), path_str.length()); + path_str.assign_ptr(str_buf, (root_path.length() - len + 1 + path_str.length())); + } + } + } + return ret; +} + +bool check_xpath_need_transform(ObString& path) +{ + bool res = true; + size_t len = path.length(); + size_t l = 0; + if ((path[0] >= 'a' && path[0] <= 'z') + || (path[0] >= 
'A' && path[0] <= 'Z') + || path[0] >= '_') { + } else { + res = false; + } + while(res && l < len + && !ObPathParserUtil::is_xpath_transform_terminator(path[l])) { + if (path[l] == ObPathItem::BRACE_START || path[l] == ObPathItem::COLON) { // exist item method, not transform + res = false; + } + l ++; + } + if (res && l == 0) { // first char is '/', ignore + res = false; + } + return res; +} + +// xmltype need transform xpath 'a' -> '/a' +int ObDMLResolver::check_xpath_in_xmltype(ObDmlJtColDef *col_def, + const ObDataType &data_type) +{ + INIT_SUCC(ret); + if (!ob_is_xml_sql_type(data_type.get_obj_type(), data_type.get_meta_type().get_subschema_id())) { + } else if (col_def->col_base_info_.path_.length() == 0) { + ret = OB_ERR_LACK_XQUERY_LITERAL; + LOG_WARN("xmltable need xquery literal", K(ret)); + } else if (json_table_infos_.at(0)->col_base_info_.path_ == "/" + && check_xpath_need_transform(col_def->col_base_info_.path_)) { + int64_t len = col_def->col_base_info_.path_.length() + 1; + char* str_buf = static_cast(allocator_->alloc(len)); if (OB_ISNULL(str_buf)) { ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory failed", K(ret), K(parse_tree.str_len_)); + LOG_WARN("allocate memory failed", K(ret), K(col_def->col_base_info_.path_)); } else { - MEMCPY(str_buf, "$.", 2); - str_buf[parse_tree.text_len_ + 2] = '\0'; - if (parse_tree.text_len_ > 0 - && (parse_tree.raw_text_[0] == '\'' && parse_tree.raw_text_[parse_tree.text_len_-1] == '\'')) { - MEMCPY(str_buf + 2, parse_tree.raw_text_ + 1, parse_tree.text_len_ - 1); - str_buf[parse_tree.text_len_] = '\0'; - } else { - MEMCPY(str_buf + 2, parse_tree.raw_text_, parse_tree.text_len_); - } - path_str.assign_ptr(str_buf, strlen(str_buf)); + MEMCPY(str_buf, "/", 1); + MEMCPY(str_buf + 1, col_def->col_base_info_.path_.ptr(), col_def->col_base_info_.path_.length()); + col_def->col_base_info_.path_.assign_ptr(str_buf, len); } } return ret; @@ -9320,7 +9571,8 @@ int ObDMLResolver::json_table_make_json_path(const 
ParseNode &parse_tree, int ObDMLResolver::resolve_json_table_column_name_and_path(const ParseNode *name_node, const ParseNode *path_node, ObIAllocator* allocator, - ObDmlJtColDef *col_def) + ObDmlJtColDef *col_def, + MulModeTableType table_type) { int ret = OB_SUCCESS; if (OB_ISNULL(name_node) || OB_ISNULL(path_node) || OB_ISNULL(allocator) || OB_ISNULL(col_def)) { @@ -9334,20 +9586,28 @@ int ObDMLResolver::resolve_json_table_column_name_and_path(const ParseNode *name LOG_WARN("failed to resolve json column as name node is null", K(ret)); } else if (path_node->type_ != T_NULL && (path_node->str_len_ > 0 && OB_NOT_NULL(path_node->str_value_))) { - col_def->col_base_info_.path_.assign_ptr(path_node->str_value_, path_node->str_len_); - if (lib::is_mysql_mode()) { // do nothing - } else if (*path_node->str_value_ != '$' && path_node->value_ != 1 - && OB_FAIL(json_table_make_json_path(*path_node, allocator, col_def->col_base_info_.path_))) { + if ((path_node->type_ == T_CHAR || path_node->type_ == T_VARCHAR) + && OB_FAIL(resolve_str_const(*path_node, col_def->col_base_info_.path_))) { + LOG_WARN("fail to resolve path const", K(ret)); + } else if (lib::is_mysql_mode()) { // do nothing + (const_cast(path_node))->type_ = T_CHAR; + if (OB_FAIL(resolve_str_const(*path_node, col_def->col_base_info_.path_))) { + LOG_WARN("fail to resolve path const in mysql", K(ret)); + } + } else if (((table_type == OB_ORA_JSON_TABLE_TYPE && *path_node->str_value_ != '$' && path_node->value_ != 1)) + && OB_FAIL(json_table_make_json_path(*path_node, allocator, col_def->col_base_info_.path_, table_type))) { LOG_WARN("failed to make json path", K(ret)); } } else if (path_node->type_ == T_NULL - && OB_FAIL(json_table_make_json_path(*name_node, allocator, col_def->col_base_info_.path_))) { + && OB_FAIL(json_table_make_json_path(*name_node, allocator, col_def->col_base_info_.path_, table_type))) { LOG_WARN("failed to make json path by name", K(ret)); } else if (path_node->type_ == T_OBJ_ACCESS_REF 
- && OB_FAIL(json_table_make_json_path(*path_node, allocator, col_def->col_base_info_.path_))) { + && OB_FAIL(json_table_make_json_path(*path_node, allocator, col_def->col_base_info_.path_, table_type))) { LOG_WARN("failed to make json path by lists", K(ret)); + } else if (table_type == MulModeTableType::OB_ORA_XML_TABLE_TYPE && col_def->col_base_info_.path_.length() == 0) { + ret = OB_ERR_LACK_XQUERY_LITERAL; + LOG_WARN("xmltable need xquery literal", K(ret)); } - if (OB_SUCC(ret)) { if (lib::is_mysql_mode() && (name_node->str_value_[name_node->str_len_ - 1] == ' ')) { ret = OB_WRONG_COLUMN_NAME; @@ -9361,6 +9621,7 @@ int ObDMLResolver::resolve_json_table_column_name_and_path(const ParseNode *name return ret; } + int ObDMLResolver::resolve_json_table_check_dup_name(const ObJsonTableDef* table_def, const ObString& column_name, bool& exists) @@ -9402,7 +9663,7 @@ int ObDMLResolver::resolve_json_table_column_type(const ParseNode &parse_tree, obj_type = static_cast(parse_tree.type_); } - if (col_type == COL_TYPE_ORDINALITY) { + if (col_type == COL_TYPE_ORDINALITY || col_type == COL_TYPE_ORDINALITY_XML) { data_type.set_int(); data_type.set_accuracy(ObAccuracy::DDL_DEFAULT_ACCURACY[ObInt32Type]); } else if (col_type == COL_TYPE_QUERY && obj_type == ObJsonType) { @@ -9411,8 +9672,8 @@ int ObDMLResolver::resolve_json_table_column_type(const ParseNode &parse_tree, } else if (col_type == COL_TYPE_EXISTS || col_type == COL_TYPE_VALUE || col_type == COL_TYPE_QUERY - || col_type == COL_TYPE_QUERY_JSON_COL) { - + || col_type == COL_TYPE_QUERY_JSON_COL + || col_type == COL_TYPE_VAL_EXTRACT_XML) { if (OB_UNLIKELY(!ob_is_valid_obj_type(obj_type))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid obj type", K(ret), K(obj_type)); @@ -9432,15 +9693,19 @@ int ObDMLResolver::resolve_json_table_column_type(const ParseNode &parse_tree, convert_real_to_decimal))) { LOG_WARN("resolve data type failed", K(ret), K(col_def->col_base_info_.col_name_)); } else { + ObCharsetType charset_type = 
data_type.get_charset_type(); ObCollationType coll_type = data_type.get_collation_type(); - if (CS_TYPE_INVALID != coll_type) { + if (CS_TYPE_INVALID != coll_type && CHARSET_INVALID != charset_type) { data_type.set_collation_type(coll_type); + data_type.set_charset_type(charset_type); } else if (OB_ISNULL(session_info_)) { // use connection_collation. for cast('a' as char) ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected collation type", K(ret)); - } else if (OB_FAIL(session_info_->get_collation_connection(coll_type))) { + } else if (OB_FAIL(session_info_->get_collation_connection(coll_type)) + || OB_FAIL(session_info_->get_character_set_connection(charset_type))) { LOG_WARN("failed to get collation", K(ret)); } else { + data_type.set_charset_type(charset_type); data_type.set_collation_type(coll_type); } if (OB_SUCC(ret) && ob_is_json_tc(obj_type)) { @@ -9462,7 +9727,10 @@ int ObDMLResolver::resolve_json_table_column_type(const ParseNode &parse_tree, accuracy.set_length_semantics(length_semantics); ObObjTypeClass dest_tc = ob_obj_type_class(obj_type); - if (ObStringTC == dest_tc) { + if (ObExtendTC == dest_tc) { + ret = OB_ERR_INVALID_CAST_UDT; + LOG_WARN("invalid CAST to a type that is not a nested table or VARRAY", K(ret)); + } else if (ObStringTC == dest_tc) { if (parse_tree.length_semantics_ == LS_DEFAULT) { length_semantics = (OB_NOT_NULL(session_info_) ? 
session_info_->get_actual_nls_length_semantics() : LS_BYTE); @@ -9522,6 +9790,9 @@ int ObDMLResolver::resolve_json_table_column_type(const ParseNode &parse_tree, } } } + } else if (col_type == COL_TYPE_XMLTYPE_XML) { // not check, default returning xml type + data_type.set_obj_type(ObUserDefinedSQLType); + data_type.set_subschema_id(ObXMLSqlType); } else { ret = OB_INVALID_ARGUMENT; LOG_WARN("failed to resolve regular column type.", K(ret), KP(col_type)); @@ -9609,9 +9880,13 @@ int ObDMLResolver::resolve_json_table_regular_column(const ParseNode &parse_tree } else if (OB_FAIL(get_json_table_column_by_id(table_item->table_id_, root_col_def))) { LOG_WARN("internal error find jt column failed", K(ret)); } else if ((col_type == COL_TYPE_EXISTS && parse_tree.num_child_ != 5) || - (col_type == COL_TYPE_VALUE && parse_tree.num_child_ != 5) || - ((col_type == COL_TYPE_QUERY || col_type == COL_TYPE_QUERY_JSON_COL) && parse_tree.num_child_ != 7) || - (col_type == COL_TYPE_ORDINALITY && parse_tree.num_child_ != 2)) { + ((col_type == COL_TYPE_VALUE + || col_type == COL_TYPE_VAL_EXTRACT_XML + || col_type == COL_TYPE_XMLTYPE_XML) && parse_tree.num_child_ != 5) || + ((col_type == COL_TYPE_QUERY + || col_type == COL_TYPE_QUERY_JSON_COL) && parse_tree.num_child_ != 7) || + ((col_type == COL_TYPE_ORDINALITY + || col_type == COL_TYPE_ORDINALITY_XML) && parse_tree.num_child_ != 2)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to resolve json table regular column", K(ret), K(parse_tree.num_child_), K(col_type)); } else { @@ -9628,13 +9903,14 @@ int ObDMLResolver::resolve_json_table_regular_column(const ParseNode &parse_tree const ParseNode* return_type = parse_tree.children_[1]; col_def->col_base_info_.res_type_ = return_type->value_; - if (col_type == COL_TYPE_ORDINALITY) { + if (col_type == COL_TYPE_ORDINALITY || col_type == COL_TYPE_ORDINALITY_XML) { if (OB_ISNULL(name_node->str_value_) || name_node->str_len_ == 0 ) { ret = OB_INVALID_ARGUMENT; LOG_WARN("failed to resolve json column 
as name node is null", K(ret)); } else { col_def->col_base_info_.col_name_.assign_ptr(name_node->str_value_, name_node->str_len_); - col_def->col_base_info_.col_type_ = COL_TYPE_ORDINALITY; + col_def->col_base_info_.is_name_quoted_ = name_node->is_input_quoted_; + col_def->col_base_info_.col_type_ = col_type; col_def->col_base_info_.is_name_quoted_ = name_node->is_input_quoted_; } } else if (col_type == COL_TYPE_EXISTS) { @@ -9642,7 +9918,7 @@ int ObDMLResolver::resolve_json_table_regular_column(const ParseNode &parse_tree const ParseNode* on_err_node = parse_tree.children_[4]; const ParseNode* truncate_node = parse_tree.children_[2]; - if (OB_FAIL(resolve_json_table_column_name_and_path(name_node, path_node, allocator_, col_def))) { + if (OB_FAIL(resolve_json_table_column_name_and_path(name_node, path_node, allocator_, col_def, table_item->json_table_def_->table_type_))) { LOG_WARN("failed to resolve json column name node or path node", K(ret)); } else if (on_err_node->num_child_ != 2) { ret = OB_ERR_UNEXPECTED; @@ -9681,7 +9957,7 @@ int ObDMLResolver::resolve_json_table_regular_column(const ParseNode &parse_tree || OB_ISNULL(on_err_node->children_[2])) { ret = OB_ERR_UNEXPECTED; LOG_WARN("json table error empty mismatch is null", K(ret)); - } else if (OB_FAIL(resolve_json_table_column_name_and_path(name_node, path_node, allocator_, col_def))) { + } else if (OB_FAIL(resolve_json_table_column_name_and_path(name_node, path_node, allocator_, col_def, table_item->json_table_def_->table_type_))) { LOG_WARN("failed to resolve json column name node or path node", K(ret)); } else { col_def->col_base_info_.col_type_ = COL_TYPE_QUERY; @@ -9712,10 +9988,10 @@ int ObDMLResolver::resolve_json_table_regular_column(const ParseNode &parse_tree } else { // COL_TYPE_VALUE const ParseNode* trunc_node = parse_tree.children_[2]; const ParseNode* path_node = parse_tree.children_[3]; - col_def->col_base_info_.col_type_ = COL_TYPE_VALUE; + col_def->col_base_info_.col_type_ = col_type; 
col_def->col_base_info_.truncate_ = trunc_node->value_; - if (OB_FAIL(resolve_json_table_column_name_and_path(name_node, path_node, allocator_, col_def))) { + if (OB_FAIL(resolve_json_table_column_name_and_path(name_node, path_node, allocator_, col_def, table_item->json_table_def_->table_type_))) { LOG_WARN("failed to resolve json column name node or path node", K(ret)); } } @@ -9735,6 +10011,10 @@ int ObDMLResolver::resolve_json_table_regular_column(const ParseNode &parse_tree data_type, col_def))) { LOG_WARN("failed to resolve json column type.", K(ret)); + } else if (table_item->json_table_def_->table_type_ + == MulModeTableType::OB_ORA_XML_TABLE_TYPE + && OB_FAIL(check_xpath_in_xmltype(col_def, data_type))) { + LOG_WARN("fail to check xpath in xmltype column", K(ret)); } else if (OB_FAIL(generate_json_table_output_column_item(table_item, data_type, col_def->col_base_info_.col_name_, @@ -9751,9 +10031,12 @@ int ObDMLResolver::resolve_json_table_regular_column(const ParseNode &parse_tree col_def->col_base_info_.output_column_idx_ = cur_column_id++; if (OB_FAIL(ret)) { - } else if (OB_FAIL(check_json_table_column_constrain(col_def))) { + } else if (table_def->table_type_ == MulModeTableType::OB_ORA_JSON_TABLE_TYPE + && OB_FAIL(check_json_table_column_constrain(col_def))) { LOG_WARN("failed to check json column constrain.", K(ret)); - } else if (col_type == COL_TYPE_VALUE) { + } else if (col_type == COL_TYPE_VALUE + || col_type == COL_TYPE_VAL_EXTRACT_XML + || col_type == COL_TYPE_XMLTYPE_XML) { const ParseNode* on_err_node = parse_tree.children_[4]; // error default value @@ -9897,7 +10180,7 @@ int ObDMLResolver::resolve_json_table_nested_column(const ParseNode &parse_tree, // json table nested path syntax, not quoted: // nested path employees[*] columns ( name, job ) if (path_node->value_ == 2) { - if (OB_FAIL(json_table_make_json_path(*path_node, allocator_, col_def->col_base_info_.path_))) { + if (OB_FAIL(json_table_make_json_path(*path_node, allocator_, 
col_def->col_base_info_.path_, table_item->json_table_def_->table_type_))) { LOG_WARN("failed to make json path.", K(ret)); } } else if (OB_ISNULL(path_node->str_value_) || path_node->str_len_ == 0) { @@ -9906,7 +10189,7 @@ int ObDMLResolver::resolve_json_table_nested_column(const ParseNode &parse_tree, } else { if (path_node->str_value_[0] == '$') { col_def->col_base_info_.path_.assign_ptr(path_node->str_value_, path_node->str_len_); - } else if (OB_FAIL(json_table_make_json_path(*path_node, allocator_, col_def->col_base_info_.path_))) { + } else if (OB_FAIL(json_table_make_json_path(*path_node, allocator_, col_def->col_base_info_.path_, table_item->json_table_def_->table_type_))) { LOG_WARN("failed to make json path.", K(ret)); } } @@ -9996,7 +10279,10 @@ int ObDMLResolver::resolve_json_table_column_item(const ParseNode &parse_tree, col_type == COL_TYPE_QUERY || col_type == COL_TYPE_EXISTS || col_type == COL_TYPE_ORDINALITY || - col_type == COL_TYPE_QUERY_JSON_COL) { + col_type == COL_TYPE_ORDINALITY_XML || + col_type == COL_TYPE_QUERY_JSON_COL || + col_type == COL_TYPE_VAL_EXTRACT_XML || + col_type == COL_TYPE_XMLTYPE_XML) { if (OB_FAIL(resolve_json_table_regular_column(*cur_node, table_item, cur_col_def, cur_node_id, id, cur_column_id))) { LOG_WARN("resolve column defination in json table failed.", K(ret), K(cur_node->value_)); } else if (OB_ISNULL(cur_col_def)) { @@ -10018,7 +10304,7 @@ int ObDMLResolver::resolve_json_table_column_item(const ParseNode &parse_tree, } } else { ret = OB_ERR_UNEXPECTED; - LOG_WARN("bad column type defination in json table.", K(ret)); + LOG_WARN("bad column type defination in json table.", K(ret), K(cur_node->value_)); } } } @@ -10059,8 +10345,6 @@ int ObDMLResolver::resolve_json_table_column_all_items(const TableItem &table_it ObDMLStmt* stmt = get_stmt(); CK (OB_NOT_NULL(stmt)); - CK (OB_NOT_NULL(stmt)); - CK (OB_LIKELY(table_item.is_json_table())); if (OB_FAIL(ret)) { } else if (OB_ISNULL(jt_def = table_item.json_table_def_)) { 
ret = OB_ERR_UNEXPECTED; @@ -10134,7 +10418,8 @@ int ObDMLResolver::resolve_function_table_column_item_udf(const TableItem &table CK (OB_NOT_NULL(coll_type = static_cast(user_type))); if (OB_SUCC(ret) && !coll_type->get_element_type().is_obj_type() - && !coll_type->get_element_type().is_record_type()) { + && !coll_type->get_element_type().is_record_type() + && !(coll_type->get_element_type().is_opaque_type() && coll_type->get_element_type().get_user_type_id() == T_OBJ_XML)) { ret = OB_NOT_SUPPORTED; LOG_WARN("not supported udt type", K(ret), K(coll_type->get_user_type_id())); LOG_USER_ERROR(OB_NOT_SUPPORTED, "current udt type"); @@ -10156,6 +10441,29 @@ int ObDMLResolver::resolve_function_table_column_item_udf(const TableItem &table CK (OB_NOT_NULL(col_item)); OZ (col_items.push_back(*col_item)); } + + // The array element type is opaque + if (OB_SUCC(ret) && coll_type->get_element_type().is_opaque_type()) { + if (OB_FAIL(ret)) { // do nothing ... + } else if (NULL != (col_item = stmt->get_column_item(table_item.table_id_, ObString("COLUMN_VALUE")))) { + //exist, ignore resolve... 
+ } else { + ObAccuracy accuracy = ObAccuracy(PRECISION_UNKNOWN_YET, SCALE_UNKNOWN_YET); + accuracy.set_accuracy(T_OBJ_XML); + common::ObObjMeta meta; + meta.set_type(ObUserDefinedSQLType); + meta.set_subschema_id(ObXMLSqlType); + const ObObjMeta input_meta = meta; + OZ (resolve_function_table_column_item(table_item, + input_meta, + accuracy, + ObString("COLUMN_VALUE"), + OB_APP_MIN_COLUMN_ID, + col_item)); + } + CK (OB_NOT_NULL(col_item)); + OZ (col_items.push_back(*col_item)); + } // 数组的元素类型是Object的情况, 此时应该输出多列 if (OB_SUCC(ret) && coll_type->get_element_type().is_record_type()) { ObPLPackageGuard package_guard(params_.session_info_->get_effective_tenant_id()); diff --git a/src/sql/resolver/dml/ob_dml_resolver.h b/src/sql/resolver/dml/ob_dml_resolver.h index c36a82b66d..927cc479e4 100644 --- a/src/sql/resolver/dml/ob_dml_resolver.h +++ b/src/sql/resolver/dml/ob_dml_resolver.h @@ -198,6 +198,7 @@ public: TableItem *&table_item); int resolve_json_table_item(const ParseNode &table_node, TableItem *&table_item); + int resolve_xml_namespaces(const ParseNode *namespace_node, ObJsonTableDef*& table_def); int fill_same_column_to_using(JoinedTable* &joined_table); int get_columns_from_table_item(const TableItem *table_item, common::ObIArray &column_names); @@ -209,11 +210,19 @@ public: ObRawExpr *&real_ref_expr); int json_table_make_json_path(const ParseNode &parse_tree, ObIAllocator* allocator, - ObString& path_str); + ObString& path_str, + MulModeTableType table_type); + int resolve_str_const(const ParseNode &parse_tree, ObString& path_str); + int resolve_table_func_path(ObIAllocator* allocator, + ObString& path_str, + MulModeTableType table_type); int resolve_json_table_column_name_and_path(const ParseNode *name_node, const ParseNode *path_node, ObIAllocator* allocator, - ObDmlJtColDef *col_def); + ObDmlJtColDef *col_def, + MulModeTableType table_type); + int check_xpath_in_xmltype(ObDmlJtColDef *col_def, + const ObDataType &data_type); int 
resolve_single_table_column_item(const TableItem &table_item, const common::ObString &column_name, bool include_hidden, diff --git a/src/sql/resolver/dml/ob_dml_stmt.cpp b/src/sql/resolver/dml/ob_dml_stmt.cpp index 2f777dcda4..eb52183286 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.cpp +++ b/src/sql/resolver/dml/ob_dml_stmt.cpp @@ -849,11 +849,17 @@ int ObDMLStmt::iterate_stmt_expr(ObStmtExprVisitor &visitor) LOG_WARN("failed to visit temp expr", K(ret)); } } - if (OB_SUCC(ret) && NULL != column_items_.at(i).default_value_expr_) { + if (OB_FAIL(ret)) { + } else if (NULL != column_items_.at(i).default_value_expr_) { if (OB_FAIL(visitor.visit(column_items_.at(i).default_value_expr_, SCOPE_BASIC_TABLE))) { LOG_WARN("failed to visit temp expr", K(ret)); } + } else if (NULL != column_items_.at(i).default_empty_expr_) { + if (OB_FAIL(visitor.visit(column_items_.at(i).default_empty_expr_, + SCOPE_FROM))) { + LOG_WARN("failed to visit temp expr", K(ret)); + } // non_const default value } else { /*do nothing*/ } } for (int64_t i = 0; OB_SUCC(ret) && i < table_items_.count(); i++) { @@ -1874,6 +1880,14 @@ int ObDMLStmt::formalize_stmt_expr_reference(ObRawExprFactory *expr_factory, is_hierarchical_query()) { if (OB_FAIL(set_sharable_expr_reference(*column_item.expr_, ExplicitedRefType::REF_BY_NORMAL))) { LOG_WARN("failed to set sharable exprs reference", K(ret)); + } else if (table_item->is_json_table()) { + if (NULL != column_item.default_value_expr_ + && OB_FAIL(set_sharable_expr_reference(*column_item.default_value_expr_, ExplicitedRefType::REF_BY_NORMAL))) { + LOG_WARN("failed to visit default error expr", K(ret)); + } else if (NULL != column_item.default_empty_expr_ + && OB_FAIL(set_sharable_expr_reference(*column_item.default_empty_expr_, ExplicitedRefType::REF_BY_NORMAL))) { + LOG_WARN("failed to visit default empty expr", K(ret)); + } else { /*do nothing*/ } } } else { /*do nothing*/ } } @@ -4973,7 +4987,7 @@ int ObJtColBaseInfo::assign(const ObJtColBaseInfo& src) 
int ObJsonTableDef::deep_copy(const ObJsonTableDef& src, ObIRawExprCopier &expr_copier, ObIAllocator* allocator) { int ret = OB_SUCCESS; - + table_type_ = src.table_type_; if (OB_FAIL(expr_copier.copy(src.doc_expr_, doc_expr_))) { LOG_WARN("failed to deep copy raw expr", K(ret)); } @@ -4998,6 +5012,14 @@ int ObJsonTableDef::deep_copy(const ObJsonTableDef& src, ObIRawExprCopier &expr_ } } } + ObString ns_str; + for (size_t i = 0; OB_SUCC(ret) && i < src.namespace_arr_.count(); i ++) { + if (OB_FAIL(ob_write_string(*allocator, src.namespace_arr_.at(i), ns_str))) { + LOG_WARN("fail to copy string", K(src.namespace_arr_.at(i)), K(i)); + } else if (OB_FAIL(namespace_arr_.push_back(ns_str))) { + LOG_WARN("fail to push str in array", K(ret), K(ns_str)); + } + } return ret; } @@ -5007,10 +5029,16 @@ int ObJsonTableDef::assign(const ObJsonTableDef& src) int ret = OB_SUCCESS; doc_expr_ = src.doc_expr_; + table_type_ = src.table_type_; if (OB_FAIL(all_cols_.assign(src.all_cols_))) { LOG_WARN("fail to assign all cols.", K(ret)); } + for (size_t i = 0; OB_SUCC(ret) && i < src.namespace_arr_.count(); i ++) { + if (OB_FAIL(namespace_arr_.push_back(src.namespace_arr_.at(i)))) { + LOG_WARN("fail to push str in array", K(ret), K(src.namespace_arr_.at(i))); + } + } return ret; } diff --git a/src/sql/resolver/dml/ob_dml_stmt.h b/src/sql/resolver/dml/ob_dml_stmt.h index e5da5c402e..5511820935 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.h +++ b/src/sql/resolver/dml/ob_dml_stmt.h @@ -152,7 +152,64 @@ public: int64_t unpivot_column_count_; }; -struct ObJsonTableDef; +enum MulModeTableType { + INVALID_TABLE_TYPE = 0, + OB_ORA_JSON_TABLE_TYPE, // 1 + OB_ORA_XML_TABLE_TYPE = 2, +}; + +typedef struct ObJtColBaseInfo +{ + ObJtColBaseInfo(); + ObJtColBaseInfo(const ObJtColBaseInfo& info); + + int32_t col_type_; + int32_t truncate_; + int32_t format_json_; + int32_t wrapper_; + int32_t allow_scalar_; + int64_t output_column_idx_; + int64_t empty_expr_id_; + int64_t error_expr_id_; + ObString 
col_name_; + ObString path_; + int32_t on_empty_; + int32_t on_error_; + int32_t on_mismatch_; + int32_t on_mismatch_type_; + int64_t res_type_; + ObDataType data_type_; + int32_t parent_id_; + int32_t id_; + union { + int32_t value_; + struct { + int32_t is_name_quoted_ : 1; + int32_t reserved_ : 31; + }; + }; + + int deep_copy(const ObJtColBaseInfo& src, ObIAllocator* allocator); + int assign(const ObJtColBaseInfo& src); + + TO_STRING_KV(K_(col_type), K_(format_json), K_(wrapper), K_(allow_scalar), + K_(output_column_idx), K_(col_name), K_(path), K_(parent_id), K_(id)); +} ObJtColBaseInfo; + +typedef struct ObJsonTableDef { + ObJsonTableDef() + : all_cols_(), + doc_expr_(nullptr), + table_type_(MulModeTableType::INVALID_TABLE_TYPE), + namespace_arr_() {} + + int deep_copy(const ObJsonTableDef& src, ObIRawExprCopier &expr_copier, ObIAllocator* allocator); + int assign(const ObJsonTableDef& src); + common::ObSEArray all_cols_; + ObRawExpr *doc_expr_; + MulModeTableType table_type_; + common::ObSEArray namespace_arr_; +} ObJsonTableDef; struct TableItem { @@ -257,7 +314,7 @@ struct TableItem bool is_function_table() const { return FUNCTION_TABLE == type_; } bool is_link_table() const { return OB_INVALID_ID != dblink_id_; } // why not use type_, cause type_ will be changed in dblink transform rule, but dblink id don't change bool is_link_type() const { return LINK_TABLE == type_; } // after dblink transformer, LINK_TABLE will be BASE_TABLE, BASE_TABLE will be LINK_TABLE - bool is_json_table() const { return JSON_TABLE == type_; } + bool is_json_table() const { return JSON_TABLE == type_; } // json_table_def_->table_type_ == MulModeTableType::OB_ORA_JSON_TABLE_TYPE bool is_values_table() const { return VALUES_TABLE == type_; }//used to mark values statement: values row(1,2), row(3,4); bool is_lateral_table() const { return LATERAL_TABLE == type_; } @@ -449,55 +506,6 @@ inline uint64_t ColumnItem::hash(uint64_t seed) const return seed; } -typedef struct ObJtColBaseInfo 
-{ - ObJtColBaseInfo(); - ObJtColBaseInfo(const ObJtColBaseInfo& info); - - int32_t col_type_; - int32_t truncate_; - int32_t format_json_; - int32_t wrapper_; - int32_t allow_scalar_; - int64_t output_column_idx_; - int64_t empty_expr_id_; - int64_t error_expr_id_; - ObString col_name_; - ObString path_; - int32_t on_empty_; - int32_t on_error_; - int32_t on_mismatch_; - int32_t on_mismatch_type_; - int64_t res_type_; - ObDataType data_type_; - int32_t parent_id_; - int32_t id_; - union { - int32_t value_; - struct { - int32_t is_name_quoted_ : 1; - int32_t reserved_ : 31; - }; - }; - - int deep_copy(const ObJtColBaseInfo& src, ObIAllocator* allocator); - int assign(const ObJtColBaseInfo& src); - - TO_STRING_KV(K_(col_type), K_(format_json), K_(wrapper), K_(allow_scalar), - K_(output_column_idx), K_(col_name), K_(path), K_(parent_id), K_(id)); -} ObJtColBaseInfo; - -typedef struct ObJsonTableDef { - ObJsonTableDef() - : all_cols_(), - doc_expr_(nullptr) {} - - int deep_copy(const ObJsonTableDef& src, ObIRawExprCopier &expr_copier, ObIAllocator* allocator); - int assign(const ObJsonTableDef& src); - common::ObSEArray all_cols_; - ObRawExpr *doc_expr_; -} ObJsonTableDef; - struct FromItem { FromItem() @@ -1032,7 +1040,6 @@ public: ObStmtExprGetter &visitor); int get_relation_exprs(common::ObIArray &relation_exprs) const; int get_relation_exprs(common::ObIArray &relation_expr_ptrs); - //this func is used for enum_set_wrapper to get exprs which need to be handled int get_relation_exprs_for_enum_set_wrapper(common::ObIArray &rel_array); ColumnItem *get_column_item_by_id(uint64_t table_id, uint64_t column_id) const; diff --git a/src/sql/resolver/dml/ob_merge_resolver.cpp b/src/sql/resolver/dml/ob_merge_resolver.cpp index 5c74126a12..00dd4ef5db 100644 --- a/src/sql/resolver/dml/ob_merge_resolver.cpp +++ b/src/sql/resolver/dml/ob_merge_resolver.cpp @@ -358,7 +358,8 @@ int ObMergeResolver::resolve_table(const ParseNode &parse_tree, TableItem *&tabl OZ 
(resolve_function_table_item(*table_node, table_item)); break; } - case T_JSON_TABLE_EXPRESSION: { + case T_JSON_TABLE_EXPRESSION: + case T_XML_TABLE_EXPRESSION: { if (OB_ISNULL(session_info_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret)); diff --git a/src/sql/resolver/dml/ob_select_resolver.cpp b/src/sql/resolver/dml/ob_select_resolver.cpp index 3c99835fd3..b606491aa0 100644 --- a/src/sql/resolver/dml/ob_select_resolver.cpp +++ b/src/sql/resolver/dml/ob_select_resolver.cpp @@ -2168,6 +2168,12 @@ int ObSelectResolver::resolve_field_list(const ParseNode &node) } } + if (OB_SUCC(ret) && is_oracle_mode() && T_FUN_SYS_XMLSEQUENCE == sel_expr->get_expr_type()) { + // Currently, xmlsequence is not supported in the select clause. + ret = OB_NOT_SUPPORTED; + LOG_WARN("xmlsequence in select clause is not supported", K(ret)); + } + if (OB_SUCC(ret) && is_oracle_mode() && sel_expr->has_flag(CNT_SUB_QUERY)) { if (OB_FAIL(check_subquery_return_one_column(*sel_expr))) { LOG_WARN("failed to check subquery return one column", K(ret)); diff --git a/src/sql/resolver/expr/ob_raw_expr.cpp b/src/sql/resolver/expr/ob_raw_expr.cpp index 5c422ee5bb..333dceb118 100644 --- a/src/sql/resolver/expr/ob_raw_expr.cpp +++ b/src/sql/resolver/expr/ob_raw_expr.cpp @@ -4049,6 +4049,16 @@ bool ObSysFunRawExpr::inner_same_as( get_extra() != expr.get_extra()) { // for calc_partition_id bool_ret = false; } + + // for json partial update + // update json_test set j1 = json_replace(j2, '$[0]', 'xyz'), j2 = json_replace(j2, '$[0]', 'xyz') where pk=1; + // first and second value expr is same, but behavior is different + // so do not share expr + if ((T_FUN_SYS_JSON_REPLACE == get_expr_type() || + T_FUN_SYS_JSON_SET == get_expr_type() || + T_FUN_SYS_JSON_REMOVE == get_expr_type())) { + bool_ret = false; + } } } else if (expr.is_op_expr() && T_OP_CNN == expr.get_expr_type()) { //for cases which compares concat('xxx','xxx') with 'xxx'||'xxx' diff --git 
a/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp b/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp index 6e6fea431f..d674f3039f 100644 --- a/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp @@ -310,7 +310,8 @@ bool need_calc_json(ObItemType item_type) { bool bool_ret = false; if (T_FUN_SYS < item_type && item_type < T_FUN_SYS_END) { - if (T_FUN_SYS_JSON_OBJECT <= item_type && item_type <= T_FUN_JSON_OBJECTAGG) { + if ((T_FUN_SYS_JSON_OBJECT <= item_type && item_type <= T_FUN_JSON_OBJECTAGG) + || (T_FUN_SYS_JSON_SCHEMA_VALID <= item_type && item_type <= T_FUN_SYS_JSON_APPEND)) { bool_ret = true; // json calc type is decided by json functions } } diff --git a/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp b/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp index c14421ec2c..4e1d6fc415 100644 --- a/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp @@ -567,6 +567,7 @@ int ObRawExprInfoExtractor::visit(ObSysFunRawExpr &expr) } } else {} } + } return ret; } diff --git a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp index d6b5853763..6d779ad77e 100644 --- a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp @@ -1415,7 +1415,7 @@ int ObRawExprResolverImpl::process_xml_element_node(const ParseNode *node, ObRaw } ObSysFunRawExpr *func_expr = NULL; if (OB_SUCC(ret)) { - if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS, func_expr))) { + if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS_XML_ELEMENT, func_expr))) { LOG_WARN("create raw expr failed", K(node->num_child_), K(ret)); } else if (OB_ISNULL(func_expr)) { ret = OB_ERR_UNEXPECTED; @@ -1711,7 +1711,7 @@ int ObRawExprResolverImpl::process_xml_attributes_node(const ParseNode *node, Ob } ObSysFunRawExpr *func_expr = NULL; if (OB_SUCC(ret)) { - if 
(OB_FAIL(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS, func_expr))) { + if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS_XML_ATTRIBUTES, func_expr))) { LOG_WARN("create raw expr failed", K(node->num_child_), K(ret)); } else if (OB_ISNULL(func_expr)) { ret = OB_ERR_UNEXPECTED; @@ -2201,7 +2201,8 @@ int ObRawExprResolverImpl::check_sys_func(ObQualifiedName &q_name, bool &is_sys_ static bool check_is_pl_jsontype(const ObString& name) { return ((name.length() == 13 && ObString("JSON_OBJECT_T").compare(name) == 0) - || (name.length() == 14 && ObString("JSON_ELEMENT_T").compare(name) == 0)); + || (name.length() == 14 && ObString("JSON_ELEMENT_T").compare(name) == 0) + || (name.length() == 12 && ObString("JSON_ARRAY_T").compare(name) == 0)); } int ObRawExprResolverImpl::check_pl_udf(ObQualifiedName &q_name, @@ -5111,7 +5112,7 @@ int ObRawExprResolverImpl::process_xmlparse_node(const ParseNode *node, ObRawExp ObSysFunRawExpr *func_expr = NULL; if (OB_SUCC(ret)) { child_num = node->num_child_; - ctx_.expr_factory_.create_raw_expr(T_FUN_SYS, func_expr); + ctx_.expr_factory_.create_raw_expr(T_FUN_SYS_XMLPARSE, func_expr); CK(OB_NOT_NULL(func_expr)); OX(func_expr->set_func_name(ObString::make_string("xmlparse"))); } @@ -5820,7 +5821,7 @@ int ObRawExprResolverImpl::process_ora_json_object_star_node(const ParseNode *no { INIT_SUCC(ret); ObSysFunRawExpr *func_expr = NULL; - if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS, func_expr))) { + if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS_JSON_OBJECT_WILD_STAR, func_expr))) { LOG_WARN("fail to create func_expr"); } else { CK(OB_NOT_NULL(func_expr)); @@ -5860,7 +5861,7 @@ int ObRawExprResolverImpl::process_ora_json_object_node(const ParseNode *node, O } ObSysFunRawExpr *func_expr = NULL; - if (OB_SUCC(ret) && OB_SUCC(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS, func_expr))) { + if (OB_SUCC(ret) && OB_SUCC(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS_JSON_OBJECT, func_expr))) { CK(OB_NOT_NULL(func_expr)); 
OX(func_expr->set_func_name(ObString::make_string("json_object"))); } else { @@ -6165,9 +6166,13 @@ int ObRawExprResolverImpl::process_json_value_node(const ParseNode *node, ObRawE bool mismatch_vec = false; int32_t num = 0; ObSysFunRawExpr *func_expr = NULL; - if (OB_SUCC(ret)) { + if (OB_FAIL(ret)) { + } else if (GET_MIN_CLUSTER_VERSION() < CLUSTER_VERSION_4_2_2_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("json value raw expr number has change in 4.2.2 version", K(ret)); // 12 -> 10 + } else { num = node->num_child_; - OZ(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS, func_expr)); + OZ(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS_JSON_VALUE, func_expr)); CK(OB_NOT_NULL(func_expr)); OX(func_expr->set_func_name(ObString::make_string("json_value"))); } @@ -6274,13 +6279,6 @@ int ObRawExprResolverImpl::process_json_value_node(const ParseNode *node, ObRawE CK(OB_NOT_NULL(para_expr)); OZ(func_expr->add_param_expr(para_expr)); } - if (OB_SUCC(ret) && (i == 6 || i == 8)) { - ObRawExpr *para_expr = NULL; - CK(OB_NOT_NULL(cur_node)); - OZ(SMART_CALL(recursive_resolve(cur_node, para_expr))); - CK(OB_NOT_NULL(para_expr)); - OZ(func_expr->add_param_expr(para_expr)); - } } //end for // ([on_mismatch][opt_mismatch_types] on oracle) int8_t mis_check = 0; @@ -6344,7 +6342,7 @@ int ObRawExprResolverImpl::process_json_equal_node(const ParseNode *node, ObRawE CK(3 == node->num_child_); int32_t num = node->num_child_; ObSysFunRawExpr *func_expr = NULL; - OZ(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS, func_expr)); + OZ(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS_JSON_EQUAL, func_expr)); CK(OB_NOT_NULL(func_expr)); OX(func_expr->set_func_name(ObString::make_string("json_equal"))); @@ -6366,7 +6364,7 @@ int ObRawExprResolverImpl::process_json_array_node(const ParseNode *node, ObRawE CK(4 == node->num_child_); int32_t num = node->num_child_; ObSysFunRawExpr *func_expr = NULL; - OZ(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS, func_expr)); + 
OZ(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS_JSON_ARRAY, func_expr)); CK(OB_NOT_NULL(func_expr)); OX(func_expr->set_func_name(ObString::make_string("json_array"))); @@ -6407,7 +6405,7 @@ int ObRawExprResolverImpl::process_json_mergepatch_node(const ParseNode *node, O CK(7 == node->num_child_); int32_t num = node->num_child_; ObSysFunRawExpr *func_expr = NULL; - OZ(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS, func_expr)); + OZ(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS_JSON_MERGE_PATCH, func_expr)); CK(OB_NOT_NULL(func_expr)); OX(func_expr->set_func_name(ObString::make_string("json_merge_patch"))); @@ -6440,7 +6438,7 @@ int ObRawExprResolverImpl::process_is_json_node(const ParseNode *node, ObRawExpr CK(5 == node->num_child_); int32_t num = node->num_child_; ObSysFunRawExpr *func_expr = NULL; - OZ(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS, func_expr)); + OZ(ctx_.expr_factory_.create_raw_expr(T_FUN_SYS_IS_JSON, func_expr)); CK(OB_NOT_NULL(func_expr)); OX(func_expr->set_func_name(ObString::make_string("is_json"))); diff --git a/src/sql/resolver/ob_resolver_utils.cpp b/src/sql/resolver/ob_resolver_utils.cpp index 97a096d15c..ab51879c6c 100644 --- a/src/sql/resolver/ob_resolver_utils.cpp +++ b/src/sql/resolver/ob_resolver_utils.cpp @@ -113,7 +113,9 @@ int ObResolverUtils::get_all_function_table_column_names(const TableItem &table_ CK (OB_NOT_NULL(coll_type = static_cast(user_type))); if (OB_SUCC(ret) && !coll_type->get_element_type().is_obj_type() - && !coll_type->get_element_type().is_record_type()) { + && !coll_type->get_element_type().is_record_type() + && !(coll_type->get_element_type().is_opaque_type() + && coll_type->get_element_type().get_user_type_id() == T_OBJ_XML)) { ret = OB_NOT_SUPPORTED; LOG_WARN("not suppoert type in table function", K(ret), KPC(coll_type)); ObString err; @@ -121,7 +123,8 @@ int ObResolverUtils::get_all_function_table_column_names(const TableItem &table_ err.write(" collation type in table function\0", sizeof(" collation type in 
table function\0")); LOG_USER_ERROR(OB_NOT_SUPPORTED, err.ptr()); } - if (OB_SUCC(ret) && coll_type->get_element_type().is_obj_type()) { + if (OB_SUCC(ret) && (coll_type->get_element_type().is_obj_type() + || coll_type->get_element_type().is_opaque_type())) { OZ (column_names.push_back(ObString("COLUMN_VALUE"))); } if (OB_SUCC(ret) && coll_type->get_element_type().is_record_type()) { @@ -1018,13 +1021,27 @@ int ObResolverUtils::check_type_match(const pl::ObPLResolveCtx &resolve_ctx, } else { // 复杂类型的TypeClass相同, 需要检查兼容性 bool is_compatible = false; - OZ (ObPLResolver::check_composite_compatible( +#ifdef OB_BUILD_ORACLE_PL + if (ObPlJsonUtil::is_pl_jsontype(src_type_id)) { + OZ (ObPLResolver::check_composite_compatible( NULL == resolve_ctx.params_.secondary_namespace_ ? static_cast(resolve_ctx) : static_cast(*resolve_ctx.params_.secondary_namespace_), - src_type_id, dst_pl_type.get_user_type_id(), + src_type_id, is_compatible), K(src_type_id), K(dst_pl_type), K(resolve_ctx.params_.is_execute_call_stmt_)); + } else { +#endif + OZ (ObPLResolver::check_composite_compatible( + NULL == resolve_ctx.params_.secondary_namespace_ + ? 
static_cast(resolve_ctx) + : static_cast(*resolve_ctx.params_.secondary_namespace_), + src_type_id, + dst_pl_type.get_user_type_id(), + is_compatible), K(src_type_id), K(dst_pl_type), K(resolve_ctx.params_.is_execute_call_stmt_)); +#ifdef OB_BUILD_ORACLE_PL + } +#endif if (OB_FAIL(ret)) { } else if (is_compatible) { OX (match_info = ObRoutineMatchInfo::MatchInfo(true, src_type, dst_type)); diff --git a/src/sql/rewrite/ob_transform_utils.cpp b/src/sql/rewrite/ob_transform_utils.cpp index 6f35909ac6..5e9fa377ea 100644 --- a/src/sql/rewrite/ob_transform_utils.cpp +++ b/src/sql/rewrite/ob_transform_utils.cpp @@ -13760,6 +13760,7 @@ int ObTransformUtils::check_is_json_constraint(ObTransformerCtx *ctx, return ret; } + int ObTransformUtils::add_dummy_expr_for_json_object_node(ObTransformerCtx *ctx, ObSEArray& param_array) { diff --git a/src/sql/rewrite/ob_transform_utils.h b/src/sql/rewrite/ob_transform_utils.h index c195acbde8..194a8861f9 100644 --- a/src/sql/rewrite/ob_transform_utils.h +++ b/src/sql/rewrite/ob_transform_utils.h @@ -928,13 +928,15 @@ public: ObDMLStmt *stmt, ColumnItem& col_item, ObSEArray& param_array); + static int add_dummy_expr_for_json_object_node(ObTransformerCtx *ctx, + ObSEArray& param_array); static int get_expand_node_from_star(ObTransformerCtx *ctx, ObDMLStmt *stmt, ObRawExpr *param_expr, ObSEArray& param_array); - static int add_dummy_expr_for_json_object_node(ObTransformerCtx *ctx, - ObSEArray& param_array); // end json object with star + + static int add_cast_for_replace(ObRawExprFactory &expr_factory, const ObRawExpr *from_expr, ObRawExpr *&to_expr, diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 897dfff00d..27db0a4449 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -778,6 +778,7 @@ ob_set_subtarget(ob_storage lob lob/ob_lob_locator.cpp lob/ob_lob_util.cpp lob/ob_lob_rpc_struct.cpp + lob/ob_ext_info_callback.cpp ) ob_set_subtarget(ob_storage mview diff --git 
a/src/storage/ddl/ob_direct_load_struct.cpp b/src/storage/ddl/ob_direct_load_struct.cpp index 79bab5ed5f..2ab9cadd79 100644 --- a/src/storage/ddl/ob_direct_load_struct.cpp +++ b/src/storage/ddl/ob_direct_load_struct.cpp @@ -813,6 +813,7 @@ int ObDirectLoadSliceWriter::prepare_iters( const share::ObLSID &ls_id, const ObTabletID &tablet_id, const int64_t trans_version, + const ObObjType &obj_type, const ObCollationType &cs_type, const ObLobId &lob_id, const transaction::ObTransID trans_id, @@ -853,7 +854,7 @@ int ObDirectLoadSliceWriter::prepare_iters( lob_storage_param.inrow_threshold_ = lob_inrow_threshold; int64_t unused_affected_rows = 0; if (OB_FAIL(ObInsertLobColumnHelper::insert_lob_column( - allocator, nullptr, ls_id, tablet_id, lob_id, cs_type, lob_storage_param, datum, + allocator, nullptr, ls_id, tablet_id, lob_id, obj_type, cs_type, lob_storage_param, datum, timeout_ts, true/*has_lob_header*/, src_tenant_id, *meta_write_iter_))) { LOG_WARN("fail to insert_lob_col", K(ret), K(ls_id), K(tablet_id), K(lob_id), K(src_tenant_id)); } else if (OB_FAIL(row_iterator_->init(meta_write_iter_, trans_id, @@ -912,7 +913,7 @@ int ObDirectLoadSliceWriter::fill_lob_into_memtable( ObLobStorageParam lob_storage_param; lob_storage_param.inrow_threshold_ = lob_inrow_threshold; if (OB_FAIL(ObInsertLobColumnHelper::insert_lob_column( - allocator, info.ls_id_, info.data_tablet_id_, col_types.at(i).get_collation_type(), + allocator, info.ls_id_, info.data_tablet_id_, col_types.at(i).get_type(), col_types.at(i).get_collation_type(), lob_storage_param, datum, timeout_ts, true/*has_lob_header*/, info.src_tenant_id_))) { LOG_WARN("fail to insert_lob_col", K(ret), K(datum)); } @@ -949,7 +950,7 @@ int ObDirectLoadSliceWriter::fill_lob_into_macro_block( lob_id.tablet_id_ = tablet_direct_load_mgr_->get_tablet_id().id(); // lob meta tablet id. 
ObLobMetaRowIterator *row_iter = nullptr; if (OB_FAIL(prepare_iters(allocator, iter_allocator, datum, info.ls_id_, - info.data_tablet_id_, info.trans_version_, col_types.at(i).get_collation_type(), lob_id, + info.data_tablet_id_, info.trans_version_, col_types.at(i).get_type(), col_types.at(i).get_collation_type(), lob_id, info.trans_id_, info.seq_no_, timeout_ts, lob_inrow_threshold, info.src_tenant_id_, row_iter))) { LOG_WARN("fail to prepare iters", K(ret), KP(row_iter), K(datum)); } else { diff --git a/src/storage/ddl/ob_direct_load_struct.h b/src/storage/ddl/ob_direct_load_struct.h index ff0862d9a8..aa95d0e236 100644 --- a/src/storage/ddl/ob_direct_load_struct.h +++ b/src/storage/ddl/ob_direct_load_struct.h @@ -559,6 +559,7 @@ private: const share::ObLSID &ls_id, const ObTabletID &tablet_id, const int64_t trans_version, + const ObObjType &obj_type, const ObCollationType &cs_type, const ObLobId &lob_id, const transaction::ObTransID trans_id, diff --git a/src/storage/lob/ob_ext_info_callback.cpp b/src/storage/lob/ob_ext_info_callback.cpp new file mode 100644 index 0000000000..d18377a723 --- /dev/null +++ b/src/storage/lob/ob_ext_info_callback.cpp @@ -0,0 +1,436 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ +#define USING_LOG_PREFIX STORAGE + +#include "ob_ext_info_callback.h" +#include "share/ob_lob_access_utils.h" +#include "lib/utility/ob_fast_convert.h" +#include "storage/memtable/ob_memtable_mutator.h" +#include "storage/blocksstable/ob_row_writer.h" + +namespace oceanbase +{ +using namespace common; +using namespace share; +namespace storage +{ +const int64_t ObExtInfoCbRegister::OB_EXT_INFO_LOG_BLOCK_MAX_SIZE = 1 * 1024 * 1024 + 500 * 1024; + +DEFINE_GET_SERIALIZE_SIZE(ObExtInfoLogHeader) +{ + int64_t size = 0; + size += serialization::encoded_length_i8(type_); + return size; +} + +DEFINE_SERIALIZE(ObExtInfoLogHeader) +{ + int ret = OB_SUCCESS; + int64_t new_pos = pos; + if (NULL == buf || 0 >= buf_len || 0 > pos) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("serialize failed", K(ret), K(pos), K(buf_len)); + } else if (OB_FAIL(serialization::encode_i8(buf, buf_len, new_pos, type_))) { + LOG_ERROR("serialize failed", K(ret), K(pos), K(buf_len)); + } else { + pos = new_pos; + } + return ret; +} + +DEFINE_DESERIALIZE(ObExtInfoLogHeader) +{ + int ret = OB_SUCCESS; + int64_t new_pos = pos; + if (NULL == buf || 0 >= data_len || 0 > pos) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("deserialize failed", K(ret), K(pos), K(data_len)); + } else if (OB_FAIL(serialization::decode_i8(buf, data_len, new_pos, reinterpret_cast(&type_)))) { + LOG_ERROR("deserialize failed", K(ret), K(pos), K(data_len)); + } else { + pos = new_pos; + } + return ret; +} + +ObJsonDiffLog::~ObJsonDiffLog() +{} + +int ObJsonDiffLog::to_string(ObIAllocator &allocator, ObString &result) +{ + int ret = OB_SUCCESS; + ObStringBuffer buffer(&allocator); + if (diffs_.count() <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("json diff is empty", KR(ret)); + } else if (OB_FAIL(buffer.append("{\"version\": 1, "))) { + LOG_WARN("buffer append fail", KR(ret)); + } else if (OB_FAIL(buffer.append("\"diffs\": ["))) { // diffs begin + LOG_WARN("buffer append fail", KR(ret)); + } else { + for (int idx = 0; OB_SUCC(ret) && 
idx < diffs_.count(); ++idx) { + if (idx > 0 && OB_FAIL(buffer.append(", "))) { + LOG_WARN("buffer append fail", KR(ret)); + } else if (OB_FAIL(diffs_[idx].print(buffer))) { + LOG_WARN("print json diff fail", KR(ret), K(idx)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(buffer.append("]"))) { // diffs end + LOG_WARN("buffer append fail", KR(ret)); + } else if (OB_FAIL(buffer.append("}"))) { + LOG_WARN("buffer append fail", KR(ret)); + } else { + buffer.get_result_string(result); + } + } + return ret; +} + +int ObJsonDiffLog::deserialize(const char* buf, const int64_t data_len, int64_t& pos) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(diff_header_.deserialize(buf, data_len, pos))) { + LOG_WARN("deserialize json diff header fail", KR(ret), K(data_len), K(pos)); + } else { + for (int64_t j = 0; OB_SUCC(ret) && j < diff_header_.cnt_; ++j) { + ObJsonDiff json_diff; + if (OB_FAIL(json_diff.deserialize(buf, data_len, pos))) { + LOG_WARN("deserialize json diff header fail", KR(ret), K(data_len), K(pos)); + } else if (OB_FAIL(diffs_.push_back(json_diff))) { + LOG_WARN("push fail", KR(ret)); + } + } + } + return ret; +} + +ObExtInfoCallback::~ObExtInfoCallback() +{ + if (! 
data_.empty()) { + allocator_->free(data_.ptr()); + data_.assign_ptr(nullptr, 0); + } + if (OB_NOT_NULL(mutator_row_buf_)) { + allocator_->free(mutator_row_buf_); + mutator_row_buf_ = nullptr; + } +} + +memtable::MutatorType ObExtInfoCallback::get_mutator_type() const +{ + return memtable::MutatorType::MUTATOR_ROW_EXT_INFO; +} + +int ObExtInfoCallback::get_redo(memtable::RedoDataNode &redo_node) +{ + int ret = OB_SUCCESS; + memtable::ObRowData old_row; + memtable::ObRowData new_row; + ObDatumRow datum_row; + ObTabletID tablet_id; + char *buf = nullptr; + int64_t len = 0; + key_obj_.reset(); + rowkey_.reset(); + key_.reset(); + key_obj_.set_uint64(seq_no_cur_.cast_to_int()); + rowkey_.assign(&key_obj_, OB_EXT_INFO_MUTATOR_ROW_KEY_CNT); + SMART_VAR(blocksstable::ObRowWriter, row_writer) { + if (data_.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data is empty", K(ret)); + } else if (OB_FAIL(key_.encode(&rowkey_))) { + LOG_WARN("encode memtable key failed", K(ret), K(rowkey_)); + } else if (OB_NOT_NULL(mutator_row_buf_)) { + // already alloced, so no need alloc again + } else if (OB_FAIL(datum_row.init(*allocator_, OB_EXT_INFO_MUTATOR_ROW_COUNT))) { + LOG_WARN("init datum row fail", K(ret)); + } else if (OB_FAIL(datum_row.storage_datums_[OB_EXT_INFO_MUTATOR_ROW_KEY_IDX].from_obj_enhance(key_obj_))) { + LOG_WARN("set datum fail", K(ret), K(data_)); + } else if (OB_FAIL(datum_row.storage_datums_[OB_EXT_INFO_MUTATOR_ROW_VALUE_IDX].from_buf_enhance(data_.ptr(), data_.length()))) { + LOG_WARN("set datum fail", K(ret), K(data_)); + } else if (OB_FAIL(row_writer.write(OB_EXT_INFO_MUTATOR_ROW_KEY_CNT, datum_row, buf, len))) { + LOG_WARN("write row data fail", K(ret)); + } else if (OB_ISNULL(mutator_row_buf_ = static_cast(allocator_->alloc(len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc mutator_row_buf fail", K(ret), K(len)); + } else { + MEMCPY(mutator_row_buf_, buf, len); + mutator_row_len_ = len; + } + if (OB_SUCC(ret)) { + new_row.set(mutator_row_buf_, 
mutator_row_len_); + redo_node.set(&key_, + old_row, + new_row, + dml_flag_, + 1, /* modify_count */ + 0, /* acc_checksum */ + 0, /* version */ + 0, /* flag */ + seq_no_cur_, + tablet_id, + OB_EXT_INFO_MUTATOR_ROW_COUNT); + redo_node.set_callback(this); + } + } // end row_writer + return ret; +} + +int ObExtInfoCallback::set( + ObIAllocator &allocator, + const blocksstable::ObDmlFlag dml_flag, + const transaction::ObTxSEQ &seq_no_cur, + ObString &data) +{ + int ret = OB_SUCCESS; + dml_flag_ = dml_flag; + seq_no_cur_ = seq_no_cur; + allocator_ = &allocator; + if (OB_FAIL(ob_write_string(allocator, data, data_))) { + LOG_WARN("ob_write_string fail", K(ret), K(data)); + } + return ret; +} + +ObExtInfoCbRegister::~ObExtInfoCbRegister() +{ + if (OB_NOT_NULL(lob_param_)) { + lob_param_->~ObLobAccessParam(); + tmp_allocator_.free(lob_param_); + lob_param_ = nullptr; + } + if (OB_NOT_NULL(data_iter_)) { + // data_iter_ is allocated by ObLobManager::query, which uses ob_malloc, + // so it is released with OB_DELETE here instead of tmp_allocator_ + data_iter_->reset(); + data_iter_->~ObLobQueryIter(); + OB_DELETE(ObLobQueryIter, "unused", data_iter_); + data_iter_ = nullptr; + } +} + +int ObExtInfoCbRegister::register_cb( + memtable::ObIMvccCtx *ctx, + const int64_t timeout, + const blocksstable::ObDmlFlag dml_flag, + transaction::ObTxDesc *tx_desc, + transaction::ObTxSEQ &parent_seq_no, + ObObj &index_data, + ObObj &ext_info_data) +{ + int ret = OB_SUCCESS; + ext_info_data_ = ext_info_data; + timeout_ = timeout; + ObLobManager *lob_mngr = MTL(ObLobManager*); + if (OB_ISNULL(lob_mngr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("[STORAGE_LOB]get lob manager instance failed.", K(ret)); + } else if (OB_ISNULL(tx_desc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tx_desc is null", K(ret)); + } else if (ext_info_data.is_null()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data is empty", K(ret), K(ext_info_data)); + } else if (OB_ISNULL(mvcc_ctx_ = ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("mvcc ctx is null", K(ret), 
K(ext_info_data)); + } else if (OB_FAIL(init_header(index_data, ext_info_data))) { + LOG_WARN("init_header_ fail", K(ret), K(ext_info_data)); + } else if (OB_FAIL(build_data_iter(ext_info_data))) { + LOG_WARN("build data iter fail", K(ret)); + } else { + seq_no_cnt_ = data_size_/OB_EXT_INFO_LOG_BLOCK_MAX_SIZE + 1; + seq_no_st_ = tx_desc->get_and_inc_tx_seq(parent_seq_no.get_branch(), seq_no_cnt_); + transaction::ObTxSEQ seq_no_cur = seq_no_st_; + ObString data; + ObIAllocator &allocator = lob_mngr->get_ext_info_log_allocator(); + ObSEArray cb_array; + int cb_cnt = 0; + while (OB_SUCC(ret) && OB_SUCC(get_data(data))) { + storage::ObExtInfoCallback *cb = nullptr; + if (OB_ISNULL(cb = mvcc_ctx_->alloc_ext_info_callback())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc row callback failed", K(ret)); + } else if (OB_FAIL(cb_array.push_back(cb))) { + LOG_WARN("push back cb fail", K(ret), K(cb_array)); + } else if (OB_FAIL(cb->set(allocator, dml_flag, seq_no_cur, data))) { + // do not register a callback whose data copy failed; it is freed below + LOG_WARN("set ext info callback failed", K(ret), K(seq_no_cur)); + } else { + seq_no_cur = seq_no_cur + 1; + if (OB_FAIL(mvcc_ctx_->append_callback(cb))) { + LOG_ERROR("register ext info callback failed", K(ret), K(*this),K(*cb)); + } else { + ++cb_cnt; + LOG_DEBUG("register ext info callback success", K(*cb)); + } + } + if (OB_FAIL(ret) && OB_NOT_NULL(cb)) { + mvcc_ctx_->free_ext_info_callback(cb); + } + } + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + if (OB_FAIL(ret)) { + for(int i = 0; i < cb_cnt; ++i) { + cb_array[i]->del(); + mvcc_ctx_->free_ext_info_callback(cb_array[i]); + } + } else if (OB_FALSE_IT(seq_no_cnt_ = cb_cnt)) { + } else if (OB_FAIL(set_index_data(index_data))) { + LOG_WARN("set_index_data fail", K(ret)); + } + } + return ret; +} + +int ObExtInfoCbRegister::init_header(ObObj& index_data, ObObj &ext_info_data) +{ + int ret = OB_SUCCESS; + header_.type_ = get_type(index_data.get_type()); + return ret; +} + +int ObExtInfoCbRegister::set_index_data(ObObj &index_data) +{ + int ret = OB_SUCCESS; + if (is_lob_storage(index_data.get_type())) { + if 
(OB_FAIL(set_outrow_ctx_seq_no(index_data))) { + LOG_WARN("set_outrow_ctx_seq_no fail", K(ret)); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support type", K(ret), K(index_data)); + } + return ret; +} + +int ObExtInfoCbRegister::set_outrow_ctx_seq_no(ObObj& index_data) +{ + int ret = OB_SUCCESS; + ObLobLocatorV2 locator; + ObLobCommon *lob_common = nullptr; + ObString str_data = index_data.get_string(); + if (str_data.empty()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("index lob data is empty", K(ret), K(index_data)); + } else if (OB_ISNULL(lob_common = reinterpret_cast(str_data.ptr()))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("lob_common is null", K(ret), K(index_data)); + } else if (! lob_common->is_valid() || lob_common->in_row_ || lob_common->is_mem_loc_ || ! lob_common->is_init_ ) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid lob", K(ret), K(*lob_common)); + } else { + ObLobData *lob_data = reinterpret_cast(lob_common->buffer_); + ObLobDataOutRowCtx *lob_data_outrow_ctx = reinterpret_cast(lob_data->buffer_); + lob_data_outrow_ctx->seq_no_st_ = seq_no_st_.cast_to_int(); + lob_data_outrow_ctx->seq_no_cnt_ = seq_no_cnt_; + lob_data_outrow_ctx->modified_len_ = data_size_ + header_.get_serialize_size(); + } + return ret; +} + +int ObExtInfoCbRegister::build_data_iter(ObObj &ext_info_data) +{ + int ret = OB_SUCCESS; + ObLobManager *lob_mgr = MTL(ObLobManager*); + ObString data = ext_info_data.get_string(); + ObLobLocatorV2 data_locator; + char *data_buf = nullptr; + int64_t data_buf_len = 0; + if (OB_ISNULL(lob_mgr)) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(ERROR, "lob manager is null", K(ret)); + } else if (! is_lob_storage(ext_info_data.get_type())) { + if (OB_FAIL(lob_mgr->query(data, data_iter_))) { + LOG_WARN("build data iter fail", K(ret), K(lob_param_)); + } else { + data_size_ = data.length(); + } + } else if (OB_FALSE_IT(data_locator.assign_buffer(data.ptr(), data.length()))) { + } else if (! 
data_locator.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid lob locator", K(ret), K(data_locator)); + } else if (OB_FAIL(data_locator.get_lob_data_byte_len(data_size_))) { + LOG_WARN("get lob data byte len fail", K(ret), K(data_locator)); + } else if (OB_ISNULL(lob_param_ = OB_NEWx(ObLobAccessParam, &tmp_allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc lob param fail", K(ret), "size", sizeof(ObLobAccessParam)); + } else if (OB_FAIL(lob_mgr->build_lob_param( + *lob_param_, + tmp_allocator_, + CS_TYPE_BINARY, + 0, + data_size_, + timeout_, + data_locator))) { + LOG_WARN("build lob param fail", K(ret), K(data_locator)); + } else if (OB_FAIL(lob_mgr->query(*lob_param_, data_iter_))) { + LOG_WARN("build data iter fail", K(ret), K(lob_param_)); + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(data_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data iter is null", K(ret), K(lob_param_)); + } else if (OB_FALSE_IT(data_buf_len = std::min(data_size_ + header_.get_serialize_size(), OB_EXT_INFO_LOG_BLOCK_MAX_SIZE))) { + } else if (OB_ISNULL(data_buf = reinterpret_cast(tmp_allocator_.alloc(data_buf_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc lob param fail", K(ret), K(data_buf_len)); + } else { + data_buffer_.assign_buffer(data_buf, data_buf_len); + } + return ret; +} + +int ObExtInfoCbRegister::get_data(ObString &data) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(data_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data iter is null", K(ret)); + } else if (! 
header_writed_) { + char *buf = data_buffer_.ptr(); + int64_t buf_len = data_buffer_.size(); + int64_t pos = 0; + ObString read_buffer; + if (OB_FAIL(header_.serialize(buf, buf_len, pos))) { + LOG_WARN("serialize header fail", KR(ret), K(pos), K(buf_len), KP(buf)); + } else { + read_buffer.assign_buffer(buf + pos, buf_len - pos); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(data_iter_->get_next_row(read_buffer))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next row fail", K(ret)); + } + } else { + data.assign_ptr(buf, read_buffer.length() + 1); + data_buffer_.set_length(read_buffer.length() + 1); + header_writed_ = true; + } + } else { + data_buffer_.set_length(0); + if (OB_FAIL(data_iter_->get_next_row(data_buffer_))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next row fail", K(ret)); + } + } else { + data.assign_ptr(data_buffer_.ptr(), data_buffer_.length()); + } + } + return ret; +} + +}; // end namespace storage +}; // end namespace oceanbase diff --git a/src/storage/lob/ob_ext_info_callback.h b/src/storage/lob/ob_ext_info_callback.h new file mode 100644 index 0000000000..7d23d362e1 --- /dev/null +++ b/src/storage/lob/ob_ext_info_callback.h @@ -0,0 +1,204 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_STORAGE_OB_EXT_INFO_CALLBACK_ +#define OCEANBASE_STORAGE_OB_EXT_INFO_CALLBACK_ + +#include "storage/memtable/mvcc/ob_mvcc_trans_ctx.h" +#include "lib/json_type/ob_json_diff.h" + +namespace oceanbase +{ +using namespace common; +using namespace share; + +namespace storage +{ +class ObLobAccessParam; +class ObLobQueryIter; + +enum ObExtInfoLogType +{ + OB_INVALID_EXT_INFO_LOG = 0, + OB_JSON_DIFF_EXT_INFO_LOG = 1, +}; + + +struct ObExtInfoLogHeader +{ + static int64_t get_header_size() { return sizeof(ObExtInfoLogHeader); } + + ObExtInfoLogHeader(): + type_(0) + {} + + ObExtInfoLogType get_type() const { return static_cast(type_); } + bool is_json_diff() const { return get_type() == OB_JSON_DIFF_EXT_INFO_LOG; } + + uint8_t type_; + + NEED_SERIALIZE_AND_DESERIALIZE; + +public: + TO_STRING_KV(K(type_)); + +}; + +class ObJsonDiffLog +{ +public: + ObJsonDiffLog(): + diff_header_(), + diffs_() + {} + + ~ObJsonDiffLog(); + int deserialize(const char* buf, const int64_t data_len, int64_t& pos); + int to_string(ObIAllocator &allocator, ObString &result); + TO_STRING_KV(K(diff_header_), K(diffs_)); + +private: + ObJsonDiffHeader diff_header_; + + ObJsonDiffArray diffs_; + +}; + +class ObExtInfoCallback : public memtable::ObITransCallback +{ +public: + static const int32_t OB_EXT_INFO_MUTATOR_ROW_COUNT = 2; + static const int32_t OB_EXT_INFO_MUTATOR_ROW_KEY_IDX = 0; + static const int32_t OB_EXT_INFO_MUTATOR_ROW_KEY_CNT = 1; + static const int32_t OB_EXT_INFO_MUTATOR_ROW_VALUE_IDX = 1; + +public: + ObExtInfoCallback() : + ObITransCallback(), + allocator_(nullptr), + data_(), + seq_no_cur_(), + dml_flag_(blocksstable::ObDmlFlag::DF_MAX), + key_obj_(), + key_(), + rowkey_(), + mutator_row_buf_(nullptr), + mutator_row_len_(0) + {} + + virtual ~ObExtInfoCallback(); + + virtual memtable::MutatorType get_mutator_type() const override; + virtual transaction::ObTxSEQ get_seq_no() const override { return seq_no_cur_; } + virtual int64_t get_data_size() override 
{ return data_.length(); }; + + int get_redo(memtable::RedoDataNode &redo_node); + int set( + ObIAllocator &allocator, + const blocksstable::ObDmlFlag dml_flag, + const transaction::ObTxSEQ &seq_no_cur, + ObString &data); + +public: + TO_STRING_KV(K(seq_no_cur_), K(dml_flag_), K(data_), K(mutator_row_len_), KP(mutator_row_buf_)); + +private: + ObIAllocator *allocator_; + ObString data_; + transaction::ObTxSEQ seq_no_cur_; + blocksstable::ObDmlFlag dml_flag_; + ObObj key_obj_; + memtable::ObMemtableKey key_; + common::ObStoreRowkey rowkey_; + char *mutator_row_buf_; + int32_t mutator_row_len_; +}; + +class ObExtInfoCbRegister { +public: + static const int32_t OB_EXT_INFO_LOG_HEADER_LEN = 1; + static const int64_t OB_EXT_INFO_LOG_BLOCK_MAX_SIZE; + +public: + ObExtInfoCbRegister(): + tmp_allocator_(lib::ObMemAttr(MTL_ID(), "ExtInfoLogReg")), + mvcc_ctx_(nullptr), + header_(), + ext_info_data_(), + lob_param_(nullptr), + data_iter_(nullptr), + data_buffer_(), + timeout_(0), + data_size_(0), + seq_no_st_(), + seq_no_cnt_(0), + header_writed_(false) + {} + + ~ObExtInfoCbRegister(); + + int register_cb( + memtable::ObIMvccCtx *ctx, + const int64_t timeout, + const blocksstable::ObDmlFlag dml_flag, + transaction::ObTxDesc *tx_desc, + transaction::ObTxSEQ &parent_seq_no, + ObObj &index_data, + ObObj &ext_info_data); + +private: + + static ObExtInfoLogType get_type(ObObjType obj_type) + { + ObExtInfoLogType res = OB_INVALID_EXT_INFO_LOG; + switch (obj_type) + { + case ObJsonType: + res = OB_JSON_DIFF_EXT_INFO_LOG; + break; + default: + break; + } + return res; + } + + int build_data_iter(ObObj &ext_info_data); + + int set_index_data(ObObj &index_data); + int set_outrow_ctx_seq_no(ObObj& index_data); + + int get_data(ObString &data); + + int init_header(ObObj& index_data, ObObj &ext_info_data); + +public: + TO_STRING_KV(K(timeout_), K(data_size_), K(seq_no_st_), K(seq_no_cnt_), K(header_writed_)); + +private: + ObArenaAllocator tmp_allocator_; + memtable::ObIMvccCtx 
*mvcc_ctx_; + ObExtInfoLogHeader header_; + ObObj ext_info_data_; + ObLobAccessParam *lob_param_; + ObLobQueryIter *data_iter_; + ObString data_buffer_; + int64_t timeout_; + int64_t data_size_; + transaction::ObTxSEQ seq_no_st_; + uint64_t seq_no_cnt_; + bool header_writed_; +}; + +}; // end namespace storage +}; // end namespace oceanbase + +#endif /* OCEANBASE_STORAGE_OB_EXT_INFO_CALLBACK_ */ diff --git a/src/storage/lob/ob_i_lob_adaptor.h b/src/storage/lob/ob_i_lob_adaptor.h index d0c3cc01e6..9439e007e9 100644 --- a/src/storage/lob/ob_i_lob_adaptor.h +++ b/src/storage/lob/ob_i_lob_adaptor.h @@ -35,6 +35,10 @@ public: virtual int get_lob_data(ObLobAccessParam &param, uint64_t piece_id, ObLobPieceInfo& info) = 0; virtual int revert_scan_iter(common::ObNewRowIterator *iter) = 0; virtual int fetch_lob_id(ObLobAccessParam& param, uint64_t &lob_id) = 0; + virtual int prepare_single_get( + ObLobAccessParam &param, + ObTableScanParam &scan_param, + uint64_t &table_id) = 0; }; } // storage diff --git a/src/storage/lob/ob_lob_locator.cpp b/src/storage/lob/ob_lob_locator.cpp index 9fece50eeb..940f125416 100644 --- a/src/storage/lob/ob_lob_locator.cpp +++ b/src/storage/lob/ob_lob_locator.cpp @@ -245,7 +245,7 @@ int ObLobLocatorHelper::fill_lob_locator_v2(ObDatumRow &row, out_cols_param->at(i)->get_column_id(), rowkey_str_, access_ctx, - datum_meta.get_collation_type(), + ObLobCharsetUtil::get_collation_type(datum_meta.get_type() ,datum_meta.get_collation_type()), false, is_sys_table(access_param.iter_param_.table_id_)))) { STORAGE_LOG(WARN, "Lob: Failed to build lob locator v2", K(ret), K(i), K(datum)); diff --git a/src/storage/lob/ob_lob_manager.cpp b/src/storage/lob/ob_lob_manager.cpp index 69e161e51b..19b8f24e4e 100644 --- a/src/storage/lob/ob_lob_manager.cpp +++ b/src/storage/lob/ob_lob_manager.cpp @@ -91,6 +91,11 @@ int ObLobManager::init() LOG_WARN("init allocator failed.", K(ret)); } else if (OB_FAIL(lob_ctxs_.create(DEFAULT_LOB_META_BUCKET_CNT, &allocator_))) { 
LOG_WARN("Init lob meta maps falied.", K(ret)); + } else if (OB_FAIL(ext_info_log_allocator_.init( + common::ObMallocAllocator::get_instance(), + OB_MALLOC_NORMAL_BLOCK_SIZE, + lib::ObMemAttr(tenant_id, "ExtInfoLog", ObCtxIds::LOB_CTX_ID)))) { + LOG_WARN("init ext info log allocator failed.", K(ret)); } else { OB_ASSERT(sizeof(ObLobCommon) == sizeof(uint32)); lob_ctx_.lob_meta_mngr_ = &meta_manager_; @@ -875,6 +880,119 @@ int ObLobManager::query( return ret; } +int ObLobManager::query(ObString& data, ObLobQueryIter *&result) +{ + INIT_SUCC(ret); + ObLobQueryIter* iter = OB_NEW(ObLobQueryIter, ObMemAttr(MTL_ID(), "LobQueryIter")); + if (OB_ISNULL(iter)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc lob meta scan iterator fail", K(ret)); + } else if (OB_FAIL(iter->open(data, 0, data.length(), CS_TYPE_BINARY, false))) { + LOG_WARN("do lob meta scan failed.", K(ret), K(data)); + // result is not assigned on this path, so the iterator must be released here + iter->reset(); + OB_DELETE(ObLobQueryIter, "unused", iter); + iter = nullptr; + } else { + result = iter; + } + return ret; +} + +int ObLobManager::load_all(ObLobAccessParam &param, ObLobPartialData &partial_data) +{ + INIT_SUCC(ret); + char *output_buf = nullptr; + uint64_t output_len = param.byte_size_; + ObString output_data; + if (OB_ISNULL(output_buf = static_cast(param.allocator_->alloc(output_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc fail", K(ret), K(param)); + } else if (OB_FALSE_IT(output_data.assign_buffer(output_buf, output_len))) { + } else if (OB_FAIL(query(param, output_data))) { + LOG_WARN("do remote query fail", K(ret), K(param), K(output_len)); + } else if (OB_FAIL(partial_data.data_.push_back(ObLobChunkData(output_data)))) { + LOG_WARN("push_back lob chunk data fail", KR(ret)); + } else { + ObLobSeqId seq_id_generator(param.allocator_); + ObString seq_id; + uint64_t offset = 0; + int64_t chunk_count = (param.byte_size_ + partial_data.chunk_size_ - 1)/partial_data.chunk_size_; + for (int64_t i = 0; OB_SUCC(ret) && i < chunk_count; ++i) { + ObLobChunkIndex chunk_index; + chunk_index.offset_ = offset; + chunk_index.pos_ = offset; + 
chunk_index.byte_len_ = std::min(output_len, offset + partial_data.chunk_size_) - offset; + chunk_index.data_idx_ = 0; + if (OB_FAIL(seq_id_generator.get_next_seq_id(seq_id))) { + LOG_WARN("failed to next seq id", K(ret), K(chunk_index)); + } else if (OB_FAIL(ob_write_string(*param.allocator_, seq_id, chunk_index.seq_id_))) { + LOG_WARN("ob_write_stringt seq id fail", K(ret), K(chunk_count), K(output_len), K(partial_data.chunk_size_), K(chunk_index), K(seq_id)); + } else if (OB_FAIL(partial_data.push_chunk_index(chunk_index))) { + LOG_WARN("push_back lob chunk index fail", KR(ret), K(chunk_count), K(output_len), K(partial_data.chunk_size_), K(chunk_index)); + } else { + offset += partial_data.chunk_size_; + } + } + } + return ret; +} + +int ObLobManager::query( + ObIAllocator *allocator, + ObLobLocatorV2 &locator, + int64_t query_timeout_ts, + bool is_load_all, + ObLobPartialData *partial_data, + ObLobCursor *&cursor) +{ + INIT_SUCC(ret); + ObLobAccessParam *param = nullptr; + bool is_remote_lob = false; + common::ObAddr dst_addr; + if (! locator.has_lob_header() || ! locator.is_persist_lob() || locator.is_inrow()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid locator", KR(ret), K(locator)); + } else if (OB_ISNULL(cursor = OB_NEWx(ObLobCursor, allocator))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("alloc fail", K(ret), "size", sizeof(ObLobCursor)); + } else if (OB_ISNULL(param = OB_NEWx(ObLobAccessParam, allocator))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("alloc fail", K(ret), "size", sizeof(ObLobAccessParam)); + } else if (OB_FAIL(build_lob_param(*param, *allocator, CS_TYPE_BINARY, + 0, UINT64_MAX, query_timeout_ts, locator))) { + LOG_WARN("build_lob_param fail", K(ret)); + } else if (! 
param->lob_common_->is_init_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lob common not init", K(ret), KPC(param->lob_common_), KPC(param)); + } else if (OB_ISNULL(param->lob_data_ = reinterpret_cast(param->lob_common_->buffer_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lob data is null", K(ret), KPC(param->lob_common_), KPC(param)); + } else if (OB_FAIL(is_remote(*param, is_remote_lob, dst_addr))) { + LOG_WARN("check is remote fail", K(ret), K(param)); + } else if (OB_ISNULL(partial_data)) { + if (OB_ISNULL(partial_data = OB_NEWx(ObLobPartialData, allocator))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc lob param fail", K(ret), "size", sizeof(ObLobPartialData)); + } else if (OB_FAIL(partial_data->init())) { + LOG_WARN("map create fail", K(ret)); + } else if (OB_FAIL(locator.get_chunk_size(partial_data->chunk_size_))) { + LOG_WARN("get_chunk_size fail", K(ret), K(locator)); + } else { + partial_data->data_length_ = param->byte_size_; + partial_data->locator_.assign_ptr(locator.ptr_, locator.size_); + // new alloc partial_data do load data if need + if ((is_load_all || is_remote_lob) && OB_FAIL(load_all(*param, *partial_data))) { + LOG_WARN("load_all fail", K(ret)); + } + } + } + if (is_remote_lob) { + LOG_INFO("remote_lob", KPC(param->lob_common_), KPC(param->lob_data_), K(dst_addr)); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(cursor->init(allocator, param, partial_data, lob_ctx_))) { + LOG_WARN("cursor init fail", K(ret)); + } + return ret; +} + int ObLobManager::equal(ObLobLocatorV2& lob_left, ObLobLocatorV2& lob_right, ObLobCompareParams& cmp_params, @@ -1262,6 +1380,7 @@ int ObLobManager::check_need_out_row( param.lob_common_->in_row_ = 0; // init out row ctx ObLobDataOutRowCtx *ctx = new(param.lob_data_->buffer_)ObLobDataOutRowCtx(); + ctx->chunk_size_ = param.get_schema_chunk_size() / ObLobDataOutRowCtx::OUTROW_LOB_CHUNK_SIZE_UNIT; // init char len uint64_t *char_len = reinterpret_cast(ctx + 1); *char_len = 0; @@ -1315,18 +1434,38 @@ int 
ObLobManager::init_out_row_ctx( // for erase, most oper len/128K + 2 // for append, most oper len/256K + 1 // for sql update, calc erase+insert - int64_t N = ((len + param.update_len_) / (ObLobMetaUtil::LOB_OPER_PIECE_DATA_SIZE / 2) + 2) * 2; - if (nullptr != param.tx_desc_) { - param.seq_no_st_ = param.tx_desc_->get_and_inc_tx_seq(param.parent_seq_no_.get_branch(), N); + int64_t N = 0; + // use store chunk size for erase, append, partial update + if (param.has_store_chunk_size()) { + int64_t store_chunk_size = 0; + if (OB_FAIL(param.get_store_chunk_size(store_chunk_size))) { + LOG_WARN("get_store_chunk_size fail", KR(ret), K(param)); + } else { + N += ((len + param.update_len_) / (store_chunk_size / 2) + 2); + } } else { - // do nothing, for direct load has no tx desc, do not use seq no + LOG_DEBUG("no store chunk size", K(param)); + } + if (OB_SUCC(ret)) { + // use shema chunk size for full insert and default + N += ((len + param.update_len_) / (param.get_schema_chunk_size() / 2) + 2); + if (nullptr != param.tx_desc_) { + param.seq_no_st_ = param.tx_desc_->get_and_inc_tx_seq(param.parent_seq_no_.get_branch(), N); + } else { + // do nothing, for direct load has no tx desc, do not use seq no + LOG_DEBUG("tx_desc is null", K(param)); + } + param.used_seq_cnt_ = 0; + param.total_seq_cnt_ = N; } - param.used_seq_cnt_ = 0; - param.total_seq_cnt_ = N; } if (OB_SUCC(ret)) { out_row_ctx->seq_no_st_ = param.seq_no_st_.cast_to_int(); - out_row_ctx->is_full_ = 1; + if (ObLobDataOutRowCtx::OpType::SQL == op) { + out_row_ctx->is_full_ = 1; + } else { + out_row_ctx->is_full_ = 0; + } out_row_ctx->offset_ = param.offset_; out_row_ctx->check_sum_ = param.checksum_; out_row_ctx->seq_no_cnt_ = param.used_seq_cnt_; @@ -1345,27 +1484,31 @@ int ObLobManager::update_out_ctx( { int ret = OB_SUCCESS; ObLobDataOutRowCtx *out_row_ctx = reinterpret_cast(param.lob_data_->buffer_); - // update seq no - out_row_ctx->seq_no_cnt_ = param.used_seq_cnt_; - // update checksum - ObBatchChecksum bc; 
- if (old_info != nullptr) { + + if (ObLobDataOutRowCtx::OpType::DIFF == out_row_ctx->op_) { + } else { + // update seq no + out_row_ctx->seq_no_cnt_ = param.used_seq_cnt_; + // update checksum + ObBatchChecksum bc; + if (old_info != nullptr) { + bc.fill(&out_row_ctx->check_sum_, sizeof(out_row_ctx->check_sum_)); + bc.fill(&old_info->lob_id_, sizeof(old_info->lob_id_)); + bc.fill(old_info->seq_id_.ptr(), old_info->seq_id_.length()); + bc.fill(old_info->lob_data_.ptr(), old_info->lob_data_.length()); + out_row_ctx->check_sum_ = bc.calc(); + bc.reset(); + } bc.fill(&out_row_ctx->check_sum_, sizeof(out_row_ctx->check_sum_)); - bc.fill(&old_info->lob_id_, sizeof(old_info->lob_id_)); - bc.fill(old_info->seq_id_.ptr(), old_info->seq_id_.length()); - bc.fill(old_info->lob_data_.ptr(), old_info->lob_data_.length()); + bc.fill(&new_info.lob_id_, sizeof(new_info.lob_id_)); + bc.fill(new_info.seq_id_.ptr(), new_info.seq_id_.length()); + bc.fill(new_info.lob_data_.ptr(), new_info.lob_data_.length()); out_row_ctx->check_sum_ = bc.calc(); - bc.reset(); + // update modified_len + int64_t old_meta_len = (old_info == nullptr) ? 0 : old_info->byte_len_; + int64_t new_meta_len = (new_info.byte_len_); + out_row_ctx->modified_len_ += std::abs(new_meta_len - old_meta_len); } - bc.fill(&out_row_ctx->check_sum_, sizeof(out_row_ctx->check_sum_)); - bc.fill(&new_info.lob_id_, sizeof(new_info.lob_id_)); - bc.fill(new_info.seq_id_.ptr(), new_info.seq_id_.length()); - bc.fill(new_info.lob_data_.ptr(), new_info.lob_data_.length()); - out_row_ctx->check_sum_ = bc.calc(); - // update modified_len - int64_t old_meta_len = (old_info == nullptr) ? 
0 : old_info->byte_len_; - int64_t new_meta_len = (new_info.byte_len_); - out_row_ctx->modified_len_ += std::abs(new_meta_len - old_meta_len); return ret; } @@ -1481,12 +1624,15 @@ int ObLobManager::append( } else { // prepare out row ctx ObLobCtx lob_ctx = lob_ctx_; + int64_t store_chunk_size = 0; if (OB_FAIL(init_out_row_ctx(param, append_lob_len, param.op_type_))) { LOG_WARN("init lob data out row ctx failed", K(ret)); + } else if (OB_FAIL(param.get_store_chunk_size(store_chunk_size))) { + LOG_WARN("get_store_chunk_size fail", KR(ret), K(param)); } // prepare write buffer ObString write_buffer; - int64_t buf_len = OB_MIN(ObLobMetaUtil::LOB_OPER_PIECE_DATA_SIZE, param.byte_size_ + append_lob_len); + int64_t buf_len = store_chunk_size; char *buf = nullptr; if (OB_SUCC(ret)) { buf = reinterpret_cast(param.allocator_->alloc(buf_len)); @@ -1851,7 +1997,8 @@ int ObLobManager::append(ObLobAccessParam& param, ObLobLocatorV2& lob, ObLobMeta ObString read_buffer; uint64_t read_buff_size = OB_MIN(LOB_READ_BUFFER_LEN, append_lob_len); char *read_buff = static_cast(param.allocator_->alloc(read_buff_size)); - if (OB_ISNULL(read_buff)) { + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(read_buff)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("alloc read buffer failed.", K(ret), K(read_buff_size)); } else { @@ -1905,6 +2052,7 @@ int ObLobManager::prepare_lob_common(ObLobAccessParam& param, bool &alloc_inside param.lob_data_->id_ = param.spec_lob_id_; } ObLobDataOutRowCtx *outrow_ctx = new(param.lob_data_->buffer_)ObLobDataOutRowCtx(); + outrow_ctx->chunk_size_ = param.get_schema_chunk_size() / ObLobDataOutRowCtx::OUTROW_LOB_CHUNK_SIZE_UNIT; // init char len uint64_t *char_len = reinterpret_cast(outrow_ctx + 1); *char_len = 0; @@ -1913,6 +2061,13 @@ int ObLobManager::prepare_lob_common(ObLobAccessParam& param, bool &alloc_inside } } else if (param.lob_common_->is_init_) { param.lob_data_ = reinterpret_cast(param.lob_common_->buffer_); + + if (0 == param.lob_data_->byte_size_) { + // 
that is insert when lob_data_->byte_size_ is zero. + // so should update chunk size + ObLobDataOutRowCtx *outrow_ctx = reinterpret_cast(param.lob_data_->buffer_); + outrow_ctx->chunk_size_ = param.get_schema_chunk_size() / ObLobDataOutRowCtx::OUTROW_LOB_CHUNK_SIZE_UNIT; + } } return ret; } @@ -1940,6 +2095,7 @@ int ObLobManager::append( ObLobData *lob_data = param.lob_data_; bool is_remote_lob = false; common::ObAddr dst_addr; + int64_t store_chunk_size = 0; if (OB_FAIL(ret)) { } else if (OB_FAIL(check_handle_size(param))) { LOG_WARN("check handle size failed.", K(ret)); @@ -1995,8 +2151,10 @@ int ObLobManager::append( } else if (param.lob_locator_ != nullptr && !param.lob_locator_->is_persist_lob()) { ret = OB_NOT_IMPLEMENT; LOG_WARN("Unsupport outrow tmp lob.", K(ret), K(param)); + } else if (OB_FAIL(param.get_store_chunk_size(store_chunk_size))) { + LOG_WARN("get_store_chunk_size fail", KR(ret), K(param)); } else { // outrow - ObLobMetaWriteIter iter(data, param.allocator_, ObLobMetaUtil::LOB_OPER_PIECE_DATA_SIZE); + ObLobMetaWriteIter iter(data, param.allocator_, store_chunk_size); ObLobCtx lob_ctx = lob_ctx_; if (OB_FAIL(init_out_row_ctx(param, data.length(), param.op_type_))) { LOG_WARN("init lob data out row ctx failed", K(ret)); @@ -2220,6 +2378,17 @@ int ObLobManager::process_delta(ObLobAccessParam& param, ObLobLocatorV2& lob_loc } break; } + case ObLobDiff::DiffType::WRITE_DIFF : { + if (i != 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("first type must be write_diff", K(ret), K(i), K(diff_header), K(diffs[i])); + } else if (OB_FAIL(process_diff(param, lob_locator, diff_header))) { + LOG_WARN("process_diff fail", K(ret), K(param), K(i), K(*diff_header)); + } else { + i = diff_header->diff_cnt_; + } + break; + } default: { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid diff type", K(ret), K(i), K(diffs[i])); @@ -2234,6 +2403,108 @@ int ObLobManager::process_delta(ObLobAccessParam& param, ObLobLocatorV2& lob_loc return ret; } +static int 
get_extra_diff_data(ObLobLocatorV2 &lob_locator, ObLobDiffHeader *diff_header, ObString &extra_diff_data) +{ + INIT_SUCC(ret); + char *data_ptr = diff_header->get_inline_data_ptr(); + int64_t extra_data_len = lob_locator.size_ - (data_ptr - lob_locator.ptr_); + if (extra_data_len < 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid extra data length", K(ret), K(extra_data_len), K(lob_locator), K(*diff_header)); + } else { + extra_diff_data.assign_ptr(data_ptr, extra_data_len); + } + return ret; +} + +int ObLobManager::process_diff(ObLobAccessParam& param, ObLobLocatorV2& delta_locator, ObLobDiffHeader *diff_header) +{ + INIT_SUCC(ret); + ObTabletID piece_tablet_id; + ObLobPieceInfo piece_info; + ObLobDiff *diffs = diff_header->get_diff_ptr(); + int64_t pos = 0; + ObString new_lob_data(diff_header->persist_loc_size_, diff_header->data_); + ObString extra_diff_data; + int64_t store_chunk_size = 0; + ObLobPartialUpdateRowIter iter; + if (OB_FAIL(param.set_lob_locator(param.lob_locator_))) { + LOG_WARN("failed to set lob locator for param", K(ret), K(param)); + } else if (OB_ISNULL(param.lob_common_)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("null lob common", K(ret), K(param)); + } else if (! 
param.lob_common_->is_init_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lob common not init", K(ret), KPC(param.lob_common_), K(param)); + } else if (param.coll_type_ != ObCollationType::CS_TYPE_BINARY) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("delta lob coll_type must be binary", K(ret), K(param)); + } else if (OB_ISNULL(param.lob_data_ = reinterpret_cast(param.lob_common_->buffer_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lob_data_ is null", KR(ret), K(param)); + } else if (OB_FAIL(param.get_store_chunk_size(store_chunk_size))) { + LOG_WARN("get_store_chunk_size fail", KR(ret), K(param)); + } else if (OB_FAIL(get_extra_diff_data(delta_locator, diff_header, extra_diff_data))) { + LOG_WARN("get_extra_diff_data", K(ret), K(param)); + } else if (OB_FAIL(iter.open(param, delta_locator, diff_header))) { + LOG_WARN("open iter fail", K(ret), K(param), K(diff_header)); + } else if (iter.get_chunk_size() != store_chunk_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("chunk size not match", K(ret), K(iter.get_chunk_size()), K(store_chunk_size), KPC(param.lob_common_), K(param)); + } else { + int64_t seq_cnt = iter.get_modified_chunk_cnt(); + param.seq_no_st_ = param.tx_desc_->get_and_inc_tx_seq(param.parent_seq_no_.get_branch(), seq_cnt); + param.used_seq_cnt_ = 0; + param.total_seq_cnt_ = seq_cnt; + param.op_type_ = ObLobDataOutRowCtx::OpType::DIFF; + if (OB_FAIL(init_out_row_ctx(param, 0, param.op_type_))) { + LOG_WARN("init lob data out row ctx failed", K(ret)); + } + + while(OB_SUCC(ret)) { + ObLobMetaInfo *new_meta_row = nullptr; + ObLobMetaInfo *old_meta_row = nullptr; + int64_t offset = 0; + if (OB_FAIL(iter.get_next_row(offset, old_meta_row, new_meta_row))) { + if (OB_ITER_END != ret) { + LOG_WARN("get_next_row fail", K(ret), K(param), K(diff_header)); + } + } else if (OB_ISNULL(old_meta_row)) { + int32_t seq_id_int = 0; + ObString seq_id_st(sizeof(seq_id_int), reinterpret_cast(&seq_id_int)); + ObString seq_id_ed; + ObLobMetaWriteIter write_iter(new_meta_row->lob_data_, 
param.allocator_, store_chunk_size); + ObString post_data; + ObString remain_buf; + if (OB_FAIL(ObLobSeqId::get_seq_id(offset/store_chunk_size - 1, seq_id_st))) { + LOG_WARN("get_seq_id fail", K(ret), K(offset)); + } else if (OB_FAIL(write_iter.open(param, 0, post_data, remain_buf, seq_id_st, seq_id_ed))) { + LOG_WARN("failed to open meta writer", K(ret), K(write_iter), K(param.byte_size_), K(offset), K(store_chunk_size)); + } else if (OB_FAIL(write_outrow_result(param, write_iter))) { + LOG_WARN("failed to write outrow result", K(ret), K(write_iter), K(param.byte_size_), K(offset), K(store_chunk_size)); + } + write_iter.close(); + } else if (OB_FAIL(update_one_piece( + param, + lob_ctx_, + *old_meta_row, + *new_meta_row, + piece_info, + new_meta_row->lob_data_))) { + LOG_WARN("update_one_piece fail", K(ret), K(offset), K(store_chunk_size)); + } + } + } + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + } + if (OB_FAIL(ret)) { + } else { + param.ext_info_log_.set_raw(extra_diff_data.ptr(), extra_diff_data.length()); + } + return ret; +} + int ObLobManager::getlength_remote(ObLobAccessParam& param, common::ObAddr& dst_addr, uint64_t &len) { int ret = OB_SUCCESS; @@ -2759,6 +3030,7 @@ int ObLobManager::write_outrow_inner(ObLobAccessParam& param, ObLobQueryIter *it // other situations are invalid uint32_t inrow_st = 0; ObString seq_id_st, seq_id_ed; + int64_t store_chunk_size = 0; if (old_data.length() > 0) { // inrow to outrow, set st 0, set ed null seq_id_st.assign_ptr(reinterpret_cast(&inrow_st), sizeof(uint32_t)); @@ -2783,9 +3055,12 @@ int ObLobManager::write_outrow_inner(ObLobAccessParam& param, ObLobQueryIter *it LOG_WARN("unknown state for range.", K(ret), K(found_begin), K(found_end)); } - if (OB_SUCC(ret)) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(param.get_store_chunk_size(store_chunk_size))) { + LOG_WARN("get_store_chunk_size fail", KR(ret), K(param)); + } else { // prepare write iter - ObLobMetaWriteIter write_iter(read_buf, param.allocator_, 
ObLobMetaUtil::LOB_OPER_PIECE_DATA_SIZE); + ObLobMetaWriteIter write_iter(read_buf, param.allocator_, store_chunk_size); if (OB_FAIL(write_iter.open(param, iter, read_buf, padding_size, post_data, remain_buf, seq_id_st, seq_id_ed))) { LOG_WARN("failed to open meta writer", K(ret), K(write_iter), K(meta_iter), K(found_begin), K(found_end), K(range_begin), K(range_end)); @@ -3369,8 +3644,10 @@ int ObLobManager::erase_process_meta_info(ObLobAccessParam& param, ObLobMetaScan result.meta_result_.len_ = tmp_len; // global pos, from 0 + // meta_iter.get_cur_pos use byte_len for binary charset, so cur_piece_begin should use byte_len too for binary charset + // or if char_len is less than for byte_len, will cause cur_piece_begin incorrect uint64_t cur_piece_end = meta_iter.get_cur_pos(); - uint64_t cur_piece_begin = cur_piece_end - result.meta_result_.info_.char_len_; + uint64_t cur_piece_begin = cur_piece_end - (is_char ? result.meta_result_.info_.char_len_ : result.meta_result_.info_.byte_len_); // local pos, from current piece; // if is_char, char pos; else byte pos @@ -3400,6 +3677,10 @@ int ObLobManager::erase_process_meta_info(ObLobAccessParam& param, ObLobMetaScan piece_byte_len = piece_char_len; piece_char_len = result.meta_result_.info_.char_len_; } + } else { + // consider we process char lob as byte len + // we may get a char len which is diff from byte len + piece_char_len = piece_byte_len; } if (OB_FAIL(ret)) { @@ -4150,5 +4431,651 @@ int ObLobQueryRemoteReader::do_fetch_rpc_buffer(ObLobAccessParam& param, return ret; } +/********** ObLobCursor ****************/ +ObLobCursor::~ObLobCursor() +{ + // meta_cache_.destroy(); + // modified_metas_.destroy(); + if (nullptr != param_) { + param_->~ObLobAccessParam(); + param_ = nullptr; + } + if (nullptr != partial_data_) { + partial_data_->~ObLobPartialData(); + partial_data_ = nullptr; + } +} +int ObLobCursor::init(ObIAllocator *allocator, ObLobAccessParam* param, ObLobPartialData *partial_data, ObLobCtx &lob_ctx) +{ 
+ int ret = OB_SUCCESS; + param_ = param; + allocator_ = allocator; + partial_data_ = partial_data; + update_buffer_.set_allocator(allocator_); + if (OB_ISNULL(partial_data)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("partial_data is null", KR(ret)); + } else if (OB_FAIL(partial_data->get_ori_data_length(ori_data_length_))) { + LOG_WARN("get_ori_data_length fail", KR(ret)); + } else if (partial_data->is_full_mode()) { + if (OB_FAIL(init_full(allocator, partial_data))){ + LOG_WARN("init_full fail", KR(ret)); + } + } else if (OB_FAIL(lob_ctx.lob_meta_mngr_->open(*param_, &getter_))) { + LOG_WARN("ObLobMetaSingleGetter open fail", K(ret)); + } + return ret; +} + +int ObLobCursor::init_full(ObIAllocator *allocator, ObLobPartialData *partial_data) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(update_buffer_.append(partial_data->data_[0].data_))) { + LOG_WARN("append data to update buffer fail", KR(ret), K(partial_data->data_.count())); + } else { + partial_data_->data_[0].data_ = update_buffer_.string(); + is_full_mode_ = true; + } + return ret; +} + +int ObLobCursor::get_ptr(int64_t offset, int64_t len, const char *&ptr) +{ + INIT_SUCC(ret); + ObString data; + int64_t start_offset = offset; + int64_t end_offset = offset + len; + int start_chunk_pos = get_chunk_pos(start_offset); + int end_chunk_pos = get_chunk_pos(end_offset - 1); + if (start_chunk_pos != end_chunk_pos && OB_FAIL(merge_chunk_data(start_chunk_pos, end_chunk_pos))) { + LOG_WARN("merge_chunk_data fail", KR(ret), K(start_chunk_pos), K(end_chunk_pos), K(offset), K(len)); + } else if (OB_FAIL(get_chunk_data(start_chunk_pos, data))) { + LOG_WARN("get_chunk_data fail", KR(ret), K(start_chunk_pos), K(end_chunk_pos), K(offset), K(len)); + } else if (data.empty() || data.length() < start_offset - get_chunk_offset(start_chunk_pos) + len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data not enough", KR(ret), K(offset), K(len), K(start_offset), "data_len", data.length()); + } else { + ptr = data.ptr() + offset - 
get_chunk_offset(start_chunk_pos); + } + return ret; +} + +int ObLobCursor::get_ptr_for_write(int64_t offset, int64_t len, char *&ptr) +{ + INIT_SUCC(ret); + ObString data; + int64_t start_offset = offset; + int64_t end_offset = offset + len; + int start_chunk_pos = get_chunk_pos(start_offset); + int end_chunk_pos = get_chunk_pos(end_offset - 1); + if (start_chunk_pos != end_chunk_pos && OB_FAIL(merge_chunk_data(start_chunk_pos, end_chunk_pos))) { + LOG_WARN("merge_chunk_data fail", KR(ret), K(start_chunk_pos), K(end_chunk_pos), K(offset), K(len)); + } else { + for (int i = start_chunk_pos; OB_SUCC(ret) && i <= end_chunk_pos; ++i) { + int chunk_idx = -1; + if (OB_FAIL(get_chunk_idx(i, chunk_idx))) { + LOG_WARN("get_chunk_idx fail", KR(ret), K(i), K(start_chunk_pos), K(end_chunk_pos)); + } else if (OB_FAIL(record_chunk_old_data(chunk_idx))) { + LOG_WARN("record_chunk_old_data fail", KR(ret), K(i)); + } + } + if(OB_FAIL(ret)) { + } else if (OB_FAIL(get_chunk_data(start_chunk_pos, data))) { + LOG_WARN("get_chunk_data fail", KR(ret), K(start_chunk_pos), K(end_chunk_pos), K(offset), K(len)); + } else if (data.empty() || data.length() < start_offset - get_chunk_offset(start_chunk_pos) + len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data not enough", KR(ret), K(offset), K(len), K(start_offset), "data_len", data.length()); + } else { + ptr = data.ptr() + offset - get_chunk_offset(start_chunk_pos); + } + } + return ret; +} + +int ObLobCursor::get_chunk_data_start_pos(const int cur_chunk_pos, int &start_pos) +{ + INIT_SUCC(ret); + const ObLobChunkIndex *chunk_index = nullptr; + const ObLobChunkData *chunk_data = nullptr; + if (OB_FAIL(get_chunk_data(cur_chunk_pos, chunk_index, chunk_data))) { + LOG_WARN("get_chunk_data fail", KR(ret), K(cur_chunk_pos)); + } else { + start_pos = get_chunk_pos(chunk_index->offset_ - chunk_index->pos_); + } + return ret; +} + +int ObLobCursor::get_chunk_data_end_pos(const int cur_chunk_pos, int &end_pos) +{ + INIT_SUCC(ret); + const 
ObLobChunkIndex *chunk_index = nullptr; + const ObLobChunkData *chunk_data = nullptr; + if (OB_FAIL(get_chunk_data(cur_chunk_pos, chunk_index, chunk_data))) { + LOG_WARN("get_chunk_data fail", KR(ret), K(cur_chunk_pos)); + } else { + end_pos = get_chunk_pos(chunk_index->offset_ - chunk_index->pos_ + chunk_data->data_.length() - 1); + } + return ret; +} + +int ObLobCursor::merge_chunk_data(int start_chunk_pos, int end_chunk_pos) +{ + INIT_SUCC(ret); + bool need_merge = false; + ObSEArray chunk_idx_array; + // get the first and last chunk pos of chunk data that start_chunk_pos use + if (OB_FAIL(get_chunk_data_start_pos(start_chunk_pos, start_chunk_pos))) { + LOG_WARN("get_chunk_data_start_pos fail", KR(ret), K(start_chunk_pos)); + } else if (OB_FAIL(get_chunk_data_end_pos(end_chunk_pos, end_chunk_pos))) { + LOG_WARN("get_chunk_data_start_pos fail", KR(ret), K(end_chunk_pos)); + } + // get chunk_index data array index + for (int i = start_chunk_pos; OB_SUCC(ret) && i <= end_chunk_pos; ++i) { + const ObLobChunkIndex *chunk_index = nullptr; + const ObLobChunkData *chunk_data = nullptr; + if (OB_FAIL(get_chunk_data(i, chunk_index, chunk_data))) { + LOG_WARN("get_chunk_data fail", KR(ret), K(i), K(start_chunk_pos), K(end_chunk_pos)); + // some chunk share same data area, so no need push again + // and it will only be shared with adjacent chunks, so only need to check the last + } else if (! 
chunk_idx_array.empty() && chunk_idx_array[chunk_idx_array.count() - 1] == chunk_index->data_idx_) {// skip + } else if (OB_FAIL(chunk_idx_array.push_back(chunk_index->data_idx_))) { + LOG_WARN("push_back idx fail", KR(ret), K(i)); + } + } + int64_t merge_len = 0; + bool use_update_buffer = false; + if (OB_SUCC(ret)) { + // should merge if has multi data area + need_merge = chunk_idx_array.count() != 1; + for (int i = 0; OB_SUCC(ret) && need_merge && i < chunk_idx_array.count(); ++i) { + const ObLobChunkData &chunk_data = partial_data_->data_[chunk_idx_array[i]]; + merge_len += chunk_data.data_.length(); + + if (! update_buffer_.empty() && update_buffer_.ptr() == chunk_data.data_.ptr()) { + // if update_buffer_ is used, last chunk should share the same pointer as update_buffer_ + if (i != chunk_idx_array.count() - 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid chunk data", KR(ret), K(i), K(chunk_idx_array), K(start_chunk_pos), K(end_chunk_pos)); + } else { + use_update_buffer = true; + LOG_DEBUG("set use update buffer", K(i), K(chunk_idx_array.count())); + } + } + } + } + + // get merge buffer ptr + char *buf = nullptr; + if (OB_FAIL(ret) || ! 
need_merge) { + } else if (use_update_buffer) { + // old data also record in chunk data, so there just reset update_buffer_ but not free + // and reserve new buffer for merge + ObString old_data; + if (OB_FAIL(update_buffer_.get_result_string(old_data))) { + LOG_WARN("alloc fail", KR(ret), K(merge_len), K(start_chunk_pos), K(end_chunk_pos), K(chunk_idx_array), K(update_buffer_)); + } else if (OB_FAIL(update_buffer_.reserve(merge_len))) { + LOG_WARN("reserve buffer fail", KR(ret), K(merge_len), K(start_chunk_pos), K(end_chunk_pos), K(chunk_idx_array), K(update_buffer_)); + } else { + buf = update_buffer_.ptr(); + update_buffer_.set_length(merge_len); + } + } else if (OB_ISNULL(buf = reinterpret_cast(allocator_->alloc(merge_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc fail", KR(ret), K(merge_len), K(start_chunk_pos), K(end_chunk_pos), K(chunk_idx_array)); + } + + // do merge if need + if (OB_FAIL(ret) || ! need_merge) { + } else { + int new_chunk_data_idx = chunk_idx_array[0]; + // copy data from old area to merge area + int64_t pos = 0; + for (int i = 0; OB_SUCC(ret) && i < chunk_idx_array.count(); ++i) { + ObLobChunkData &chunk_data = partial_data_->data_[chunk_idx_array[i]]; + MEMCPY(buf + pos, chunk_data.data_.ptr(), chunk_data.data_.length()); + pos += chunk_data.data_.length(); + allocator_->free(chunk_data.data_.ptr()); + chunk_data.data_.reset(); + } + if (OB_SUCC(ret) && pos != merge_len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data merge len incorrect", KR(ret), K(pos), K(merge_len)); + } + // update chunk index offset info + pos = 0; + bool append_chunk_set = false; + for (int i = start_chunk_pos; OB_SUCC(ret) && i <= end_chunk_pos; ++i) { + int chunk_idx = -1; + if (OB_FAIL(get_chunk_idx(i, chunk_idx))) { + LOG_WARN("get_chunk_idx fail", KR(ret), K(i)); + } else if (append_chunk_set && chunk_index(chunk_idx).is_add_) { + } else { + chunk_index(chunk_idx).pos_ = pos; + chunk_index(chunk_idx).data_idx_ = new_chunk_data_idx; + pos += 
chunk_index(chunk_idx).byte_len_; + if (chunk_index(chunk_idx).is_add_) append_chunk_set = true; + } + } + // update chunk data pointer + if (OB_SUCC(ret)) { + partial_data_->data_[new_chunk_data_idx].data_.assign_ptr(buf, merge_len); + } + // defensive check + if (OB_SUCC(ret) && OB_FAIL(check_data_length())) { + LOG_WARN("check len fail", KR(ret)); + } + } + return ret; +} + +int ObLobCursor::check_data_length() +{ + INIT_SUCC(ret); + int64_t check_data_len = 0; + for (int i = 0; i < partial_data_->data_.count(); ++i) { + check_data_len += partial_data_->data_[i].data_.length(); + } + int64_t check_index_len = 0; + for (int i = 0; i < partial_data_->index_.count(); ++i) { + check_index_len += partial_data_->index_[i].byte_len_; + } + if (check_data_len != check_index_len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("check len fail", KR(ret), K(check_data_len), K(check_index_len)); + } + return ret; +} + +int ObLobCursor::get_chunk_data(int chunk_pos, ObString &data) +{ + INIT_SUCC(ret); + const ObLobChunkIndex *chunk_index = nullptr; + const ObLobChunkData *chunk_data = nullptr; + int64_t pos = 0; + if (OB_FAIL(get_chunk_data(chunk_pos, chunk_index, chunk_data))) { + LOG_WARN("get_chunk_data fail", KR(ret), K(chunk_pos)); + // all append chunk will share same chunk index. 
so the real data pos need subtract append chunk offset + // for normal exist chunk, get_chunk_offset(chunk_pos) is equal to chunk_index->offset_ + } else if (0 > (pos = chunk_index->pos_ + get_chunk_offset(chunk_pos) - chunk_index->offset_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("pos is invalid", KR(ret), K(pos), K(chunk_index->pos_), K(get_chunk_offset(chunk_pos)), K(chunk_pos), K(chunk_index->offset_)); + } else { + data.assign_ptr(chunk_data->data_.ptr() + pos, chunk_data->data_.length() - pos); + } + return ret; +} + +int ObLobCursor::get_last_chunk_data_idx(int &chunk_idx) +{ + INIT_SUCC(ret); + int chunk_pos = get_chunk_pos(partial_data_->data_length_ - 1); + if (OB_FAIL(get_chunk_idx(chunk_pos, chunk_idx))) { + LOG_WARN("get_chunk_idx fail", KR(ret), K(partial_data_->data_length_), K(chunk_pos)); + } + return ret; +} + +bool ObLobCursor::is_append_chunk(int chunk_pos) const +{ + int64_t chunk_offset = get_chunk_offset(chunk_pos); + return chunk_offset >= ori_data_length_; +} + +int ObLobCursor::get_chunk_idx(int chunk_pos, int &chunk_idx) +{ + INIT_SUCC(ret); + ObLobMetaInfo meta_info; + ObLobChunkIndex new_chunk_index; + ObLobChunkData chunk_data; + int real_idx = -1; + if (is_append_chunk(chunk_pos)) { + int append_chunk_pos = get_chunk_pos(ori_data_length_ + partial_data_->chunk_size_ - 1); + if (OB_FAIL(partial_data_->search_map_.get_refactored(append_chunk_pos, real_idx))) { + LOG_WARN("get append chunk fail", KR(ret), K(chunk_pos), K(append_chunk_pos), K(ori_data_length_)); + } else { + chunk_idx = real_idx; + } + } else if (OB_SUCC(partial_data_->search_map_.get_refactored(chunk_pos, real_idx))) { + chunk_idx = real_idx; + } else if (OB_FAIL(fetch_meta(chunk_pos, meta_info))) { + LOG_WARN("fetch_meta fail", KR(ret), K(chunk_pos)); + // data return by storage points origin data memory + // should copy if the data may be modified, or old data may be corrupted + } else if (OB_FAIL(ob_write_string(*allocator_, meta_info.lob_data_, chunk_data.data_))) { 
+ LOG_WARN("copy data fail", KR(ret), K(chunk_pos), K(meta_info)); + } else if (OB_FAIL(ob_write_string(*allocator_, meta_info.seq_id_, new_chunk_index.seq_id_))) { + LOG_WARN("copy seq_id data fail", KR(ret), K(chunk_pos), K(meta_info)); + } else if (OB_FAIL(partial_data_->data_.push_back(chunk_data))) { + LOG_WARN("push_back data fail", KR(ret), K(chunk_pos), K(chunk_data)); + } else { + new_chunk_index.offset_ = chunk_pos * partial_data_->chunk_size_; + new_chunk_index.byte_len_ = meta_info.byte_len_; + new_chunk_index.data_idx_ = partial_data_->data_.count() - 1; + if (OB_FAIL(partial_data_->push_chunk_index(new_chunk_index))) { + LOG_WARN("push_back index fail", KR(ret), K(chunk_pos), K(new_chunk_index)); + } else { + chunk_idx = partial_data_->index_.count() - 1; + } + } + return ret; +} + +int ObLobCursor::get_chunk_data(int chunk_pos, const ObLobChunkIndex *&chunk_index, const ObLobChunkData *&chunk_data) +{ + INIT_SUCC(ret); + int chunk_idx = -1; + if (OB_FAIL(get_chunk_idx(chunk_pos, chunk_idx))) { + LOG_WARN("get_chunk_idx fail", KR(ret), K(chunk_pos)); + } else { + chunk_index = &partial_data_->index_[chunk_idx]; + chunk_data = &partial_data_->data_[chunk_index->data_idx_]; + } + return ret; +} + + +int ObLobCursor::get_chunk_pos(int64_t offset) const +{ + return offset / partial_data_->chunk_size_; +} + +int64_t ObLobCursor::get_chunk_offset(int pos) const +{ + return pos * partial_data_->chunk_size_; +} + +int ObLobCursor::fetch_meta(int idx, ObLobMetaInfo &meta_info) +{ + INIT_SUCC(ret); + uint32_t seq_id_buf = 0; + ObString seq_id(sizeof(uint32_t), (char*)(&seq_id_buf)); + if (OB_FAIL(ObLobSeqId::get_seq_id(idx, seq_id))) { + LOG_WARN("get_seq_id fail", K(ret), K(idx)); + } else if (OB_FAIL(getter_.get_next_row(seq_id, meta_info))) { + LOG_WARN("get_next_row fail", K(ret), K(seq_id)); + } + return ret; +} + +int ObLobCursor::append(const char* buf, int64_t buf_len) +{ + return set(partial_data_->data_length_, buf, buf_len); +} + +int 
ObLobCursor::get_data(ObString &data) const +{ + INIT_SUCC(ret); + if (is_full_mode()) { + data = update_buffer_.string(); + } else { + ret = OB_NOT_SUPPORTED; + } + return ret; +} + +int ObLobCursor::reset_data(const ObString &data) +{ + INIT_SUCC(ret); + if (is_full_mode()) { + update_buffer_.reuse(); + ret = update_buffer_.append(data); + partial_data_->data_[0].data_ = update_buffer_.string(); + partial_data_->data_length_ = update_buffer_.length(); + } else { + ret = OB_NOT_SUPPORTED; + } + return ret; +} + +int ObLobCursor::move_data_to_update_buffer(ObLobChunkData *chunk_data) +{ + INIT_SUCC(ret); + if (update_buffer_.length() == 0) { + if (OB_FAIL(update_buffer_.append(chunk_data->data_))) { + LOG_WARN("update buffer reserve fail", KR(ret), KPC(chunk_data), K(update_buffer_)); + } else { + chunk_data->data_ = update_buffer_.string(); + } + } else if (update_buffer_.ptr() != chunk_data->data_.ptr() || update_buffer_.length() != chunk_data->data_.length()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update buffer state incorrect", KR(ret), K(update_buffer_), KPC(chunk_data)); + } + return ret; +} + +int ObLobCursor::push_append_chunk(int64_t append_len) +{ + INIT_SUCC(ret); + int last_chunk_idx = -1; + if (OB_FAIL(get_last_chunk_data_idx(last_chunk_idx))) { + LOG_WARN("get_last_chunk_data fail", KR(ret)); + } else if (! 
chunk_index(last_chunk_idx).is_add_ && chunk_index(last_chunk_idx).byte_len_ + append_len > partial_data_->chunk_size_) { + ObLobChunkIndex new_chunk_index; + new_chunk_index.offset_ = chunk_index(last_chunk_idx).offset_ + partial_data_->chunk_size_; + new_chunk_index.pos_ = chunk_index(last_chunk_idx).pos_ + partial_data_->chunk_size_; + new_chunk_index.byte_len_ = 0; + new_chunk_index.is_add_ = 1; + new_chunk_index.data_idx_ = chunk_index(last_chunk_idx).data_idx_; + if (OB_FAIL(record_chunk_old_data(last_chunk_idx))) { + LOG_WARN("record_chunk_old_data fail", KR(ret), K(last_chunk_idx)); + } else if (OB_FAIL(partial_data_->push_chunk_index(new_chunk_index))) { + LOG_WARN("push_back index fail", KR(ret), K(new_chunk_index)); + } else { + // should be careful. this may cause check_data_length fail + chunk_index(last_chunk_idx).byte_len_ = partial_data_->chunk_size_; + } + } + return ret; +} + +int ObLobCursor::set(int64_t offset, const char *buf, int64_t buf_len, bool use_memmove) +{ + INIT_SUCC(ret); + int64_t start_offset = offset; + int64_t end_offset = offset + buf_len; + int start_chunk_pos = get_chunk_pos(start_offset); + int old_end_chunk_pos = get_chunk_pos(partial_data_->data_length_ - 1); + int end_chunk_pos = get_chunk_pos(end_offset - 1); + int64_t append_len = end_offset > partial_data_->data_length_ ? 
end_offset - partial_data_->data_length_ : 0; + int start_chunk_idx = -1; + if (start_chunk_pos < old_end_chunk_pos && OB_FAIL(merge_chunk_data(start_chunk_pos, old_end_chunk_pos))) { + LOG_WARN("merge_chunk_data fail", KR(ret), K(start_chunk_pos), K(old_end_chunk_pos), K(offset), K(buf_len), K(end_chunk_pos)); + } else if (append_len > 0 && OB_FAIL(push_append_chunk(append_len))) { + LOG_WARN("push_append_chunk fail", KR(ret), K(append_len)); + } else if (OB_FAIL(get_chunk_idx(start_chunk_pos, start_chunk_idx))) { + LOG_WARN("get_chunk_idx fail", KR(ret), K(start_chunk_pos)); + } else if (append_len > 0 && OB_FAIL(move_data_to_update_buffer(&chunk_data(start_chunk_idx)))) { + LOG_WARN("move_data_to_update_buffer fail", KR(ret), K(start_chunk_pos), K(append_len), K(start_chunk_idx)); + } else if (append_len > 0 && OB_FAIL(update_buffer_.reserve(append_len))) { + LOG_WARN("reserve fail", KR(ret), K(start_chunk_pos), K(append_len), K(start_chunk_idx)); + } else if (append_len > 0 && OB_FAIL(update_buffer_.set_length(update_buffer_.length() + append_len))) { + LOG_WARN("set_length fail", KR(ret), K(start_chunk_pos), K(append_len), K(start_chunk_idx)); + } else if (append_len > 0 && OB_FALSE_IT(chunk_data(start_chunk_idx).data_ = update_buffer_.string())) { + } else { + for (int i = start_chunk_pos, chunk_idx = -1; OB_SUCC(ret) && i <= end_chunk_pos; ++i) { + if (OB_FAIL(get_chunk_idx(i, chunk_idx))) { + LOG_WARN("get_chunk_idx fail", KR(ret), K(i)); + } else if (OB_FAIL(record_chunk_old_data(chunk_idx))) { + LOG_WARN("record_chunk_old_data fail", KR(ret), K(i)); + } else if (i == end_chunk_pos && append_len > 0) { + chunk_index(chunk_idx).byte_len_ = (end_offset - chunk_index(chunk_idx).offset_); + } + } + if (OB_SUCC(ret)) { + if (use_memmove) { + MEMMOVE(chunk_data(start_chunk_idx).data_.ptr() + chunk_index(start_chunk_idx).pos_ + (start_offset - chunk_index(start_chunk_idx).offset_), buf, buf_len); + } else { + MEMCPY(chunk_data(start_chunk_idx).data_.ptr() + 
chunk_index(start_chunk_idx).pos_ + (start_offset - chunk_index(start_chunk_idx).offset_), buf, buf_len); + } + partial_data_->data_length_ += append_len; + } + // defensive check + if (OB_SUCC(ret) && OB_FAIL(check_data_length())) { + LOG_WARN("check len fail", KR(ret)); + } + } + return ret; +} + +int ObLobCursor::record_chunk_old_data(int chunk_idx) +{ + INIT_SUCC(ret); + ObLobChunkIndex &chunk_index = partial_data_->index_[chunk_idx]; + if (OB_FAIL(set_old_data(chunk_index))) { + LOG_WARN("record_chunk_old_data fail", KR(ret), K(chunk_index)); + } + return ret; +} + +int ObLobCursor::set_old_data(ObLobChunkIndex &chunk_index) +{ + int ret = OB_SUCCESS; + char *buf = nullptr; + if (chunk_index.old_data_idx_ >= 0) { // has set old + } else if (chunk_index.is_add_) { // add no old + } else if (OB_ISNULL(buf = static_cast(allocator_->alloc(chunk_index.byte_len_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc fail", KR(ret), K(chunk_index)); + } else { + const ObLobChunkData &chunk_data = partial_data_->data_[chunk_index.data_idx_]; + MEMCPY(buf, chunk_data.data_.ptr() + chunk_index.pos_, chunk_index.byte_len_); + if (OB_FAIL(partial_data_->old_data_.push_back(ObLobChunkData(ObString(chunk_index.byte_len_, buf))))) { + LOG_WARN("push_back fail", KR(ret), K(chunk_index)); + } else { + chunk_index.old_data_idx_ = partial_data_->old_data_.count() - 1; + chunk_index.is_modified_ = 1; + } + } + return ret; +} + +int ObLobCursor::get(int64_t offset, int64_t len, ObString &data) const +{ + INIT_SUCC(ret); + const char *ptr = nullptr;; + if (OB_FAIL(get_ptr(offset, len, ptr))) { + LOG_WARN("get_ptr fail", KR(ret), K(offset), K(len), K(data.length())); + } else if (OB_ISNULL(ptr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get_ptr fail", KR(ret), K(offset), K(len), K(data.length())); + } else { + data.assign_ptr(ptr, len); + } + return ret; +} + +// if lob has only one chunk and contains all data, will return true +bool ObLobCursor::has_one_chunk_with_all_data() +{ 
+ bool res = false; + if (OB_ISNULL(partial_data_)) { + } else if (1 != partial_data_->index_.count()) { + } else { + res = (ori_data_length_ == chunk_data(0).data_.length()); + } + return res; +} + +int ObLobCursor::get_one_chunk_with_all_data(ObString &data) +{ + INIT_SUCC(ret); + if (OB_ISNULL(partial_data_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("partial_data_ is null", KR(ret), KPC(this)); + } else if (1 != partial_data_->index_.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("partial_data_ has not only one chunk", KR(ret), K(partial_data_->index_.count())); + } else if (ori_data_length_ != chunk_data(0).data_.length()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("partial_data_ data length incorrect", KR(ret), K(ori_data_length_), K(chunk_data(0))); + } else { + data = chunk_data(0).data_; + LOG_DEBUG("get chunk data success", K(chunk_data(0)), K(data)); + } + return ret; +} + +/********** ObLobCursor ****************/ + +/********** ObLobPartialUpdateRowIter ****************/ + +ObLobPartialUpdateRowIter::~ObLobPartialUpdateRowIter() +{ +} + +int ObLobPartialUpdateRowIter::open(ObLobAccessParam &param, ObLobLocatorV2 &delta_lob, ObLobDiffHeader *diff_header) +{ + int ret = OB_SUCCESS; + param_ = &param; + delta_lob_ = delta_lob; + char *buf = diff_header->data_; + int64_t data_len = diff_header->persist_loc_size_; + int64_t pos = 0; + if (OB_FAIL(partial_data_.init())) { + LOG_WARN("map create fail", K(ret)); + } else if (OB_FAIL(partial_data_.deserialize(buf, data_len, pos))) { + LOG_WARN("deserialize partial data fail", K(ret), K(data_len), K(pos)); + } else if (OB_FAIL(partial_data_.sort_index())) { + LOG_WARN("sort_index fail", K(ret), K(data_len), K(partial_data_)); + } + return ret; +} + +int ObLobPartialUpdateRowIter::get_next_row(int64_t &offset, ObLobMetaInfo *&old_info, ObLobMetaInfo *&new_info) +{ + int ret = OB_SUCCESS; + bool found_row = false; + for(; OB_SUCC(ret) && ! 
found_row && chunk_iter_ < partial_data_.index_.count(); ++chunk_iter_) { + ObLobChunkIndex &idx = partial_data_.index_[chunk_iter_]; + if (1 == idx.is_modified_ || 1 == idx.is_add_) { + ObLobChunkData &chunk_data = partial_data_.data_[idx.data_idx_]; + found_row = true; + ObString old_data; + if (! idx.is_add_ && idx.old_data_idx_ >= 0) { + old_data = partial_data_.old_data_[idx.old_data_idx_].data_; + } + if (! idx.is_add_) { + old_meta_info_.lob_id_ = param_->lob_data_->id_; + old_meta_info_.seq_id_ = idx.seq_id_; + // old_meta_info_.offset_ = idx.offset_; + old_meta_info_.byte_len_ = old_data.length(); + // json column schema charset is utf8, may byte_len_ not equal char_len_ + old_meta_info_.char_len_ = old_meta_info_.byte_len_; + old_meta_info_.piece_id_ = ObLobMetaUtil::LOB_META_INLINE_PIECE_ID;; + old_meta_info_.lob_data_ = old_data; + + new_meta_info_.lob_id_ = param_->lob_data_->id_; + new_meta_info_.seq_id_ = idx.seq_id_; + // new_meta_info_.offset_ = idx.offset_; + new_meta_info_.byte_len_ = idx.byte_len_; + new_meta_info_.char_len_ = idx.byte_len_; + new_meta_info_.piece_id_ = ObLobMetaUtil::LOB_META_INLINE_PIECE_ID;; + new_meta_info_.lob_data_.assign_ptr(chunk_data.data_.ptr() + idx.pos_, idx.byte_len_); + + offset = idx.offset_; + old_info = &old_meta_info_; + new_info = &new_meta_info_; + } else { + new_meta_info_.lob_id_ = param_->lob_data_->id_; + // new_meta_info_.offset_ = idx.offset_; + new_meta_info_.byte_len_ = idx.byte_len_; + new_meta_info_.char_len_ = idx.byte_len_; + new_meta_info_.piece_id_ = ObLobMetaUtil::LOB_META_INLINE_PIECE_ID;; + new_meta_info_.lob_data_.assign_ptr(chunk_data.data_.ptr() + idx.pos_, idx.byte_len_); + + offset = idx.offset_; + new_info = &new_meta_info_; + } + } + } + if (OB_FAIL(ret)) { + } else if (found_row) { + } else if (chunk_iter_ == partial_data_.index_.count()) { + ret = OB_ITER_END; + } + return ret; +} +/********** ObLobPartialUpdateRowIter ****************/ + } // storage } // oceanbase diff --git 
a/src/storage/lob/ob_lob_manager.h b/src/storage/lob/ob_lob_manager.h index 1cc64742e0..bee374cbdb 100644 --- a/src/storage/lob/ob_lob_manager.h +++ b/src/storage/lob/ob_lob_manager.h @@ -109,11 +109,12 @@ public: int open(ObString &data, uint32_t byte_offset, uint32_t byte_len, ObCollationType cs, bool is_reverse = false); // inrow open int open(ObLobAccessParam ¶m, ObLobCtx& lob_ctx, common::ObAddr& dst_addr, bool &is_remote); // open with retry inner int get_next_row(ObString& data); + int get_next_row(ObLobQueryResult &result); // for test uint64_t get_cur_pos() { return meta_iter_.get_cur_pos(); } void reset(); bool is_end() const { return is_end_; } + private: - int get_next_row(ObLobQueryResult &result); // for test bool fill_buffer_to_data(ObString& data); private: // common @@ -137,6 +138,77 @@ private: void* remote_query_ctx_; }; +class ObLobCursor : public ObILobCursor +{ +public: + ObLobCursor(): + param_(nullptr), + allocator_(nullptr), + is_full_mode_(false), + ori_data_length_(0), + partial_data_(nullptr), + getter_() + {} + + ~ObLobCursor(); + int init(ObIAllocator *allocator, ObLobAccessParam* param, ObLobPartialData *partial_data, ObLobCtx &lob_ctx); + int get_data(ObString &data) const; + int64_t get_length() const { return partial_data_->data_length_; } + int reset() { return OB_SUCCESS; } + bool is_full_mode() const { return is_full_mode_; } + bool is_append_chunk(int chunk_pos) const; + int append(const char* buf, int64_t buf_len); + int append(const ObString& data) { return append(data.ptr(), data.length()); } + + int reset_data(const ObString &data); + int set(int64_t offset, const char *buf, int64_t buf_len, bool use_memmove=false); + int get(int64_t offset, int64_t len, ObString &data) const; + // if lob has only one chunk and contains all data, will return true + bool has_one_chunk_with_all_data(); + int get_one_chunk_with_all_data(ObString &data); + + TO_STRING_KV(K(is_full_mode_), K(ori_data_length_)); + +protected: + int 
get_ptr(int64_t offset, int64_t len, const char *&ptr); + int get_ptr(int64_t offset, int64_t len, const char *&ptr) const{ return const_cast(this)->get_ptr(offset, len, ptr); } + int get_ptr_for_write(int64_t offset, int64_t len, char *&ptr); + +private: + int init_full(ObIAllocator *allocator, ObLobPartialData *partial_data); + int get_chunk_pos(int64_t offset) const; + int64_t get_chunk_offset(int pos) const; + int fetch_meta(int idx, ObLobMetaInfo &meta_info); + int merge_chunk_data(int start_meta_idx, int end_meta_idx); + int get_chunk_data(int chunk_pos, ObString &data); + int get_chunk_idx(int chunk_pos, int &chunk_idx); + int get_chunk_data(int chunk_pos, const ObLobChunkIndex *&chunk_index, const ObLobChunkData *&chunk_data); + int get_last_chunk_data_idx(int &chunk_idx); + + int push_append_chunk(int64_t append_len); + int move_data_to_update_buffer(ObLobChunkData *chunk_data); + int set_old_data(ObLobChunkIndex &chunk_index); + int record_chunk_old_data(int chunk_idx); + int record_chunk_old_data(ObLobChunkIndex *chunk_pos); + int get_chunk_data_start_pos(const int cur_chunk_pos, int &start_pos); + int get_chunk_data_end_pos(const int cur_chunk_pos, int &end_pos); + + ObLobChunkIndex& chunk_index(int chunk_idx) { return partial_data_->index_[chunk_idx]; } + const ObLobChunkIndex& chunk_index(int chunk_idx) const { return partial_data_->index_[chunk_idx]; } + ObLobChunkData& chunk_data(int chunk_idx) { return partial_data_->data_[partial_data_->index_[chunk_idx].data_idx_]; } + const ObLobChunkData& chunk_data(int chunk_idx) const { return partial_data_->data_[partial_data_->index_[chunk_idx].data_idx_]; } + + int check_data_length(); +public: + ObLobAccessParam *param_; + ObIAllocator *allocator_; + ObStringBuffer update_buffer_; + bool is_full_mode_; + int64_t ori_data_length_; + ObLobPartialData *partial_data_; + ObLobMetaSingleGetter getter_; +}; + class ObLobManager { public: @@ -209,6 +281,8 @@ public: // Tmp Delta Lob locator interface int 
process_delta(ObLobAccessParam& param, ObLobLocatorV2& lob_locator); + int process_diff(ObLobAccessParam& param, ObLobLocatorV2& lob_locator, ObLobDiffHeader *diff_header); + // Lob data interface int append(ObLobAccessParam& param, ObString& data); @@ -221,6 +295,15 @@ public: ObString& data); int query(ObLobAccessParam& param, ObLobQueryIter *&result); + int query(ObString& data, + ObLobQueryIter *&result); + int query( + ObIAllocator *allocator, + ObLobLocatorV2 &locator, + int64_t query_timeout_ts, + bool is_load_all, + ObLobPartialData *partial_data, + ObLobCursor *&cursor); int write(ObLobAccessParam& param, ObString& data); int write(ObLobAccessParam& param, @@ -260,7 +343,14 @@ public: uint64_t len, int64_t timeout, ObLobLocatorV2 &lob); + + common::ObIAllocator& get_ext_info_log_allocator() { return ext_info_log_allocator_; } + static bool lob_handle_has_char_len(ObLobAccessParam& param); + static int64_t* get_char_len_ptr(ObLobAccessParam& param); + static int update_out_ctx(ObLobAccessParam& param, ObLobMetaInfo *old_info, ObLobMetaInfo& new_info); + inline bool can_write_inrow(uint64_t len, int64_t inrow_threshold) { return len <= inrow_threshold; } + private: // private function int write_inrow_inner(ObLobAccessParam& param, ObString& data, ObString& old_data); @@ -271,6 +361,12 @@ private: int query_inrow_get_iter(ObLobAccessParam& param, ObString &data, uint32_t offset, bool scan_backward, ObLobQueryIter *&result); int erase_imple_inner(ObLobAccessParam& param); + + int update( + ObLobAccessParam& param, + ObLobQueryResult old_row, + ObString& data); + // write mini unit, write lob data, write meta tablet, write piece tablet int write_one_piece(ObLobAccessParam& param, common::ObTabletID& piece_tablet_id, @@ -305,7 +401,6 @@ private: bool alloc_inside, bool &need_out_row); int init_out_row_ctx(ObLobAccessParam& param, uint64_t len, ObLobDataOutRowCtx::OpType op); - int update_out_ctx(ObLobAccessParam& param, ObLobMetaInfo *old_info, ObLobMetaInfo& 
new_info); int check_handle_size(ObLobAccessParam& param); int erase_process_meta_info(ObLobAccessParam& param, ObLobMetaScanIter &meta_iter, ObLobQueryResult &result, ObString &tmp_buff); int prepare_for_write(ObLobAccessParam& param, @@ -329,12 +424,12 @@ private: int fill_zero(char *ptr, uint64_t length, bool is_char, const ObCollationType coll_type, uint32_t byte_len, uint32_t byte_offset, uint32_t &char_len); int prepare_lob_common(ObLobAccessParam& param, bool &alloc_inside); - bool lob_handle_has_char_len(ObLobAccessParam& param); - int64_t* get_char_len_ptr(ObLobAccessParam& param); int fill_lob_locator_extern(ObLobAccessParam& param); + int compare(ObLobAccessParam& param_left, ObLobAccessParam& param_right, int64_t& result); + int load_all(ObLobAccessParam ¶m, ObLobPartialData &partial_data); void transform_lob_id(uint64_t src, uint64_t &dst); private: static const int64_t DEFAULT_LOB_META_BUCKET_CNT = 1543; @@ -347,6 +442,36 @@ private: ObLobCtx lob_ctx_; ObLobMetaManager meta_manager_; ObLobPieceManager piece_manager_; + common::ObFIFOAllocator ext_info_log_allocator_; +}; + +class ObLobPartialUpdateRowIter +{ +public: + ObLobPartialUpdateRowIter(): + param_(nullptr), seq_id_tmp_(0), chunk_iter_(0) + {} + + ~ObLobPartialUpdateRowIter(); + + int open(ObLobAccessParam ¶m, ObLobLocatorV2 &delta_lob, ObLobDiffHeader *diff_header); + + int get_next_row(int64_t &offset, ObLobMetaInfo *&old_info, ObLobMetaInfo *&new_info); + + int64_t get_chunk_size() const { return partial_data_.chunk_size_; } + int64_t get_modified_chunk_cnt() const { return partial_data_.get_modified_chunk_cnt(); } + +private: + ObLobMetaInfo old_meta_info_; + ObLobMetaInfo new_meta_info_; + + // updated lob + ObLobAccessParam *param_; + int32_t seq_id_tmp_; + + ObLobLocatorV2 delta_lob_; + ObLobPartialData partial_data_; + int chunk_iter_; }; } // storage diff --git a/src/storage/lob/ob_lob_meta.cpp b/src/storage/lob/ob_lob_meta.cpp index 45c37c0237..d3378e9757 100644 --- 
a/src/storage/lob/ob_lob_meta.cpp +++ b/src/storage/lob/ob_lob_meta.cpp @@ -17,6 +17,7 @@ #include "ob_lob_meta.h" #include "ob_lob_seq.h" #include "ob_lob_manager.h" +#include "storage/tx_storage/ob_access_service.h" #include "storage/access/ob_table_scan_iterator.h" namespace oceanbase @@ -990,6 +991,19 @@ int ObLobMetaManager::scan(ObLobAccessParam& param, ObLobMetaScanIter &iter) } return ret; } + +int ObLobMetaManager::open(ObLobAccessParam ¶m, ObLobMetaSingleGetter* getter) +{ + int ret = OB_SUCCESS; + ObILobApator *apator = &persistent_lob_adapter_; + if (OB_FAIL(getter->open(param, apator))) { + LOG_WARN("open lob scan iter failed.", K(ret), K(param)); + } else { + getter->lob_adatper_ = apator; + } + return ret; +} + // erase specified range int ObLobMetaManager::erase(ObLobAccessParam& param, ObLobMetaInfo& in_row) { @@ -1021,5 +1035,171 @@ int ObLobMetaManager::fetch_lob_id(ObLobAccessParam& param, uint64_t &lob_id) return ret; } + +int ObLobMetaWriteRowIter::get_next_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(meta_iter_->get_next_row(result_))) { + if (ret != OB_ITER_END) { + LOG_WARN("get next meta info failed.", K(ret)); + } + } else { + result_.info_.lob_data_.assign_ptr(result_.data_.ptr(), result_.data_.length()); + for (int64_t i = 0; i < ObLobMetaUtil::LOB_META_COLUMN_CNT; ++i) { + row_cell_[i].reset(); + row_cell_[i].set_nop_value(); + } + row_cell_[ObLobMetaUtil::LOB_ID_COL_ID].set_varchar(reinterpret_cast(&result_.info_.lob_id_), sizeof(ObLobId)); + row_cell_[ObLobMetaUtil::LOB_ID_COL_ID].set_collation_type(common::ObCollationType::CS_TYPE_BINARY); + row_cell_[ObLobMetaUtil::SEQ_ID_COL_ID].set_varchar(result_.info_.seq_id_); + row_cell_[ObLobMetaUtil::SEQ_ID_COL_ID].set_collation_type(common::ObCollationType::CS_TYPE_BINARY); + row_cell_[ObLobMetaUtil::BYTE_LEN_COL_ID].set_uint32(result_.info_.byte_len_); + row_cell_[ObLobMetaUtil::CHAR_LEN_COL_ID].set_uint32(result_.info_.char_len_); + 
row_cell_[ObLobMetaUtil::PIECE_ID_COL_ID].set_uint64(result_.info_.piece_id_); + + row_cell_[ObLobMetaUtil::LOB_DATA_COL_ID].set_varchar(result_.info_.lob_data_); + row_cell_[ObLobMetaUtil::LOB_DATA_COL_ID].set_collation_type(common::ObCollationType::CS_TYPE_BINARY); + + new_row_.assign(row_cell_, ObLobMetaUtil::LOB_META_COLUMN_CNT); + // refresh param + // refrsh outrow ctx for cdc + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObLobManager::update_out_ctx(*param_, nullptr, result_.info_))) { // new row + LOG_WARN("failed update checksum.", K(ret)); + } else if (OB_FAIL(update_seq_no())) { + LOG_WARN("update_seq_no fail", K(ret)); + } else { + param_->lob_data_->byte_size_ += result_.info_.byte_len_; + param_->byte_size_ = param_->lob_data_->byte_size_; + if (ObLobManager::lob_handle_has_char_len(*param_)) { + int64_t *len = ObLobManager::get_char_len_ptr(*param_); + *len = *len + result_.info_.char_len_; + OB_ASSERT(*len >= 0); + } + // set output + row = &new_row_; + } + } + return ret; +} + +int ObLobMetaWriteRowIter::update_seq_no() +{ + int ret = OB_SUCCESS; + if (param_->seq_no_st_.is_valid()) { + if (param_->used_seq_cnt_ < param_->total_seq_cnt_) { + param_->dml_base_param_->spec_seq_no_ = param_->seq_no_st_ + param_->used_seq_cnt_; + param_->used_seq_cnt_++; + LOG_DEBUG("dml lob meta with seq no", K(param_->dml_base_param_->spec_seq_no_)); + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to get seq no from param.", K(ret), KPC(param_)); + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid seq no from param.", K(ret), KPC(param_)); + } + return ret; +} + +ObLobMetaSingleGetter::~ObLobMetaSingleGetter() +{ + if (OB_NOT_NULL(row_objs_)) { + param_->allocator_->free(row_objs_); + row_objs_ = nullptr; + } + if (OB_NOT_NULL(scan_iter_)) { + scan_iter_->reset(); + if (lob_adatper_ != NULL) { + lob_adatper_->revert_scan_iter(scan_iter_); + } + scan_iter_ = nullptr; + } +} + +int ObLobMetaSingleGetter::open(ObLobAccessParam ¶m, ObILobApator* 
lob_adatper) +{ + int ret = OB_SUCCESS; + void *buf = nullptr; + if (OB_FAIL(lob_adatper->prepare_single_get(param, scan_param_, table_id_))) { + LOG_WARN("failed to open iter", K(ret)); + } else if (OB_ISNULL(buf = param.allocator_->alloc(sizeof(ObObj) * 4))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("alloc range obj failed.", K(ret)); + } else { + row_objs_ = reinterpret_cast(buf); + param_ = ¶m; + } + return ret; +} + +int ObLobMetaSingleGetter::get_next_row(ObString &seq_id, ObLobMetaInfo &info) +{ + int ret = OB_SUCCESS; + common::ObNewRowIterator *iter = nullptr; + blocksstable::ObDatumRow* datum_row = nullptr; + ObObj *row_objs = row_objs_; + const char *lob_id_ptr = reinterpret_cast(¶m_->lob_data_->id_); + row_objs[0].reset(); + row_objs[0].set_varchar(lob_id_ptr, sizeof(ObLobId)); // lob_id + row_objs[0].set_collation_type(common::ObCollationType::CS_TYPE_BINARY); + row_objs[1].reset(); + row_objs[1].set_varchar(seq_id.ptr(), seq_id.length()); // lob_id + row_objs[1].set_collation_type(common::ObCollationType::CS_TYPE_BINARY); + ObRowkey min_row_key(row_objs, 2); + + row_objs[2].reset(); + row_objs[2].set_varchar(lob_id_ptr, sizeof(ObLobId)); // lob_id + row_objs[2].set_collation_type(common::ObCollationType::CS_TYPE_BINARY); + row_objs[3].reset(); + row_objs[3].set_varchar(seq_id.ptr(), seq_id.length()); // lob_id + row_objs[3].set_collation_type(common::ObCollationType::CS_TYPE_BINARY); + ObRowkey max_row_key(row_objs + 2, 2); + + common::ObNewRange range; + range.table_id_ = table_id_; + range.start_key_ = min_row_key; + range.end_key_ = max_row_key; + range.border_flag_.set_inclusive_start(); + range.border_flag_.set_inclusive_end(); + scan_param_.key_ranges_.reset(); + + ObAccessService *oas = MTL(ObAccessService*); + if (OB_FAIL(scan_param_.key_ranges_.push_back(range))) { + LOG_WARN("failed to push key range.", K(ret), K(scan_param_), K(range)); + } else if (OB_ISNULL(oas)) { + ret = OB_ERR_INTERVAL_INVALID; + LOG_WARN("get access service failed.", 
K(ret)); + } else if (OB_NOT_NULL(scan_iter_)) { + scan_iter_->reuse(); + if (OB_FAIL(scan_iter_->rescan(scan_param_))) { + LOG_WARN("rescan fali", K(ret), K(scan_param_)); + } + } else if (OB_FAIL(oas->table_scan(scan_param_, iter))) { + LOG_WARN("do table scan fali", K(ret), K(scan_param_)); + } else { + scan_iter_ = static_cast(iter); + iter = nullptr; + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(scan_iter_->get_next_row(datum_row))) { + LOG_WARN("get next row failed.", K(ret)); + } else if (OB_FAIL(ObLobMetaUtil::transform_from_row_to_info(datum_row, info, false))) { + LOG_WARN("failed to transform row.", K(ret)); + } + if (OB_FAIL(ret)) { + if (iter != nullptr) { + iter->reset(); + if (lob_adatper_ != nullptr) { + lob_adatper_->revert_scan_iter(iter); + } + iter = nullptr; + scan_iter_ = nullptr; + } + } + return ret; +} + + } } diff --git a/src/storage/lob/ob_lob_meta.h b/src/storage/lob/ob_lob_meta.h index b87ab4cc33..d1478711e7 100644 --- a/src/storage/lob/ob_lob_meta.h +++ b/src/storage/lob/ob_lob_meta.h @@ -25,6 +25,8 @@ namespace oceanbase namespace storage { +class ObLobMetaSingleGetter; + class ObLobMetaUtil { public: static const uint64_t LOB_META_COLUMN_CNT = 6; @@ -195,6 +197,9 @@ public: int update(ObLobAccessParam& param, ObLobMetaInfo& old_row, ObLobMetaInfo& new_row); // fetch lob id int fetch_lob_id(ObLobAccessParam& param, uint64_t &lob_id); + + int open(ObLobAccessParam ¶m, ObLobMetaSingleGetter* getter); + TO_STRING_KV("[LOB]", "meta mngr"); private: // lob adaptor @@ -222,6 +227,61 @@ OB_INLINE int64_t ob_lob_writer_length_validation(const common::ObCollationType return len_ret; } + +class ObLobMetaWriteRowIter: public ObNewRowIterator +{ +public: + ObLobMetaWriteRowIter() : param_(nullptr), meta_iter_(nullptr), new_row_(), row_cell_(), result_() {} + ObLobMetaWriteRowIter(ObLobAccessParam *param, ObLobMetaWriteIter *meta_iter) + : param_(param), meta_iter_(meta_iter), new_row_(), row_cell_(), result_() + {} + virtual 
~ObLobMetaWriteRowIter() {} + virtual int get_next_row(ObNewRow *&row); + virtual void reset() { new_row_.reset(); } +private: + int update_seq_no(); + +private: + // disallow copy + DISALLOW_COPY_AND_ASSIGN(ObLobMetaWriteRowIter); +private: + // data members + ObLobAccessParam *param_; + ObLobMetaWriteIter *meta_iter_; + ObNewRow new_row_; + ObObj row_cell_[ObLobMetaUtil::LOB_META_COLUMN_CNT]; + ObLobMetaWriteResult result_; +}; + +class ObLobMetaSingleGetter +{ +public: + ObLobMetaSingleGetter(): + param_(nullptr), + scan_param_(), + row_objs_(nullptr), + table_id_(0), + lob_adatper_(nullptr), + scan_iter_(nullptr) + {} + + ~ObLobMetaSingleGetter(); + + int open(ObLobAccessParam ¶m, ObILobApator* lob_adatper); + + int get_next_row(ObString &seq_id, ObLobMetaInfo &info); + +private: + ObLobAccessParam *param_; + ObTableScanParam scan_param_; + ObObj *row_objs_; + uint64_t table_id_; +public: + ObILobApator *lob_adatper_; + ObTableScanIterator *scan_iter_; +}; + + } // storage } // oceanbase diff --git a/src/storage/lob/ob_lob_persistent_adaptor.cpp b/src/storage/lob/ob_lob_persistent_adaptor.cpp index c6bc241210..af2a07cf6d 100644 --- a/src/storage/lob/ob_lob_persistent_adaptor.cpp +++ b/src/storage/lob/ob_lob_persistent_adaptor.cpp @@ -203,7 +203,7 @@ int ObPersistentLobApator::scan_lob_meta( scan_param.schema_version_ = lob_meta_tablet.get_obj()->get_tablet_meta().max_sync_storage_schema_version_; const uint64_t table_id = 0; scan_param.table_param_ = param.meta_tablet_param_; - if (OB_FAIL(build_common_scan_param(param, table_id, ObLobMetaUtil::LOB_META_COLUMN_CNT, scan_param))) { + if (OB_FAIL(build_common_scan_param(param, table_id, false, ObLobMetaUtil::LOB_META_COLUMN_CNT, scan_param))) { LOG_WARN("build common scan param failed.", K(ret)); } else if (OB_FAIL(prepare_table_param(param, scan_param, true))) { LOG_WARN("prepare lob meta table param failed.", K(ret)); @@ -278,7 +278,7 @@ int ObPersistentLobApator::get_lob_data( const uint64_t table_id = 0; bool 
tmp_scan_backward = param.scan_backward_; param.scan_backward_ = false; - if (OB_FAIL(build_common_scan_param(param, table_id, ObLobPieceUtil::LOB_PIECE_COLUMN_CNT, scan_param))) { + if (OB_FAIL(build_common_scan_param(param, table_id, false, ObLobPieceUtil::LOB_PIECE_COLUMN_CNT, scan_param))) { LOG_WARN("build common scan param failed.", K(ret)); } else if (OB_FAIL(prepare_table_param(param, scan_param, false))) { LOG_WARN("prepare lob meta table param failed.", K(ret)); @@ -436,6 +436,7 @@ int ObPersistentLobApator::build_lob_meta_table_dml( dml_base_param.encrypt_meta_ = &dml_base_param.encrypt_meta_legacy_; dml_base_param.snapshot_ = param.snapshot_; dml_base_param.check_schema_version_ = false; // lob tablet should not check schema version + dml_base_param.write_flag_.set_is_insert_up(); dml_base_param.write_flag_.set_lob_aux(); if (param.seq_no_st_.is_valid()) { if (param.used_seq_cnt_ < param.total_seq_cnt_) { @@ -922,6 +923,7 @@ int ObPersistentLobApator::get_lob_tablet_schema( int ObPersistentLobApator::build_common_scan_param( const ObLobAccessParam ¶m, const uint64_t table_id, + bool is_get, uint32_t col_num, ObTableScanParam& scan_param) { @@ -954,7 +956,7 @@ int ObPersistentLobApator::build_common_scan_param( scan_param.reserved_cell_count_ = scan_param.column_ids_.count(); // table param scan_param.index_id_ = table_id; // table id - scan_param.is_get_ = false; + scan_param.is_get_ = is_get; // set timeout scan_param.timeout_ = param.timeout_; // scan_param.virtual_column_exprs_ @@ -1010,18 +1012,26 @@ int ObPersistentLobApator::get_lob_tablets( ObTabletBindingMdsUserData ddl_data; if (OB_FAIL(inner_get_tablet(param, param.tablet_id_, data_tablet))) { LOG_WARN("failed to get data tablet", K(ret), K(param.ls_id_), K(param.tablet_id_)); - } else if (OB_FAIL(data_tablet.get_obj()->ObITabletMdsInterface::get_ddl_data(share::SCN::max_scn(), ddl_data))) { - LOG_WARN("failed to get ddl data from tablet", K(ret), K(data_tablet)); } else { - const 
common::ObTabletID &lob_meta_tablet_id = ddl_data.lob_meta_tablet_id_; - const common::ObTabletID &lob_piece_tablet_id = ddl_data.lob_piece_tablet_id_; - if (OB_UNLIKELY(check_lob_tablet_id(param.tablet_id_, lob_meta_tablet_id, lob_piece_tablet_id))) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid data or lob tablet id.", K(ret), K(param.tablet_id_), K(lob_meta_tablet_id), K(lob_piece_tablet_id)); - } else if (OB_FAIL(inner_get_tablet(param, lob_meta_tablet_id, lob_meta_tablet))) { - LOG_WARN("get lob meta tablet failed.", K(ret), K(lob_meta_tablet_id)); - } else if (OB_FAIL(inner_get_tablet(param, lob_piece_tablet_id, lob_piece_tablet))) { - LOG_WARN("get lob meta tablet failed.", K(ret), K(lob_piece_tablet_id)); + if (!param.lob_meta_tablet_id_.is_valid() || !param.lob_piece_tablet_id_.is_valid()) { + if (OB_FAIL(data_tablet.get_obj()->ObITabletMdsInterface::get_ddl_data(share::SCN::max_scn(), ddl_data))) { + LOG_WARN("failed to get ddl data from tablet", K(ret), K(data_tablet)); + } else { + param.lob_meta_tablet_id_ = ddl_data.lob_meta_tablet_id_; + param.lob_piece_tablet_id_ = ddl_data.lob_piece_tablet_id_; + } + } + if (OB_SUCC(ret)) { + const common::ObTabletID &lob_meta_tablet_id = param.lob_meta_tablet_id_; + const common::ObTabletID &lob_piece_tablet_id = param.lob_piece_tablet_id_; + if (OB_UNLIKELY(check_lob_tablet_id(param.tablet_id_, lob_meta_tablet_id, lob_piece_tablet_id))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid data or lob tablet id.", K(ret), K(param.tablet_id_), K(lob_meta_tablet_id), K(lob_piece_tablet_id)); + } else if (OB_FAIL(inner_get_tablet(param, lob_meta_tablet_id, lob_meta_tablet))) { + LOG_WARN("get lob meta tablet failed.", K(ret), K(lob_meta_tablet_id)); + } else if (OB_FAIL(inner_get_tablet(param, lob_piece_tablet_id, lob_piece_tablet))) { + LOG_WARN("get lob meta tablet failed.", K(ret), K(lob_piece_tablet_id)); + } } } return ret; @@ -1146,6 +1156,31 @@ int ObLobUpdIterator::get_next_row(ObNewRow *&row) return ret; } 
+// Fill scan_param for single-row gets (is_get = true) against the lob meta tablet resolved from param.
+int ObPersistentLobApator::prepare_single_get(
+    ObLobAccessParam &param,
+    ObTableScanParam &scan_param,  // [out] tablet id, schema version and table param are set here
+    uint64_t &table_id)  // only read: forwarded to build_common_scan_param, never assigned here
+{
+  int ret = OB_SUCCESS;
+  ObTabletHandle data_tablet;
+  ObTabletHandle lob_meta_tablet;
+  ObTabletHandle lob_piece_tablet;
+  if (OB_FAIL(get_lob_tablets(param, data_tablet, lob_meta_tablet, lob_piece_tablet))) {
+    LOG_WARN("failed to get tablets.", K(ret), K(param));
+  } else {
+    scan_param.tablet_id_ = lob_meta_tablet.get_obj()->get_tablet_meta().tablet_id_;
+    scan_param.schema_version_ = lob_meta_tablet.get_obj()->get_tablet_meta().max_sync_storage_schema_version_;
+    scan_param.table_param_ = param.meta_tablet_param_;
+    if (OB_FAIL(build_common_scan_param(param, table_id, true, ObLobMetaUtil::LOB_META_COLUMN_CNT, scan_param))) {
+      LOG_WARN("build common scan param failed.", K(ret));
+    } else if (OB_FAIL(prepare_table_param(param, scan_param, true))) {
+      LOG_WARN("prepare lob meta table param failed.", K(ret));
+    }
+  }
+  return ret;
+}
+
 
 } // storage
 } // oceanbase
diff --git a/src/storage/lob/ob_lob_persistent_adaptor.h b/src/storage/lob/ob_lob_persistent_adaptor.h
index 8bb1df34d3..38e6f99f08 100644
--- a/src/storage/lob/ob_lob_persistent_adaptor.h
+++ b/src/storage/lob/ob_lob_persistent_adaptor.h
@@ -89,7 +89,7 @@ private:
     int64_t &tenant_schema_version);
 
   int get_lob_tablets(
-    ObLobAccessParam& param,
+    ObLobAccessParam &param,
     ObTabletHandle &data_tablet,
     ObTabletHandle &lob_meta_tablet,
     ObTabletHandle &lob_piece_tablet);
@@ -105,6 +105,7 @@ private:
   int build_common_scan_param(
     const ObLobAccessParam &param,
     const uint64_t table_id,
+    bool is_get,
     uint32_t col_num,
     ObTableScanParam& scan_param);
   int inner_get_tablet(
@@ -159,6 +160,11 @@ private:
     const ObLobAccessParam &param,
     ObTableScanParam &scan_param);
 
+  int prepare_single_get(
+    ObLobAccessParam &param,
+    ObTableScanParam &scan_param,
+    uint64_t &table_id);
+
 private:
   const uint64_t tenant_id_;
 
diff --git a/src/storage/lob/ob_lob_seq.cpp
b/src/storage/lob/ob_lob_seq.cpp
index 370ac3ab60..6aa7200c3d 100644
--- a/src/storage/lob/ob_lob_seq.cpp
+++ b/src/storage/lob/ob_lob_seq.cpp
@@ -33,6 +33,28 @@ uint32_t ObLobSeqId::load32be(const char *ptr) {
   return ntohl(val);
 }
 
+// Write the seq id for chunk index `idx` into the caller-supplied buffer.
+// The value (idx + 1) * LOB_SEQ_STEP_LEN is stored via store32be() at seq_id.ptr();
+// seq_id must already point at a buffer of at least sizeof(uint32_t) bytes.
+// Returns OB_INVALID_ARGUMENT if the buffer is too short or the value would
+// exceed LOB_SEQ_STEP_MAX; in both cases the buffer is left untouched.
+int ObLobSeqId::get_seq_id(int64_t idx, ObString &seq_id)
+{
+  INIT_SUCC(ret);
+  if (seq_id.length() < sizeof(uint32_t)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("seq id buffer not enough", K(ret), K(idx), K(seq_id));
+  } else if ((idx + 1) * ObLobSeqId::LOB_SEQ_STEP_LEN > ObLobSeqId::LOB_SEQ_STEP_MAX) {
+    // must fail here: otherwise the caller sees success with seq_id never written
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("idx too big", K(ret), K(idx));
+  } else {
+    uint32_t seq_num = (idx + 1) * ObLobSeqId::LOB_SEQ_STEP_LEN;
+    ObLobSeqId::store32be(seq_id.ptr(), seq_num);
+  }
+  return ret;
+}
+
 ObLobSeqId::ObLobSeqId(const ObString& seq_id, ObIAllocator* allocator)
   : allocator_(allocator),
     seq_id_(seq_id),
diff --git a/src/storage/lob/ob_lob_seq.h b/src/storage/lob/ob_lob_seq.h
index e57a8c9e4e..2c634f4ef8 100644
--- a/src/storage/lob/ob_lob_seq.h
+++ b/src/storage/lob/ob_lob_seq.h
@@ -43,6 +43,7 @@ public:
   void reset();
   static char* store32be(char *ptr, uint32_t val);
   static uint32_t load32be(const char *ptr);
+  static int get_seq_id(int64_t idx, ObString &seq_id);
   int64_t to_string(char* buf, const int64_t buf_len) const;
 private:
   int init_digits();
diff --git a/src/storage/lob/ob_lob_util.cpp b/src/storage/lob/ob_lob_util.cpp
index d83a001a3b..a110ab1add 100644
--- a/src/storage/lob/ob_lob_util.cpp
+++ b/src/storage/lob/ob_lob_util.cpp
@@ -26,6 +26,15 @@ using namespace transaction;
 namespace storage
 {
 
+ObCollationType ObLobCharsetUtil::get_collation_type(ObObjType type, ObCollationType ori_coll_type)
+{
+  ObCollationType coll_type = ori_coll_type;
+  if (ob_is_json(type)) {
+    coll_type = CS_TYPE_BINARY;
+  }
+  return coll_type;
+}
+
 int ObLobAccessParam::set_lob_locator(common::ObLobLocatorV2 *lob_locator)
 {
   int ret = OB_SUCCESS;
@@ -50,6 +59,51 @@ int ObLobAccessParam::set_lob_locator(common::ObLobLocatorV2 *lob_locator)
   return ret;
 }
 
+int64_t ObLobAccessParam::get_schema_chunk_size() const +{ + uint64_t chunk_size = 0; + if (0 == schema_chunk_size_ || schema_chunk_size_ > ObLobMetaUtil::LOB_OPER_PIECE_DATA_SIZE) { + chunk_size = ObLobMetaUtil::LOB_OPER_PIECE_DATA_SIZE; + } else { + chunk_size = schema_chunk_size_; + } + return chunk_size; +} + +bool ObLobAccessParam::has_store_chunk_size() const +{ + bool bres = false; + if (OB_ISNULL(lob_common_)) { + } else if (lob_common_->in_row_ || ! lob_common_->is_init_) { + } else if (OB_ISNULL(lob_data_)) { + } else { + bres = true; + } + return bres; +} + +// chunk size can be changed online. +// that means lob data that has been writed may have different chunk size with schema +// so here should get chunk size according context +int ObLobAccessParam::get_store_chunk_size(int64_t &chunk_size) const +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(lob_common_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lob_common_ is null", KR(ret), KPC(this)); + } else if (lob_common_->in_row_ || ! 
lob_common_->is_init_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lob_common_ is not outrow", KR(ret), KPC(lob_common_), KPC(this)); + } else if (OB_ISNULL(lob_data_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lob_data_ is null", KR(ret), KPC(lob_common_), KPC(this)); + } else { + ObLobDataOutRowCtx *outrow_ctx = reinterpret_cast(lob_data_->buffer_); + chunk_size = outrow_ctx->get_real_chunk_size(); + } + return ret; +} + int64_t ObLobAccessParam::get_inrow_threshold() { int64_t res = inrow_threshold_; @@ -117,6 +171,7 @@ int ObInsertLobColumnHelper::end_trans(transaction::ObTxDesc *tx_desc, int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, const share::ObLSID ls_id, const common::ObTabletID tablet_id, + const ObObjType &obj_type, const ObCollationType &cs_type, const ObLobStorageParam &lob_storage_param, blocksstable::ObStorageDatum &datum, @@ -168,7 +223,7 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, lob_param.sql_mode_ = SMO_DEFAULT; lob_param.ls_id_ = ls_id; lob_param.tablet_id_ = tablet_id; - lob_param.coll_type_ = cs_type; + lob_param.coll_type_ = ObLobCharsetUtil::get_collation_type(obj_type, cs_type); lob_param.allocator_ = &allocator; lob_param.lob_common_ = nullptr; lob_param.timeout_ = timeout_ts; @@ -197,6 +252,7 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, const share::ObLSID ls_id, const common::ObTabletID tablet_id, + const ObObjType &obj_type, const ObCollationType &cs_type, const ObLobStorageParam &lob_storage_param, ObObj &obj, @@ -205,7 +261,7 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, int ret = OB_SUCCESS; ObStorageDatum datum; datum.from_obj(obj); - if (OB_SUCC(insert_lob_column(allocator, ls_id, tablet_id, cs_type, lob_storage_param, datum, timeout_ts, obj.has_lob_header(), MTL_ID()))) { + if (OB_SUCC(insert_lob_column(allocator, ls_id, tablet_id, obj_type, 
cs_type, lob_storage_param, datum, timeout_ts, obj.has_lob_header(), MTL_ID()))) { obj.set_lob_value(obj.get_type(), datum.get_string().ptr(), datum.get_string().length()); } return ret; @@ -216,6 +272,7 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, const share::ObLSID ls_id, const common::ObTabletID tablet_id, const ObLobId &lob_id, + const ObObjType &obj_type, const ObCollationType collation_type, const ObLobStorageParam &lob_storage_param, blocksstable::ObStorageDatum &datum, @@ -287,5 +344,227 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, return ret; } +OB_DEF_SERIALIZE_SIZE(ObLobChunkIndex) +{ + int64_t len = 0; + OB_UNIS_ADD_LEN(seq_id_); + OB_UNIS_ADD_LEN(offset_); + OB_UNIS_ADD_LEN(pos_); + OB_UNIS_ADD_LEN(byte_len_); + OB_UNIS_ADD_LEN(flag_); + OB_UNIS_ADD_LEN(data_idx_); + OB_UNIS_ADD_LEN(old_data_idx_); + return len; +} + +OB_DEF_SERIALIZE(ObLobChunkIndex) +{ + int ret = OB_SUCCESS; + OB_UNIS_ENCODE(seq_id_); + OB_UNIS_ENCODE(offset_); + OB_UNIS_ENCODE(pos_); + OB_UNIS_ENCODE(byte_len_); + OB_UNIS_ENCODE(flag_); + OB_UNIS_ENCODE(data_idx_); + OB_UNIS_ENCODE(old_data_idx_); + return ret; +} + +OB_DEF_DESERIALIZE(ObLobChunkIndex) +{ + int ret = OB_SUCCESS; + OB_UNIS_DECODE(seq_id_); + OB_UNIS_DECODE(offset_); + OB_UNIS_DECODE(pos_); + OB_UNIS_DECODE(byte_len_); + OB_UNIS_DECODE(flag_); + OB_UNIS_DECODE(data_idx_); + OB_UNIS_DECODE(old_data_idx_); + return ret; +} + +class ObLobChunkIndexComparator +{ +public: + bool operator()(const ObLobChunkIndex &a, const ObLobChunkIndex &b) const + { + return a.offset_ < b.offset_; + } +}; + +OB_DEF_SERIALIZE_SIZE(ObLobChunkData) +{ + int64_t len = 0; + OB_UNIS_ADD_LEN(data_); + return len; +} + +OB_DEF_SERIALIZE(ObLobChunkData) +{ + int ret = OB_SUCCESS; + OB_UNIS_ENCODE(data_) + return ret; +} + +OB_DEF_DESERIALIZE(ObLobChunkData) +{ + int ret = OB_SUCCESS; + OB_UNIS_DECODE(data_) + return ret; +} + +OB_DEF_SERIALIZE_SIZE(ObLobPartialData) +{ + int64_t len = 
0; + OB_UNIS_ADD_LEN(chunk_size_); + OB_UNIS_ADD_LEN(data_length_); + OB_UNIS_ADD_LEN(locator_); + OB_UNIS_ADD_LEN(index_.count()); + for (int i = 0; i < index_.count(); ++i) { + len += index_[i].get_serialize_size(); + } + OB_UNIS_ADD_LEN(data_.count()); + for (int i = 0; i < data_.count(); ++i) { + len += data_[i].get_serialize_size(); + } + OB_UNIS_ADD_LEN(old_data_.count()); + for (int i = 0; i < old_data_.count(); ++i) { + len += old_data_[i].get_serialize_size(); + } + return len; +} + +OB_DEF_SERIALIZE(ObLobPartialData) +{ + int ret = OB_SUCCESS; + int32_t index_count = index_.count(); + int32_t data_count = data_.count(); + int32_t old_data_count = old_data_.count(); + + OB_UNIS_ENCODE(chunk_size_); + OB_UNIS_ENCODE(data_length_); + OB_UNIS_ENCODE(locator_); + OB_UNIS_ENCODE(index_count); + for (int i = 0; OB_SUCC(ret) && i < index_count; ++i) { + if (OB_FAIL(index_[i].serialize(buf, buf_len, pos))) { + LOG_ERROR("serialize failed", K(ret), K(pos), K(buf_len)); + } + } + OB_UNIS_ENCODE(data_count); + for (int i = 0; OB_SUCC(ret) && i < data_count; ++i) { + if (OB_FAIL(data_[i].serialize(buf, buf_len, pos))) { + LOG_ERROR("serialize failed", K(ret), K(pos), K(buf_len), K(i)); + } + } + OB_UNIS_ENCODE(old_data_count); + for (int i = 0; OB_SUCC(ret) && i < old_data_count; ++i) { + if (OB_FAIL(old_data_[i].serialize(buf, buf_len, pos))) { + LOG_ERROR("serialize failed", K(ret), K(pos), K(buf_len), K(i)); + } + } + return ret; +} + +OB_DEF_DESERIALIZE(ObLobPartialData) +{ + int ret = OB_SUCCESS; + int32_t index_count = 0; + int32_t data_count = 0; + int32_t old_data_count = 0; + + OB_UNIS_DECODE(chunk_size_); + OB_UNIS_DECODE(data_length_); + OB_UNIS_DECODE(locator_); + OB_UNIS_DECODE(index_count); + for (int32_t i = 0; OB_SUCC(ret) && i < index_count; ++i) { + ObLobChunkIndex idx; + int32_t data_idx = 0; + if (OB_FAIL(idx.deserialize(buf, data_len, pos))) { + LOG_ERROR("deserialize chunk idx failed", K(ret), K(pos), K(data_len), K(i)); + } else if 
(OB_FAIL(push_chunk_index(idx))) { + LOG_ERROR("deserialize push_back failed", K(ret), K(pos), K(data_len), K(i)); + } + } + OB_UNIS_DECODE(data_count); + for (int32_t i = 0; OB_SUCC(ret) && i < data_count; ++i) { + ObLobChunkData data; + if (OB_FAIL(data.deserialize(buf, data_len, pos))) { + LOG_ERROR("deserialize failed", K(ret), K(pos), K(data_len), K(i)); + } else if (OB_FAIL(data_.push_back(data))) { + LOG_ERROR("deserialize failed", K(ret), K(pos), K(data_len), K(i)); + } + } + OB_UNIS_DECODE(old_data_count); + for (int32_t i = 0; OB_SUCC(ret) && i < old_data_count; ++i) { + ObLobChunkData data; + if (OB_FAIL(data.deserialize(buf, data_len, pos))) { + LOG_ERROR("deserialize failed", K(ret), K(pos), K(data_len), K(i)); + } else if (OB_FAIL(old_data_.push_back(data))) { + LOG_ERROR("deserialize failed", K(ret), K(pos), K(data_len), K(i)); + } + } + return ret; +} + +int ObLobPartialData::init() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(search_map_.create(10, "LobPartial"))) { + LOG_WARN("map create fail", K(ret)); + } + return ret; +} + +int ObLobPartialData::push_chunk_index(const ObLobChunkIndex &chunk_index) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(index_.push_back(chunk_index))) { + LOG_ERROR("push_back failed", K(ret)); + } else if (OB_FAIL(search_map_.set_refactored(chunk_index.offset_/chunk_size_, index_.count() - 1))) { + LOG_ERROR("set_refactored failed", K(ret), K(index_.count()), K(chunk_index)); + } + return ret; +} + +int ObLobPartialData::get_ori_data_length(int64_t &len) const +{ + ObLobLocatorV2 locator(locator_); + return locator.get_lob_data_byte_len(len); +} + +int ObLobPartialData::sort_index() +{ + int ret = OB_SUCCESS; + std::sort(index_.begin(), index_.end(), ObLobChunkIndexComparator()); + search_map_.reuse(); + for (int i = 0; i < index_.count(); ++i) { + const ObLobChunkIndex &chunk_index = index_[i]; + if (OB_FAIL(search_map_.set_refactored(chunk_index.offset_/chunk_size_, i))) { + LOG_ERROR("set_refactored failed", K(ret), 
K(index_.count()), K(chunk_index)); + } + } + return ret; +} + +bool ObLobPartialData::is_full_mode() +{ + return data_.count() == 1 && data_[0].data_.length() == data_length_; +} + +int64_t ObLobPartialData::get_modified_chunk_cnt() const +{ + int64_t chunk_cnt = 0; + for (int i = 0; i < index_.count(); ++i) { + const ObLobChunkIndex &chunk_index = index_[i]; + if (chunk_index.is_modified_) { + chunk_cnt++; + } else if (chunk_index.is_add_) { + // new add chunk contain all new append data, it may be more than one chunk + chunk_cnt += (chunk_index.byte_len_ + chunk_size_ - 1) / chunk_size_; + } + } + return chunk_cnt; +} + } } diff --git a/src/storage/lob/ob_lob_util.h b/src/storage/lob/ob_lob_util.h index 2440694777..5425c972a8 100644 --- a/src/storage/lob/ob_lob_util.h +++ b/src/storage/lob/ob_lob_util.h @@ -23,7 +23,8 @@ #include "share/schema/ob_table_param.h" #include "common/object/ob_object.h" #include "storage/lob/ob_lob_seq.h" - +#include "storage/lob/ob_ext_info_callback.h" +#include "lib/hash/ob_hashmap.h" namespace oceanbase { @@ -31,6 +32,12 @@ namespace oceanbase namespace storage { +class ObLobCharsetUtil +{ +public: + static ObCollationType get_collation_type(ObObjType type, ObCollationType ori_coll_type); +}; + struct ObLobStorageParam { ObLobStorageParam(): @@ -50,13 +57,14 @@ struct ObLobAccessParam { meta_table_schema_(nullptr), piece_table_schema_(nullptr), main_tablet_param_(nullptr), meta_tablet_param_(nullptr), piece_tablet_param_(nullptr), tenant_id_(MTL_ID()), src_tenant_id_(MTL_ID()), - ls_id_(), tablet_id_(), coll_type_(), lob_locator_(nullptr), - lob_common_(nullptr), lob_data_(nullptr), byte_size_(0), handle_size_(0), timeout_(0), + ls_id_(), tablet_id_(), lob_meta_tablet_id_(), lob_piece_tablet_id_(), + coll_type_(), lob_locator_(nullptr), lob_common_(nullptr), + lob_data_(nullptr), byte_size_(0), handle_size_(0), timeout_(0), fb_snapshot_(), scan_backward_(false), asscess_ptable_(false), offset_(0), len_(0), parent_seq_no_(), 
seq_no_st_(), used_seq_cnt_(0), total_seq_cnt_(0), checksum_(0), update_len_(0), op_type_(ObLobDataOutRowCtx::OpType::SQL), is_fill_zero_(false), from_rpc_(false), - inrow_read_nocopy_(false), inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD), spec_lob_id_(), + inrow_read_nocopy_(false), inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD), schema_chunk_size_(OB_DEFAULT_LOB_CHUNK_SIZE), spec_lob_id_(), remote_query_ctx_(nullptr) {} ~ObLobAccessParam() { @@ -71,12 +79,26 @@ public: bool is_full_insert() const { return op_type_ == ObLobDataOutRowCtx::OpType::SQL && 0 == offset_ && 0 == byte_size_; } int set_lob_locator(common::ObLobLocatorV2 *lob_locator); + + // chunk size can be changed online. + // that means lob data that has been writed may have different chunk size with schema + // so here need use different function to get chunk size + int64_t get_schema_chunk_size() const; + bool has_store_chunk_size() const; + int get_store_chunk_size(int64_t &chunk_size) const; + ObLobDataOutRowCtx* get_data_outrow_ctx() + { + return reinterpret_cast(lob_data_->buffer_); + } + int64_t get_inrow_threshold(); - TO_STRING_KV(K_(tenant_id), K_(src_tenant_id), K_(ls_id), K_(tablet_id), KPC_(lob_locator), KPC_(lob_common), - KPC_(lob_data), K_(byte_size), K_(handle_size), K_(coll_type), K_(scan_backward), K_(offset), K_(len), - K_(parent_seq_no), K_(seq_no_st), K_(used_seq_cnt), K_(total_seq_cnt), K_(checksum), + + TO_STRING_KV(K_(tenant_id), K_(src_tenant_id), K_(ls_id), K_(tablet_id), K_(lob_meta_tablet_id), K_(lob_piece_tablet_id), + KPC_(lob_locator), KPC_(lob_common), KPC_(lob_data), K_(byte_size), K_(handle_size), + K_(coll_type), K_(scan_backward), K_(offset), K_(len), K_(parent_seq_no), K_(seq_no_st), K_(used_seq_cnt), K_(total_seq_cnt), K_(checksum), K_(update_len), K_(op_type), K_(is_fill_zero), K_(from_rpc), K_(snapshot), K_(tx_id), K_(read_latest), - K_(inrow_read_nocopy), K_(inrow_threshold), K_(spec_lob_id)); + K_(inrow_read_nocopy), K_(schema_chunk_size), 
K_(inrow_threshold), K_(spec_lob_id)); + public: transaction::ObTxDesc *tx_desc_; // for write/update/delete transaction::ObTxReadSnapshot snapshot_; // for read @@ -98,6 +120,8 @@ public: uint64_t src_tenant_id_; share::ObLSID ls_id_; common::ObTabletID tablet_id_; + common::ObTabletID lob_meta_tablet_id_; + common::ObTabletID lob_piece_tablet_id_; common::ObCollationType coll_type_; common::ObLobLocatorV2 *lob_locator_; // should set by set_lob_locator common::ObLobCommon *lob_common_; // lob common @@ -123,6 +147,8 @@ public: bool from_rpc_; bool inrow_read_nocopy_; int64_t inrow_threshold_; + int64_t schema_chunk_size_; + ObObj ext_info_log_; ObLobId spec_lob_id_; // remote query ctx void *remote_query_ctx_; @@ -235,6 +261,7 @@ public: static int insert_lob_column(ObIAllocator &allocator, const share::ObLSID ls_id, const common::ObTabletID tablet_id, + const ObObjType &obj_type, const ObCollationType &cs_type, const ObLobStorageParam &lob_storage_param, blocksstable::ObStorageDatum &datum, @@ -244,6 +271,7 @@ public: static int insert_lob_column(ObIAllocator &allocator, const share::ObLSID ls_id, const common::ObTabletID tablet_id, + const ObObjType &obj_type, const ObCollationType &cs_type, const ObLobStorageParam &lob_storage_param, ObObj &obj, @@ -254,6 +282,7 @@ public: const share::ObLSID ls_id, const common::ObTabletID tablet_id, const ObLobId &lob_id, + const ObObjType &obj_type, const ObCollationType collation_type, const ObLobStorageParam &lob_storage_param, blocksstable::ObStorageDatum &datum, @@ -316,12 +345,93 @@ struct ObLobDiffHeader { return reinterpret_cast(data_ + persist_loc_size_); } + + bool is_mutli_diff() { return diff_cnt_ > 0; } TO_STRING_KV(K_(diff_cnt), K_(persist_loc_size)); uint32_t diff_cnt_; uint32_t persist_loc_size_; char data_[0]; }; + +class ObLobChunkIndex +{ + OB_UNIS_VERSION(1); +public: + ObLobChunkIndex() + : seq_id_(), offset_(0), pos_(0), byte_len_(0), flag_(0), data_idx_(0), old_data_idx_(-1) + {} + + 
ObLobChunkIndex(uint64_t offset_, const ObLobMetaInfo& meta_info) + : seq_id_(meta_info.seq_id_), offset_(offset_), byte_len_(meta_info.byte_len_), flag_(0), data_idx_(0), old_data_idx_(-1) + {} + + int init(const uint64_t offset, const ObLobMetaInfo& meta_info); + + TO_STRING_KV(K(offset_), K(is_add_), K(is_modified_), K(byte_len_), K(pos_), K(data_idx_), K(old_data_idx_), K(seq_id_)); + +public: + ObString seq_id_; + uint64_t offset_; + uint64_t pos_; + uint32_t byte_len_; + union { + struct { + uint32_t is_add_ : 1; + uint32_t is_modified_ : 1; + uint32_t reserved_ : 30; + }; + uint32_t flag_; + }; + uint32_t data_idx_; + int32_t old_data_idx_; +}; + +class ObLobChunkData +{ + OB_UNIS_VERSION(1); +public: + ObLobChunkData() + : data_() + {} + + explicit ObLobChunkData(const ObString &data) + : data_(data) + {} + + TO_STRING_KV(K(data_)); + +public: + ObString data_; +}; + +struct ObLobPartialData +{ + OB_UNIS_VERSION(1); +public: + ObLobPartialData(): chunk_size_(0), data_length_(0) {} + + int init(); + int push_chunk_index(const ObLobChunkIndex &chunk_index); + int get_ori_data_length(int64_t &len) const; + int sort_index(); + bool is_full_mode(); + // include new add chunk + int64_t get_modified_chunk_cnt() const; + +public: + TO_STRING_KV(K(chunk_size_), K(data_length_)); + int64_t chunk_size_; + // newest data length, include append data + int64_t data_length_; + ObString locator_; + hash::ObHashMap search_map_; + // must order by offset + ObSEArray index_; + ObSEArray data_; + ObSEArray old_data_; +}; + } // storage } // oceanbase diff --git a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index a9432fad99..59aadff39c 100644 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -4163,7 +4163,7 @@ int ObLSTabletService::insert_lob_col( lob_param.sql_mode_ = run_ctx.dml_param_.sql_mode_; lob_param.ls_id_ = run_ctx.store_ctx_.ls_id_; lob_param.tablet_id_ = 
run_ctx.relative_table_.get_tablet_id(); - lob_param.coll_type_ = column.col_type_.get_collation_type(); + lob_param.coll_type_ = ObLobCharsetUtil::get_collation_type(column.col_type_.get_type(), column.col_type_.get_collation_type()); lob_param.allocator_ = &run_ctx.lob_allocator_; lob_param.lob_common_ = lob_common; if (OB_NOT_NULL(del_param)) { @@ -4502,7 +4502,7 @@ int ObLSTabletService::process_delta_lob( lob_param.sql_mode_ = run_ctx.dml_param_.sql_mode_; lob_param.ls_id_ = run_ctx.store_ctx_.ls_id_; lob_param.tablet_id_ = run_ctx.relative_table_.get_tablet_id(); - lob_param.coll_type_ = column.col_type_.get_collation_type(); + lob_param.coll_type_ = ObLobCharsetUtil::get_collation_type(column.col_type_.get_type(), column.col_type_.get_collation_type()); lob_param.allocator_ = &run_ctx.lob_allocator_; // should use old obj lob ObLobLocatorV2 old_lob; @@ -4529,12 +4529,40 @@ int ObLSTabletService::process_delta_lob( } else { // update obj with new disk locator obj.set_lob_value(obj.get_type(), lob_param.lob_common_, lob_param.handle_size_); + if (! 
lob_param.ext_info_log_.is_null() + && OB_FAIL(register_ext_info_commit_cb(run_ctx, obj, lob_param.ext_info_log_))) { + LOG_WARN("register_ext_info_commit_cb fail", K(ret), K(lob_param)); + } } } } return ret; } +int ObLSTabletService::register_ext_info_commit_cb( + ObDMLRunningCtx &run_ctx, + ObObj &col_data, + ObObj &ext_info_data) +{ + int ret = OB_SUCCESS; + memtable::ObMvccWriteGuard guard(false); + if (ext_info_data.is_null()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ext_info_log is null", K(ret), K(ext_info_data)); + } else if (OB_FAIL(guard.write_auth(run_ctx.store_ctx_))) { + LOG_WARN("write_auth fail", K(ret), K(run_ctx.store_ctx_)); + } else if (OB_FAIL(run_ctx.store_ctx_.mvcc_acc_ctx_.mem_ctx_->register_ext_info_commit_cb( + run_ctx.dml_param_.timeout_, + run_ctx.dml_flag_, + run_ctx.store_ctx_.mvcc_acc_ctx_.tx_desc_, + run_ctx.store_ctx_.mvcc_acc_ctx_.tx_scn_, + col_data, + ext_info_data))) { + LOG_WARN("register_ext_info_commit_cb fail", K(ret), K(run_ctx.store_ctx_), K(col_data), K(ext_info_data)); + } + return ret; +} + int ObLSTabletService::set_lob_storage_params( ObDMLRunningCtx &run_ctx, const ObColDesc &column, @@ -4551,6 +4579,7 @@ int ObLSTabletService::set_lob_storage_params( LOG_WARN("column_param is null", K(ret), K(table_param)); } else { lob_param.inrow_threshold_ = table_param->get_data_table().get_lob_inrow_threshold(); + lob_param.schema_chunk_size_ = column_param->get_lob_chunk_size(); } return ret; } @@ -5560,7 +5589,7 @@ int ObLSTabletService::delete_lob_col( lob_param.is_total_quantity_log_ = run_ctx.dml_param_.is_total_quantity_log_; lob_param.ls_id_ = run_ctx.store_ctx_.ls_id_; lob_param.tablet_id_ = run_ctx.relative_table_.get_tablet_id(); - lob_param.coll_type_ = column.col_type_.get_collation_type(); + lob_param.coll_type_ = ObLobCharsetUtil::get_collation_type(column.col_type_.get_type(), column.col_type_.get_collation_type()); lob_param.allocator_ = &run_ctx.lob_allocator_; lob_param.lob_common_ = lob_common; 
lob_param.handle_size_ = data.length(); diff --git a/src/storage/ls/ob_ls_tablet_service.h b/src/storage/ls/ob_ls_tablet_service.h index 518b02af29..04e55a470f 100644 --- a/src/storage/ls/ob_ls_tablet_service.h +++ b/src/storage/ls/ob_ls_tablet_service.h @@ -657,6 +657,10 @@ private: ObObj &old_obj, ObLobLocatorV2 &delta_lob, ObObj &obj); + static int register_ext_info_commit_cb( + ObDMLRunningCtx &run_ctx, + ObObj &col_data, + ObObj &ext_info_data); static int set_lob_storage_params( ObDMLRunningCtx &run_ctx, const ObColDesc &column, diff --git a/src/storage/memtable/mvcc/ob_mvcc_ctx.cpp b/src/storage/memtable/mvcc/ob_mvcc_ctx.cpp index 1a35bed77e..6102e85c7c 100644 --- a/src/storage/memtable/mvcc/ob_mvcc_ctx.cpp +++ b/src/storage/memtable/mvcc/ob_mvcc_ctx.cpp @@ -18,6 +18,7 @@ #include "storage/tx/ob_trans_part_ctx.h" #include "storage/memtable/ob_memtable_util.h" #include "storage/tablelock/ob_table_lock_callback.h" +#include "storage/lob/ob_ext_info_callback.h" #include "storage/ls/ob_freezer.h" namespace oceanbase { @@ -259,6 +260,23 @@ void ObIMvccCtx::check_row_callback_registration_between_stmt_() TRANS_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "register commit not match expection", K(*trans_ctx)); } } + +int ObIMvccCtx::register_ext_info_commit_cb( + const int64_t timeout, + const blocksstable::ObDmlFlag dml_flag, + transaction::ObTxDesc *tx_desc, + transaction::ObTxSEQ &parent_seq_no, + ObObj &index_data, + ObObj &ext_info_data) +{ + int ret = OB_SUCCESS; + storage::ObExtInfoCbRegister cb_register; + if (OB_FAIL(cb_register.register_cb(this, timeout, dml_flag, tx_desc, parent_seq_no, index_data, ext_info_data))) { + TRANS_LOG(ERROR, "register ext info callback failed", K(ret), K(cb_register), K(*this)); + } + return ret; +} + } } diff --git a/src/storage/memtable/mvcc/ob_mvcc_ctx.h b/src/storage/memtable/mvcc/ob_mvcc_ctx.h index 0d5e5c5a03..4a61a82159 100644 --- a/src/storage/memtable/mvcc/ob_mvcc_ctx.h +++ b/src/storage/memtable/mvcc/ob_mvcc_ctx.h @@ -45,6 +45,7 
@@ namespace storage { class ObLsmtTransNode; class ObFreezer; +class ObExtInfoCallback; } using namespace transaction::tablelock; @@ -78,6 +79,8 @@ public: // for mvcc engine invoke virtual void old_row_free(void *row) = 0; virtual void *alloc_mvcc_row_callback() = 0; virtual void free_mvcc_row_callback(ObITransCallback *cb) = 0; + virtual storage::ObExtInfoCallback *alloc_ext_info_callback() = 0; + virtual void free_ext_info_callback(ObITransCallback *cb) = 0; virtual common::ObIAllocator &get_query_allocator() = 0; virtual void set_conflict_trans_id(const uint32_t descriptor) { UNUSED(descriptor); } @@ -168,6 +171,13 @@ public: ObLockMemtable *memtable, ObMemCtxLockOpLinkNode *lock_op, const share::SCN scn); + int register_ext_info_commit_cb( + const int64_t timeout, + const blocksstable::ObDmlFlag dml_flag, + transaction::ObTxDesc *tx_desc, + transaction::ObTxSEQ &parent_seq_no, + ObObj &index_data, + ObObj &ext_info_data); public: virtual void reset() { diff --git a/src/storage/memtable/mvcc/ob_mvcc_trans_ctx.cpp b/src/storage/memtable/mvcc/ob_mvcc_trans_ctx.cpp index 87699e09ed..b35bb5da7d 100644 --- a/src/storage/memtable/mvcc/ob_mvcc_trans_ctx.cpp +++ b/src/storage/memtable/mvcc/ob_mvcc_trans_ctx.cpp @@ -234,6 +234,7 @@ void ObTransCallbackMgr::reset() callback_remove_for_remove_memtable_count_ = 0; callback_remove_for_fast_commit_count_ = 0; callback_remove_for_rollback_to_count_ = 0; + callback_ext_info_log_count_ = 0; pending_log_size_ = 0; flushed_log_size_ = 0; } @@ -2149,11 +2150,13 @@ void ObTransCallbackMgr::print_statistics(char *buf, const int64_t buf_len, int6 } common::databuff_printf(buf, buf_len, pos, "tx_end=%ld, rollback_to=%ld, " - "fast_commit=%ld, remove_memtable=%ld]", + "fast_commit=%ld, remove_memtable=%ld, " + "ext_info_log=%ld]", get_callback_remove_for_trans_end_count(), get_callback_remove_for_rollback_to_count(), get_callback_remove_for_fast_commit_count(), - get_callback_remove_for_remove_memtable_count()); + 
get_callback_remove_for_remove_memtable_count(), + get_callback_ext_info_log_count()); if (!need_merge_) { common::databuff_printf(buf, buf_len, pos, " detail:[(log_epoch,length,logged,synced,appended,removed,unlog_removed,branch_removed)|"); diff --git a/src/storage/memtable/mvcc/ob_mvcc_trans_ctx.h b/src/storage/memtable/mvcc/ob_mvcc_trans_ctx.h index e3b2660407..0b954b5064 100644 --- a/src/storage/memtable/mvcc/ob_mvcc_trans_ctx.h +++ b/src/storage/memtable/mvcc/ob_mvcc_trans_ctx.h @@ -216,6 +216,7 @@ public: callback_remove_for_remove_memtable_count_(0), callback_remove_for_fast_commit_count_(0), callback_remove_for_rollback_to_count_(0), + callback_ext_info_log_count_(0), pending_log_size_(0), flushed_log_size_(0), cb_allocator_(cb_allocator), @@ -307,6 +308,8 @@ public: { return callback_remove_for_fast_commit_count_; } int64_t get_callback_remove_for_rollback_to_count() const { return callback_remove_for_rollback_to_count_; } + int64_t get_callback_ext_info_log_count() const + { return callback_ext_info_log_count_; } void add_main_list_append_cnt(int64_t cnt = 1) { ATOMIC_AAF(&callback_main_list_append_count_, cnt); } void add_slave_list_append_cnt(int64_t cnt = 1) @@ -321,6 +324,8 @@ public: { ATOMIC_AAF(&callback_remove_for_fast_commit_count_, cnt); } void add_rollback_to_callback_remove_cnt(int64_t cnt = 1) { ATOMIC_AAF(&callback_remove_for_rollback_to_count_, cnt); } + void add_callback_ext_info_log_count(int64_t cnt = 1) + { ATOMIC_AAF(&callback_ext_info_log_count_, cnt); } int get_callback_list_count() const { return callback_lists_ ? (MAX_CALLBACK_LIST_COUNT + (need_merge_ ? 
1 : 0)) : 1; } int get_logging_list_count() const; @@ -423,6 +428,7 @@ private: int64_t callback_remove_for_remove_memtable_count_; int64_t callback_remove_for_fast_commit_count_; int64_t callback_remove_for_rollback_to_count_; + int64_t callback_ext_info_log_count_; // current log size in leader participant int64_t pending_log_size_; // current flushed log size in leader participant diff --git a/src/storage/memtable/mvcc/ob_tx_callback_list.cpp b/src/storage/memtable/mvcc/ob_tx_callback_list.cpp index 83b2070d18..a6a4428254 100644 --- a/src/storage/memtable/mvcc/ob_tx_callback_list.cpp +++ b/src/storage/memtable/mvcc/ob_tx_callback_list.cpp @@ -260,6 +260,8 @@ int ObTxCallbackList::callback_(ObITxCallbackFunctor &functor, if (iter->is_need_free()) { if (iter->is_table_lock_callback()) { callback_mgr_.get_ctx().free_table_lock_callback(iter); + } else if (MutatorType::MUTATOR_ROW_EXT_INFO == iter->get_mutator_type()) { + callback_mgr_.get_ctx().free_ext_info_callback(iter); } else { callback_mgr_.get_ctx().free_mvcc_row_callback(iter); } diff --git a/src/storage/memtable/ob_memtable_context.cpp b/src/storage/memtable/ob_memtable_context.cpp index 4fba7b0daa..56adf110a4 100644 --- a/src/storage/memtable/ob_memtable_context.cpp +++ b/src/storage/memtable/ob_memtable_context.cpp @@ -363,6 +363,8 @@ void ObMemtableCtx::free_mvcc_row_callback(ObITransCallback *cb) TRANS_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "cb is null, unexpected error", KP(cb), K(*this)); } else if (cb->is_table_lock_callback()) { TRANS_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "try to free table lock callback", KPC(cb)); + } else if (MutatorType::MUTATOR_ROW_EXT_INFO == cb->get_mutator_type()) { + TRANS_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "try to free ext info callback as mvcc row callback", KP(cb), K(*this)); } else { ATOMIC_INC(&callback_free_count_); TRANS_LOG(DEBUG, "callback release succ", KP(cb), K(*this), K(lbt())); @@ -371,6 +373,39 @@ void ObMemtableCtx::free_mvcc_row_callback(ObITransCallback *cb) } } 
+storage::ObExtInfoCallback *ObMemtableCtx::alloc_ext_info_callback() +{ + int ret = OB_SUCCESS; + void *cb_buffer = nullptr; + storage::ObExtInfoCallback *cb = nullptr; + if (nullptr == (cb_buffer = mem_ctx_obj_pool_.alloc())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + TRANS_LOG(WARN, "alloc ObExtInfoCallback fail", K(ret)); + } else if (nullptr == (cb = new(cb_buffer) storage::ObExtInfoCallback())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + TRANS_LOG(WARN, "construct ObExtInfoCallback object fail", K(ret), "cb_buffer", cb_buffer); + } else { + trans_mgr_.add_callback_ext_info_log_count(1); + } + return cb; +} + +void ObMemtableCtx::free_ext_info_callback(ObITransCallback *cb) +{ + if (OB_ISNULL(cb)) { + TRANS_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "cb is null, unexpected error", KP(cb), K(*this)); + } else if (MutatorType::MUTATOR_ROW_EXT_INFO != cb->get_mutator_type()) { + TRANS_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "cb is not ext info callback", "type", cb->get_mutator_type(), K(*this)); + } else { + ObExtInfoCallback *ext_cb = static_cast(cb); + ext_cb->~ObExtInfoCallback(); + mem_ctx_obj_pool_.free(cb); + TRANS_LOG(DEBUG, "callback release succ", KP(cb), K(*this), K(lbt())); + trans_mgr_.add_callback_ext_info_log_count(-1); + cb = NULL; + } +} + ObIAllocator &ObMemtableCtx::get_query_allocator() { return query_allocator_; diff --git a/src/storage/memtable/ob_memtable_context.h b/src/storage/memtable/ob_memtable_context.h index 9fdbdb804f..845674d9be 100644 --- a/src/storage/memtable/ob_memtable_context.h +++ b/src/storage/memtable/ob_memtable_context.h @@ -333,7 +333,6 @@ public: int init(const uint64_t tenant_id); virtual void *old_row_alloc(const int64_t size) override; virtual void old_row_free(void *row) override; - virtual common::ObIAllocator &get_query_allocator(); virtual void inc_lock_for_read_retry_count(); // When row lock conflict occurs in a remote execution, record the trans id in @@ -447,6 +446,8 @@ public: public: // callback virtual void 
*alloc_mvcc_row_callback() override; virtual void free_mvcc_row_callback(ObITransCallback *cb) override; + virtual storage::ObExtInfoCallback *alloc_ext_info_callback() override; + virtual void free_ext_info_callback(ObITransCallback *cb) override; void *alloc_lock_link_node() { return mem_ctx_obj_pool_.alloc(); } void free_lock_link_node(void *ptr) { mem_ctx_obj_pool_.free(ptr); } void *alloc_table_lock_callback() { return mem_ctx_obj_pool_.alloc(); } diff --git a/src/storage/memtable/ob_memtable_mutator.cpp b/src/storage/memtable/ob_memtable_mutator.cpp index 3bd881687a..2bce728688 100644 --- a/src/storage/memtable/ob_memtable_mutator.cpp +++ b/src/storage/memtable/ob_memtable_mutator.cpp @@ -269,6 +269,10 @@ const char *get_mutator_type_str(MutatorType mutator_type) type_str = "MUTATOR_TABLE_LOCK"; break; } + case MutatorType::MUTATOR_ROW_EXT_INFO: { + type_str = "MUTATOR_ROW_EXT_INFO"; + break; + } default: { type_str = "UNKNOWN_MUTATOR_TYPE"; break; @@ -1085,6 +1089,74 @@ int ObMutatorWriter::append_table_lock_kv( return ret; } +int ObMutatorWriter::append_ext_info_log_kv( + const int64_t table_version, + const RedoDataNode &redo, + const bool is_big_row) +{ + int ret = OB_SUCCESS; + uint64_t table_id = 0; + ObStoreRowkey rowkey; + const ObMemtableKey *mtk = &redo.key_; + bool is_with_head = true; + if (OB_ISNULL(mtk)) { + ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "invalid_argument", K(ret), K(mtk)); + } else if (OB_FAIL(mtk->decode(rowkey))) { + TRANS_LOG(WARN, "mtk decode fail", "ret", ret); + } else if (OB_INVALID_ID == table_id || table_version < 0) { + ret = OB_INVALID_ARGUMENT; + } else { + // dummy encrypt param, ext info log is not encrypted + const transaction::ObTxEncryptMeta *encrypt_meta = nullptr; + transaction::ObCLogEncryptInfo encrypt_info; + MutatorType mutator_type = MutatorType::MUTATOR_ROW_EXT_INFO; + ObMutatorRowHeader row_header; + row_header.mutator_type_ = mutator_type; + row_header.tablet_id_ = redo.tablet_id_; + 
ObMemtableMutatorRow row(table_id, + rowkey, + table_version, + redo.new_row_, + redo.old_row_, + redo.dml_flag_, + redo.modify_count_, + redo.acc_checksum_, + redo.version_, + redo.flag_, + redo.seq_no_, + redo.column_cnt_); + int64_t tmp_pos = buf_.get_position(); + int64_t row_capacity = row_capacity_; + if (OB_ISNULL(buf_.get_data())) { + ret = OB_NOT_INIT; + TRANS_LOG(WARN, "not init", K(ret)); + } else if (OB_FAIL(row_header.serialize(buf_.get_data(), row_capacity, tmp_pos))) { + if (ret == OB_ALLOCATE_MEMORY_FAILED) { + //do nothing + } else { + ret = OB_BUF_NOT_ENOUGH; + } + } else if (OB_FAIL(row.serialize(buf_.get_data(), row_capacity, tmp_pos, + encrypt_meta, encrypt_info, is_big_row))) { + if (ret == OB_ALLOCATE_MEMORY_FAILED) { + //do nothing + } else { + ret = OB_BUF_NOT_ENOUGH; + } + } else if (OB_FAIL(meta_.inc_row_count())) { + TRANS_LOG(WARN, "meta inc_row_count failed", K(ret)); + } else { + buf_.get_position() = tmp_pos; + row_capacity_ = row_capacity; + } + } + if (OB_SUCCESS != ret && OB_BUF_NOT_ENOUGH != ret) { + TRANS_LOG(WARN, "append_kv fail", K(ret), K(buf_), K(meta_)); + } + return ret; +} + int ObMutatorWriter::serialize(const uint8_t row_flag, int64_t &res_len, transaction::ObCLogEncryptInfo &encrypt_info) { @@ -1276,6 +1348,20 @@ int ObMemtableMutatorIterator::iterate_next_row(ObEncryptRowBuf &decrypt_buf, } break; } + case MutatorType::MUTATOR_ROW_EXT_INFO: { + TRANS_LOG(DEBUG, "deserialize row ext info"); + row_.reset(); + const bool unused_need_extract_encrypt_meta = false; + ObCLogEncryptStatMap unused_encrypt_stat_map; + ObEncryptMeta encrypt_meta; + if (OB_FAIL(row_.deserialize( + buf_.get_data(), buf_.get_limit(), buf_.get_position(), decrypt_buf, + encrypt_info, unused_need_extract_encrypt_meta, encrypt_meta, + unused_encrypt_stat_map, ObTransRowFlag::is_big_row(meta_.get_flags())))) { + TRANS_LOG(WARN, "deserialize mutator row fail", K(ret)); + } + break; + } default: { ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "Unknown 
mutator_type", K(ret),K(row_header_.mutator_type_),K(buf_),K(meta_)); diff --git a/src/storage/memtable/ob_memtable_mutator.h b/src/storage/memtable/ob_memtable_mutator.h index 0f4b56dc63..e037276dc2 100644 --- a/src/storage/memtable/ob_memtable_mutator.h +++ b/src/storage/memtable/ob_memtable_mutator.h @@ -286,6 +286,12 @@ public: int append_table_lock_kv( const int64_t table_version, const TableLockRedoDataNode &redo); + + int append_ext_info_log_kv( + const int64_t table_version, + const RedoDataNode &redo, + const bool is_big_row); + int append_row_kv( const int64_t table_version, const RedoDataNode &redo, diff --git a/src/storage/memtable/ob_redo_log_generator.cpp b/src/storage/memtable/ob_redo_log_generator.cpp index 1caf1f2466..8992ee934f 100644 --- a/src/storage/memtable/ob_redo_log_generator.cpp +++ b/src/storage/memtable/ob_redo_log_generator.cpp @@ -114,6 +114,8 @@ public: ret = fill_row_redo_(iter, fake_fill); } else if (MutatorType::MUTATOR_TABLE_LOCK == iter->get_mutator_type()) { ret = fill_table_lock_redo_(iter, fake_fill); + } else if (MutatorType::MUTATOR_ROW_EXT_INFO == iter->get_mutator_type()) { + ret = fill_ext_info_redo_(iter, fake_fill); } else { ret = OB_ERR_UNEXPECTED; TRANS_LOG(ERROR, "mutator row type not expected.", K(ret)); @@ -195,6 +197,27 @@ private: TRANS_LOG(DEBUG, "fill table lock redo.", K(ret), K(*titer), K(redo.lock_id_), K(redo.lock_mode_)); return ret; } + + int fill_ext_info_redo_(ObITransCallback *callback, bool &fake_fill) + { + int ret = OB_SUCCESS; + RedoDataNode redo; + ObExtInfoCallback *ext_iter = (ObExtInfoCallback *)callback; + if (OB_FAIL(ext_iter->get_redo(redo))) { + if (OB_ITER_END != ret) { + TRANS_LOG(WARN, "get_redo fail", K(ret)); + } else { + ret = OB_SUCCESS; + } + } else if (OB_FAIL(mmw_.append_ext_info_log_kv(mem_ctx_->get_max_table_version(), + redo, false/*is_big_row*/))) { + if (OB_BUF_NOT_ENOUGH != ret) { + TRANS_LOG(WARN, "mutator writer append_kv fail", K(ret)); + } + } + return ret; + } + 
private: ObMemtableCtx *mem_ctx_; transaction::ObTxEncryptMeta *clog_encrypt_meta_; diff --git a/src/storage/ob_memtable_ctx_obj_pool.h b/src/storage/ob_memtable_ctx_obj_pool.h index a81a5e8409..0bd6a25b0e 100644 --- a/src/storage/ob_memtable_ctx_obj_pool.h +++ b/src/storage/ob_memtable_ctx_obj_pool.h @@ -15,6 +15,7 @@ #include #include "ob_arena_object_pool.h" +#include "storage/lob/ob_ext_info_callback.h" #include "storage/tablelock/ob_mem_ctx_table_lock.h" namespace oceanbase @@ -40,7 +41,10 @@ public: alloc_lock_op_cnt_(0) {} #else ObMemtableCtxObjPool(common::ObIAllocator &allocator) - : lock_op_node_pool_(allocator), lock_callback_pool_(allocator), mvcc_callback_pool_(allocator) {} + : lock_op_node_pool_(allocator), + lock_callback_pool_(allocator), + mvcc_callback_pool_(allocator), + ext_info_callback_pool_(allocator) {} #endif ObMemtableCtxObjPool() = delete; @@ -111,6 +115,13 @@ public: { return mvcc_callback_pool_.alloc(); } + + template <> + void *alloc() + { + return ext_info_callback_pool_.alloc(); + } + #endif template @@ -134,6 +145,12 @@ public: mvcc_callback_pool_.free(obj); } + template <> + void free(void *obj) + { + ext_info_callback_pool_.free(obj); + } + void reset() { lock_op_node_pool_.reset(); @@ -146,6 +163,7 @@ private: ObArenaObjPool lock_op_node_pool_; ObArenaObjPool lock_callback_pool_; ObArenaObjPool mvcc_callback_pool_; + ObArenaObjPool ext_info_callback_pool_; #ifdef OB_ENABLE_MEMTABLE_CTX_OBJ_CACHE_DEBUG int64_t hit_mvcc_cb_cache_cnt_; diff --git a/src/storage/tx/ob_tx_log.cpp b/src/storage/tx/ob_tx_log.cpp index b03a360a90..b822f0cf45 100644 --- a/src/storage/tx/ob_tx_log.cpp +++ b/src/storage/tx/ob_tx_log.cpp @@ -746,6 +746,16 @@ int ObTxRedoLog::ob_admin_dump(memtable::ObMemtableMutatorIterator *iter_ptr, arg.writer_ptr_->dump_string(to_cstring(iter_ptr->get_table_lock_row())); break; } + case memtable::MutatorType::MUTATOR_ROW_EXT_INFO: { + arg.writer_ptr_->dump_key("ExtInfo"); + arg.writer_ptr_->start_object(); + 
arg.log_stat_->ext_info_log_count_++; + if (OB_FAIL(format_mutator_row_(iter_ptr->get_mutator_row(), arg))) { + TRANS_LOG(WARN, "format ext info mutator row failed", K(ret)); + } + arg.writer_ptr_->end_object(); + break; + } default: { arg.writer_ptr_->dump_key("ERROR:unknown mutator type"); const int64_t mutator_type = static_cast(iter_ptr->get_row_head().mutator_type_); diff --git a/src/storage/tx/ob_tx_replay_executor.cpp b/src/storage/tx/ob_tx_replay_executor.cpp index ef1b6632c3..966fbc2d07 100644 --- a/src/storage/tx/ob_tx_replay_executor.cpp +++ b/src/storage/tx/ob_tx_replay_executor.cpp @@ -618,6 +618,12 @@ int ObTxReplayExecutor::replay_redo_in_memtable_(ObTxRedoLog &redo, const bool s } } else if (FALSE_IT(row_head = mmi_ptr_->get_row_head())) { // do nothing + } else if (MutatorType::MUTATOR_ROW_EXT_INFO == row_head.mutator_type_) { + // ext info redo log is only used for obcdc, no need replay + if (EXECUTE_COUNT_PER_SEC(8)) { + TRANS_LOG(INFO, "ext info redo log no need replay", K(row_head), K(redo)); + } + TRANS_LOG(DEBUG, "ext info redo log no need replay", K(row_head), K(redo)); } else if (OB_FAIL(replay_one_row_in_memtable_(row_head, mmi_ptr_))) { if (OB_MINOR_FREEZE_NOT_ALLOW == ret) { if (TC_REACH_TIME_INTERVAL(1000 * 1000)) { @@ -742,6 +748,10 @@ int ObTxReplayExecutor::replay_one_row_in_memtable_(ObMutatorRowHeader &row_head } break; } + case MutatorType::MUTATOR_ROW_EXT_INFO: { + TRANS_LOG(DEBUG, "[Replay Tx] ignore replay row ext info", K(row_head)); + break; + } default: { ret = OB_ERR_UNEXPECTED; TRANS_LOG(ERROR, "[Replay Tx] Unknown mutator_type", K(row_head.mutator_type_)); diff --git a/unittest/share/CMakeLists.txt b/unittest/share/CMakeLists.txt index af1b048fa2..c51946fd72 100644 --- a/unittest/share/CMakeLists.txt +++ b/unittest/share/CMakeLists.txt @@ -73,11 +73,13 @@ ob_unittest(test_throttling_utils) ob_unittest(test_json_base) ob_unittest(test_json_bin) ob_unittest(test_json_path) +ob_unittest(test_json_schema) 
ob_unittest(test_json_tree) if(OB_BUILD_CLOSE_MODULES) ob_unittest(test_xml_bin) ob_unittest(test_xml_parser) + ob_unittest(test_binary_aggregate) ob_unittest(test_xml_tree) ob_unittest(test_xml_tree_base) ob_unittest(test_xpath) diff --git a/unittest/share/test_binary_aggregate.cpp b/unittest/share/test_binary_aggregate.cpp new file mode 100644 index 0000000000..4b767ed5a0 --- /dev/null +++ b/unittest/share/test_binary_aggregate.cpp @@ -0,0 +1,683 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains interface support for the binary aggregate test. 
+ */ + +#include + +#define private public +#include "lib/json_type/ob_json_tree.h" +#include "lib/json_type/ob_json_bin.h" +#include "lib/json_type/ob_json_parse.h" +#include "lib/xml/ob_multi_mode_bin.h" +#include "lib/xml/ob_xml_bin.h" +#include "lib/xml/ob_tree_base.h" +#include "lib/xml/ob_mul_mode_reader.h" +#include "lib/xml/ob_xml_tree.h" +#include "lib/timezone/ob_timezone_info.h" +#include "lib/xml/ob_xml_parser.h" +#include "lib/xml/ob_xml_util.h" +#include "lib/xml/ob_binary_aggregate.h" +#undef private + +#include +#include +using namespace std; + +namespace oceanbase { +namespace common{ + +class TestBinaryAgg : public ::testing::Test { +public: + TestBinaryAgg() + {} + ~TestBinaryAgg() + {} + virtual void SetUp() + {} + virtual void TearDown() + {} + + static void SetUpTestCase() + {} + + static void TearDownTestCase() + {} + +private: + // disallow copy + DISALLOW_COPY_AND_ASSIGN(TestBinaryAgg); +}; + +static void get_xml_document_1(ObMulModeMemCtx* ctx, ObXmlDocument*& handle) +{ + int ret = 0; + common::ObString xml_text( + "\n" + " John Smith\n" + " \n" + " Ola Nordmann\n" + "
Langgt 23
\n" + " 4000 Stavanger\n" + " Norway\n" + "
\n" + "
\n" + ); + + ObXmlDocument* doc = nullptr; + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ASSERT_EQ(OB_SUCCESS, ret); + handle = doc; +} + +static void get_xml_document_2(ObMulModeMemCtx* ctx, ObXmlDocument*& handle) +{ + int ret = 0; + common::ObString xml_text( + "\n" + " John Smith\n" + "\n" + ); + + ObXmlDocument* doc = nullptr; + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ASSERT_EQ(OB_SUCCESS, ret); + handle = doc; +} + +#define PREDICATE_TEST_COUNT 20 +ObString xml_text_array[PREDICATE_TEST_COUNT] = { + // 0 + "\n" + " John Smith\n" + "\n", + + // 1 + "\n" + " carrot1 Smith\n" + " carrot2 Smith\n" + "\n", + + // 2 + "Everyday Italian", + + // 3 + "29.99", + + // 4 + "", + + // 5 + "200530.00", + + // 6 + "abcd", + + // 7 + "a tree_array; + + get_xml_document_1(ctx, doc1); + ASSERT_EQ(OB_SUCCESS, ret); + tree_array.push_back(doc1); + + get_xml_document_2(ctx, doc2); + ASSERT_EQ(OB_SUCCESS, ret); + tree_array.push_back(doc2); + + ObBinAggSerializer bin_agg(ctx->allocator_, ObBinAggType::AGG_XML, static_cast(M_CONTENT)); + + { + for (int i = 0; i < tree_array.count(); i++) { + ObXmlBin bin(ctx); + ObIMulModeBase* tree = tree_array.at(i); + ASSERT_EQ(bin.parse_tree(tree), OB_SUCCESS); + + ASSERT_EQ(bin_agg.append_key_and_value(&bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIMulModeBase *base; + ObStringBuffer *buffer = bin_agg.get_buffer(); + ASSERT_EQ(ObMulModeFactory::get_xml_base(ctx, buffer->string(), ObNodeMemType::BINARY_TYPE, ObNodeMemType::TREE_TYPE, base), OB_SUCCESS); + + ObStringBuffer str_buf(ctx->allocator_); + base->print_document(str_buf, CS_TYPE_UTF8MB4_GENERAL_CI, 0, 0); + ASSERT_EQ(str_buf.string(), "John SmithOla Nordmann
Langgt 23
4000 StavangerNorway
John Smith"); + cout << str_buf.ptr() << endl; + } + +} + +#define TEST_ROW_COUNT 10000 + +static void get_multi_node_document(ObMulModeMemCtx* ctx, ObXmlDocument*& handle) +{ + int ret = 0; + // common::ObString xml_text( + // "book0Everydaycarrot202399.99 book1Everydaycarrot202399.99 book2Everydaycarrot202399.99 book3Everydaycarrot202399.99 book4Everydaycarrot202399.99 book5Everydaycarrot202399.99 book6Everydaycarrot202399.99 book7Everydaycarrot202399.99 book8Everydaycarrot202399.99 book9Everyday" + // ); + common::ObString xml_text( + "tagi_value" + ); + ObXmlDocument* doc = nullptr; + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ASSERT_EQ(OB_SUCCESS, ret); + handle = doc; +} + +/* Timing run: appends the same parsed XML binary TEST_ROW_COUNT times to an ObBinAggSerializer, serializes it, and prints the elapsed wall-clock milliseconds. */ TEST_F(TestBinaryAgg, xmlagg_performance_test) +{ + int ret = 0; + ObArenaAllocator allocator(ObModIds::TEST); + ObXmlDocument* doc = nullptr; + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + + get_multi_node_document(ctx, doc); + ASSERT_EQ(OB_SUCCESS, ret); + ObXmlBin bin(ctx); + ASSERT_EQ(bin.parse_tree(doc), OB_SUCCESS); + + std::chrono::milliseconds ms_start = std::chrono::duration_cast< std::chrono::milliseconds >( + std::chrono::system_clock::now().time_since_epoch()); + + /* keep the full-width chrono rep: milliseconds since epoch exceed the range of a 32-bit int */ const auto start_time = ms_start.count(); + cout << start_time << endl; + + ObBinAggSerializer bin_agg(ctx->allocator_, ObBinAggType::AGG_XML, static_cast(M_CONTENT)); + + for (int i = 0; i < TEST_ROW_COUNT; i++) { + ASSERT_EQ(bin_agg.append_key_and_value(&bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + std::chrono::milliseconds ms_end = std::chrono::duration_cast< std::chrono::milliseconds >( + std::chrono::system_clock::now().time_since_epoch()); + + const auto end_time = ms_end.count(); + cout << end_time << endl; + cout << "total used: " << end_time - start_time << endl; + +} + +/* Timing run for the tree path: rebuilds a tree from the raw binary TEST_ROW_COUNT times, adds its first child to one content document, dumps that document to binary, and prints the elapsed wall-clock milliseconds. */ TEST_F(TestBinaryAgg, xmlagg_tree_test) +{ + int ret = 0; + ObArenaAllocator allocator(ObModIds::TEST); + ObXmlDocument* doc = 
nullptr; + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + + get_multi_node_document(ctx, doc); + ASSERT_EQ(OB_SUCCESS, ret); + + ObXmlBin bin(ctx); + ASSERT_EQ(bin.parse_tree(doc), OB_SUCCESS); + ObString input_str; + ASSERT_EQ(OB_SUCCESS, doc->get_raw_binary(input_str, ctx->allocator_)); + + std::chrono::milliseconds ms_start = std::chrono::duration_cast< std::chrono::milliseconds >( + std::chrono::system_clock::now().time_since_epoch()); + + /* keep the full-width chrono rep: milliseconds since epoch exceed the range of a 32-bit int */ const auto start_time = ms_start.count(); + cout << start_time << endl; + + ObXmlDocument *content = NULL; + content = OB_NEWx(ObXmlDocument, ctx->allocator_, ObMulModeNodeType::M_CONTENT, ctx); + ASSERT_EQ(OB_SUCCESS, content->alter_member_sort_policy(false)); + + for (int i = 0; i < TEST_ROW_COUNT; i++) { + ObIMulModeBase *base; + ASSERT_EQ(OB_SUCCESS, ObMulModeFactory::get_xml_base(ctx, input_str, ObNodeMemType::BINARY_TYPE, ObNodeMemType::TREE_TYPE, base)); + ObXmlDocument *input_doc = static_cast(base); + ASSERT_EQ(OB_SUCCESS, content->add_element(input_doc->at(0))); + } + + ObString binary_str; + ASSERT_EQ(OB_SUCCESS, content->get_raw_binary(binary_str, ctx->allocator_)); + + + std::chrono::milliseconds ms_end = std::chrono::duration_cast< std::chrono::milliseconds >( + std::chrono::system_clock::now().time_since_epoch()); + + const auto end_time = ms_end.count(); + cout << end_time << endl; + cout << "total used: " << end_time - start_time << endl; + +} + +TEST_F(TestBinaryAgg, mulit_bin) +{ + int ret = 0; + + ObArenaAllocator allocator(ObModIds::TEST); + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + + ObArray tree_array; + for (int i = 0; i < 5; i++) { + ObXmlDocument* doc = nullptr; + get_xml_document(ctx, doc, i); + ASSERT_EQ(OB_SUCCESS, ret); + tree_array.push_back(doc); + } + + { + cout << "TEST CASE: 1" << endl; + ObBinAggSerializer bin_agg(ctx->allocator_, ObBinAggType::AGG_XML, 
static_cast(M_CONTENT), true); + + for (int i = 0; i < 5; i++) { + ObXmlBin bin(ctx); + ObIMulModeBase* tree = tree_array.at(i); + cout << "Add XML: " << i << endl; + ASSERT_EQ(bin.parse_tree(tree), OB_SUCCESS); + + ASSERT_EQ(bin_agg.append_key_and_value(&bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIMulModeBase *base; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObMulModeFactory::get_xml_base(ctx, buffer->string(), ObNodeMemType::BINARY_TYPE, ObNodeMemType::TREE_TYPE, base), OB_SUCCESS); + + ObStringBuffer str_buf(ctx->allocator_); + ParamPrint param_list; + base->print_content(str_buf, false, false, 0, param_list); + ASSERT_EQ(str_buf.string(), "John Smithcarrot1 Smithcarrot2 SmithEveryday Italian29.99"); + cout << str_buf.ptr() << endl; + } + { + cout << "TEST CASE: 2" << endl; + ObBinAggSerializer bin_agg(ctx->allocator_, ObBinAggType::AGG_XML, static_cast(M_CONTENT), true); + + int doc_array[5] = {0, 2, 4, 5, 6}; + for (int i = 0; i < 5; i++) { + cout << "Add XML: " << i << endl; + ObIMulModeBase *input_base; + ASSERT_EQ(ObMulModeFactory::get_xml_base(ctx, xml_text_array[doc_array[i]], ObNodeMemType::TREE_TYPE, ObNodeMemType::BINARY_TYPE, input_base, xml_test_type[doc_array[i]]), OB_SUCCESS); + + ObXmlBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIMulModeBase *base; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObMulModeFactory::get_xml_base(ctx, buffer->string(), ObNodeMemType::BINARY_TYPE, ObNodeMemType::TREE_TYPE, base), OB_SUCCESS); + + ObStringBuffer str_buf(ctx->allocator_); + ParamPrint param_list; + base->print_content(str_buf, false, false, 0, param_list); + ASSERT_EQ(str_buf.string(), "John SmithEveryday Italian200530.00abcd"); + cout << str_buf.ptr() << endl; + } + + { + cout << "TEST CASE: 3" << endl; + ObBinAggSerializer bin_agg(ctx->allocator_, ObBinAggType::AGG_XML, 
static_cast(M_CONTENT), true); + + int doc_array[2] = {7, 8}; + for (int i = 0; i < 2; i++) { + cout << "Add XML: " << i << endl; + ObIMulModeBase *input_base; + ASSERT_EQ(ObMulModeFactory::get_xml_base(ctx, xml_text_array[doc_array[i]], ObNodeMemType::TREE_TYPE, ObNodeMemType::BINARY_TYPE, input_base, xml_test_type[doc_array[i]]), OB_SUCCESS); + + ObXmlBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIMulModeBase *base; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObMulModeFactory::get_xml_base(ctx, buffer->string(), ObNodeMemType::BINARY_TYPE, ObNodeMemType::TREE_TYPE, base), OB_SUCCESS); + + ObStringBuffer str_buf(ctx->allocator_); + ParamPrint param_list; + base->print_content(str_buf, false, false, 0, param_list); + ASSERT_EQ(str_buf.string(), "aallocator_, ObBinAggType::AGG_XML, static_cast(M_CONTENT), true); + int doc_array[5] = {7, 6, 8, 9, 5}; + for (int i = 0; i < 5; i++) { + cout << "Add XML: " << i << endl; + ObIMulModeBase *input_base; + ASSERT_EQ(ObMulModeFactory::get_xml_base(ctx, xml_text_array[doc_array[i]], ObNodeMemType::TREE_TYPE, ObNodeMemType::BINARY_TYPE, input_base, xml_test_type[doc_array[i]]), OB_SUCCESS); + + ObXmlBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIMulModeBase *base; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObMulModeFactory::get_xml_base(ctx, buffer->string(), ObNodeMemType::BINARY_TYPE, ObNodeMemType::TREE_TYPE, base), OB_SUCCESS); + + ObStringBuffer str_buf(ctx->allocator_); + ParamPrint param_list; + base->print_content(str_buf, false, false, 0, param_list); + ASSERT_EQ(str_buf.string(), "a
abcd200530.00"); + cout << str_buf.ptr() << endl; + } + + { + cout << "TEST CASE: 5" << endl; + ObBinAggSerializer bin_agg(ctx->allocator_, ObBinAggType::AGG_XML, static_cast(M_CONTENT), true); + int doc_array[12] = {0, 7, 8, 1, 2, 3, 9, 4, 5, 6, 10, 11}; + for (int i = 0; i < 12; i++) { + cout << "Add XML: " << i << endl; + ObIMulModeBase *input_base; + ASSERT_EQ(ObMulModeFactory::get_xml_base(ctx, xml_text_array[doc_array[i]], ObNodeMemType::TREE_TYPE, ObNodeMemType::BINARY_TYPE, input_base, xml_test_type[doc_array[i]]), OB_SUCCESS); + + ObXmlBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIMulModeBase *base; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObMulModeFactory::get_xml_base(ctx, buffer->string(), ObNodeMemType::BINARY_TYPE, ObNodeMemType::TREE_TYPE, base), OB_SUCCESS); + + ObStringBuffer str_buf(ctx->allocator_); + ParamPrint param_list; + base->print_content(str_buf, false, false, 0, param_list); + ASSERT_EQ(str_buf.string(), "John Smithacarrot1 Smithcarrot2 SmithEveryday Italian29.99
200530.00abcdxyzcarrot"); + cout << str_buf.ptr() << endl; + } +} + +ObString json_text_array[PREDICATE_TEST_COUNT] = { + "{\"a\":100, \"b\":200, \"c\":300}", // 0 + "{ \"item\" : \"canvas\"}", // 1 + "{\"c\":2, \"a\":0, \"b\":1}", // 2 + "\"not object\"", // 3 + "\"123\"", // 4 + "333", // 5 + "[\"book\", \"read\"]", // 6 +}; + +ObString json_key_array[PREDICATE_TEST_COUNT] = { + "abcd", // 0 + "efg", // 1 + "carrot", // 2 + "newkey", // 3 + "xyz", // 4 + "yxz", // 5 + "", // 6 +}; + +TEST_F(TestBinaryAgg, json_agg) +{ + int ret = 0; + + ObArenaAllocator allocator(ObModIds::TEST); + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + { + cout << "TEST JSON CASE: 1" << endl; + ObBinAggSerializer bin_agg(&allocator, ObBinAggType::AGG_JSON, static_cast(ObJsonNodeType::J_OBJECT)); + ObStringBuffer value(&allocator); + int json_array[2] = {0, 1}; + for (int i = 0; i < 2; i++) { + cout << "Add JSON: " << i << endl; + ObIJsonBase *input_base = NULL; + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, json_text_array[json_array[i]], ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, input_base), OB_SUCCESS); + ObJsonBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(json_key_array[json_array[i]], value, bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIJsonBase *base = NULL; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, buffer->string(), ObJsonInType::JSON_BIN, ObJsonInType::JSON_BIN, base), OB_SUCCESS); + + ObJsonBuffer buf(&allocator); + ASSERT_EQ(base->print(buf, true), OB_SUCCESS); + cout << buf.ptr() << endl; + ASSERT_EQ(buf.string(), "{\"abcd\": {\"a\": 100, \"b\": 200, \"c\": 300}, \"efg\": {\"item\": \"canvas\"}}"); + } + + { + cout << "TEST JSON CASE: 2" << endl; + ObBinAggSerializer bin_agg(&allocator, ObBinAggType::AGG_JSON, static_cast(ObJsonNodeType::J_OBJECT)); + int 
json_array[3] = {1, 2, 0}; + ObStringBuffer value(&allocator); + for (int i = 0; i < 3; i++) { + cout << "Add JSON: " << i << endl; + ObIJsonBase *input_base = NULL; + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, json_text_array[json_array[i]], ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, input_base), OB_SUCCESS); + ObJsonBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(json_key_array[json_array[i]], value, bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIJsonBase *base = NULL; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, buffer->string(), ObJsonInType::JSON_BIN, ObJsonInType::JSON_BIN, base), OB_SUCCESS); + + ObJsonBuffer buf(&allocator); + ASSERT_EQ(base->print(buf, true), OB_SUCCESS); + cout << buf.ptr() << endl; + ASSERT_EQ(buf.string(), "{\"efg\": {\"item\": \"canvas\"}, \"carrot\": {\"a\": 0, \"b\": 1, \"c\": 2}, \"abcd\": {\"a\": 100, \"b\": 200, \"c\": 300}}"); + } + + { + cout << "TEST JSON CASE: 3" << endl; + ObBinAggSerializer bin_agg(&allocator, ObBinAggType::AGG_JSON, static_cast(ObJsonNodeType::J_OBJECT)); + int json_array[5] = {0, 1, 2, 3, 4}; + ObStringBuffer value(&allocator); + for (int i = 0; i < 5; i++) { + cout << "Add JSON: " << i << endl; + ObIJsonBase *input_base = NULL; + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, json_text_array[json_array[i]], ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, input_base), OB_SUCCESS); + ObJsonBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(json_key_array[json_array[i]], value, bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIJsonBase *base = NULL; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, buffer->string(), ObJsonInType::JSON_BIN, ObJsonInType::JSON_BIN, base), OB_SUCCESS); + + ObJsonBuffer buf(&allocator); + ASSERT_EQ(base->print(buf, true), 
OB_SUCCESS); + cout << buf.ptr() << endl; + ASSERT_EQ(buf.string(), "{\"abcd\": {\"a\": 100, \"b\": 200, \"c\": 300}, \"efg\": {\"item\": \"canvas\"}, \"carrot\": {\"a\": 0, \"b\": 1, \"c\": 2}, \"newkey\": \"not object\", \"xyz\": \"123\"}"); + } + + { + cout << "TEST JSON CASE: 4" << endl; + ObBinAggSerializer bin_agg(&allocator, ObBinAggType::AGG_JSON, static_cast(ObJsonNodeType::J_OBJECT)); + int json_array[3] = {3, 5, 4}; + ObStringBuffer value(&allocator); + for (int i = 0; i < 3; i++) { + cout << "Add JSON: " << i << endl; + ObIJsonBase *input_base = NULL; + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, json_text_array[json_array[i]], ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, input_base), OB_SUCCESS); + ObJsonBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(json_key_array[json_array[i]], value, bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIJsonBase *base = NULL; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, buffer->string(), ObJsonInType::JSON_BIN, ObJsonInType::JSON_BIN, base), OB_SUCCESS); + + ObJsonBuffer buf(&allocator); + ASSERT_EQ(base->print(buf, true), OB_SUCCESS); + cout << buf.ptr() << endl; + ASSERT_EQ(buf.string(), "{\"newkey\": \"not object\", \"yxz\": 333, \"xyz\": \"123\"}"); + } + + { + cout << "TEST JSON CASE: 5" << endl; + ObBinAggSerializer bin_agg(&allocator, ObBinAggType::AGG_JSON, static_cast(ObJsonNodeType::J_OBJECT)); + int json_array[5] = {0, 1, 4, 5, 6}; + ObStringBuffer value(&allocator); + for (int i = 0; i < 5; i++) { + cout << "Add JSON: " << i << endl; + ObIJsonBase *input_base = NULL; + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, json_text_array[json_array[i]], ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, input_base), OB_SUCCESS); + ObJsonBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(json_key_array[json_array[i]], value, bin), OB_SUCCESS); + } + 
+ ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIJsonBase *base = NULL; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, buffer->string(), ObJsonInType::JSON_BIN, ObJsonInType::JSON_BIN, base), OB_SUCCESS); + + ObJsonBuffer buf(&allocator); + ASSERT_EQ(base->print(buf, true), OB_SUCCESS); + cout << buf.ptr() << endl; + ASSERT_EQ(buf.string(), "{\"abcd\": {\"a\": 100, \"b\": 200, \"c\": 300}, \"efg\": {\"item\": \"canvas\"}, \"xyz\": \"123\", \"yxz\": 333, \"\": [\"book\", \"read\"]}"); + } + + // json_query with wrapper + { + cout << "TEST JSON CASE: 6" << endl; + ObBinAggSerializer bin_agg(&allocator, ObBinAggType::AGG_JSON, static_cast(ObJsonNodeType::J_ARRAY)); + int json_array[8] = {0, 0, 0, 6}; + ObStringBuffer value(&allocator); + ObString input_null; + for (int i = 0; i < 4; i++) { + cout << "Add JSON: " << i << endl; + ObIJsonBase *input_base = NULL; + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, json_text_array[json_array[i]], ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, input_base), OB_SUCCESS); + ObJsonBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(input_null, value, bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIJsonBase *base = NULL; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, buffer->string(), ObJsonInType::JSON_BIN, ObJsonInType::JSON_BIN, base), OB_SUCCESS); + + ObJsonBuffer buf(&allocator); + ASSERT_EQ(base->print(buf, true), OB_SUCCESS); + cout << buf.ptr() << endl; + //ASSERT_EQ(buf.string(), "{\"\": [\"book\", \"read\"], \"abcd\": {\"a\": 100, \"b\": 200, \"c\": 300}, \"efg\": {\"item\": \"canvas\"}, \"xyz\": \"123\", \"yxz\": 333}"); + } + + { + cout << "TEST JSON CASE: 7" << endl; + ObBinAggSerializer bin_agg(&allocator, ObBinAggType::AGG_JSON, static_cast(ObJsonNodeType::J_ARRAY)); + int json_array[8] = {0, 0, 0, 0, 6}; + ObStringBuffer 
value(&allocator); + ObString input_null; + for (int i = 0; i < 5; i++) { + cout << "Add JSON: " << i << endl; + ObIJsonBase *input_base = NULL; + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, json_text_array[json_array[i]], ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, input_base), OB_SUCCESS); + ObJsonBin *bin = static_cast(input_base); + ASSERT_EQ(bin_agg.append_key_and_value(input_null, value, bin), OB_SUCCESS); + } + + ASSERT_EQ(bin_agg.serialize(), OB_SUCCESS); + + ObIJsonBase *base = NULL; + ObStringBuffer *buffer = bin_agg.get_buffer(); + + ASSERT_EQ(ObJsonBaseFactory::get_json_base(&allocator, buffer->string(), ObJsonInType::JSON_BIN, ObJsonInType::JSON_BIN, base), OB_SUCCESS); + + ObJsonBuffer buf(&allocator); + ASSERT_EQ(base->print(buf, true), OB_SUCCESS); + cout << buf.ptr() << endl; + //ASSERT_EQ(buf.string(), "{\"\": [\"book\", \"read\"], \"abcd\": {\"a\": 100, \"b\": 200, \"c\": 300}, \"efg\": {\"item\": \"canvas\"}, \"xyz\": \"123\", \"yxz\": 333}"); + } + +} + +}; +}; + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/unittest/share/test_json_base.cpp b/unittest/share/test_json_base.cpp index 52bee9f6f1..550875a5aa 100644 --- a/unittest/share/test_json_base.cpp +++ b/unittest/share/test_json_base.cpp @@ -9,11 +9,12 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. 
*/ - +#define USING_LOG_PREFIX SHARE #include #define private public #include "lib/json_type/ob_json_tree.h" #include "lib/json_type/ob_json_bin.h" +#include "lib/json_type/ob_json_diff.h" #include "lib/json_type/ob_json_base.h" #include "lib/string/ob_sql_string.h" #include "lib/timezone/ob_timezone_info.h" @@ -46,6 +47,27 @@ private: DISALLOW_COPY_AND_ASSIGN(TestJsonBase); }; +/* Test helper: allocates an ObJsonBinUpdateCtx and an ObLobInRowUpdateCursor from `allocator`, initializes the new cursor from bin's current cursor, then installs both on `bin` (bin's previous cursor is reset). Returns OB_SUCCESS, OB_ALLOCATE_MEMORY_FAILED when either allocation fails, or the error from cursor->init(). */ static int init_update_ctx(ObIAllocator &allocator, ObJsonBin *bin) +{ + INIT_SUCC(ret); + ObJsonBinUpdateCtx *update_ctx = nullptr; + ObLobInRowUpdateCursor *cursor = nullptr; + if (OB_ISNULL(update_ctx = OB_NEWx(ObJsonBinUpdateCtx, &allocator, &allocator))) { + /* OB_ISNULL does not touch ret, so report the failure explicitly */ ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("alloc update_ctx fail", K(ret)); + } else if (OB_ISNULL(cursor = OB_NEWx(ObLobInRowUpdateCursor, &allocator, &allocator))) { + ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("alloc cursor fail", K(ret)); + } else if (OB_FAIL(cursor->init(bin->get_cursor()))) { + LOG_WARN("init cursor fail", K(ret)); + } else { + update_ctx->set_lob_cursor(cursor); + bin->get_cursor()->reset(); + bin->set_cursor(cursor); + bin->get_ctx()->update_ctx_ = update_ctx; + bin->get_ctx()->is_update_ctx_alloc_ = true; + } + return ret; +} + // json text 比较 // // @param [in] json_text_a 比较的json text a @@ -890,7 +912,7 @@ TEST_F(TestJsonBase, test_seek) ObArenaAllocator allocator(ObModIds::TEST); ObIJsonBase *j_tree = NULL; ObIJsonBase *j_bin = NULL; - ObJsonBaseVector hit; + ObJsonSeekResult hit; ObJsonBuffer j_buf(&allocator); // 1. seek_not_exist_member @@ -1099,12 +1121,12 @@ TEST_F(TestJsonBase, test_seek) } ObString raw_str; ASSERT_EQ(OB_SUCCESS, jb_res->get_raw_binary(raw_str, &allocator)); - ASSERT_EQ(27, raw_str.length()); + ASSERT_EQ(8 + 27, raw_str.length()); ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, jb_res, ObJsonInType::JSON_BIN, j_bin)); raw_str.reset(); ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(raw_str, &allocator)); - ASSERT_EQ(27, raw_str.length()); + ASSERT_EQ(8 + 27, raw_str.length()); // 10. 
seek test3(bin->seek->print) common::ObString j_text10("{\"some_key\": true}"); @@ -1118,7 +1140,7 @@ TEST_F(TestJsonBase, test_seek) ASSERT_EQ(hit.size(), 1); raw_str.reset(); ASSERT_EQ(OB_SUCCESS, hit[0]->get_raw_binary(raw_str, &allocator)); - ObJsonBin j_bin_tmp(raw_str.ptr(), raw_str.length()); + ObJsonBin j_bin_tmp(raw_str.ptr(), raw_str.length(), &allocator); ObIJsonBase *j_base = &j_bin_tmp; j_buf.reset(); ASSERT_EQ(OB_SUCCESS, j_bin_tmp.reset_iter()); @@ -1132,7 +1154,7 @@ TEST_F(TestJsonBase, test_oracle_seek) ObArenaAllocator allocator(ObModIds::TEST); ObIJsonBase *j_tree = NULL; ObIJsonBase *j_bin = NULL; - ObJsonBaseVector hit; + ObJsonSeekResult hit; ObJsonBuffer j_buf(&allocator); // 1. seek_not_exist_member @@ -1511,7 +1533,7 @@ TEST_F(TestJsonBase, test_seek_str_cmp) ObArenaAllocator allocator(ObModIds::TEST); ObIJsonBase *j_tree = NULL; ObIJsonBase *j_bin = NULL; - ObJsonBaseVector hit; + ObJsonSeekResult hit; ObJsonBuffer j_buf(&allocator); common::ObString var_name("sql"); @@ -1607,7 +1629,7 @@ TEST_F(TestJsonBase, test_seek_func) ObArenaAllocator allocator(ObModIds::TEST); ObIJsonBase *j_tree = NULL; ObIJsonBase *j_bin = NULL; - ObJsonBaseVector hit; + ObJsonSeekResult hit; ObJsonBuffer j_buf(&allocator); // 11. seek_type @@ -1870,7 +1892,7 @@ TEST_F(TestJsonBase, test_seek_bad_filter) ObArenaAllocator allocator(ObModIds::TEST); ObIJsonBase *j_tree = NULL; ObIJsonBase *j_bin = NULL; - ObJsonBaseVector hit; + ObJsonSeekResult hit; ObJsonBuffer j_buf(&allocator); INIT_SUCC(ret); /* @@ -1943,7 +1965,7 @@ TEST_F(TestJsonBase, test_seek_filter) ObArenaAllocator allocator(ObModIds::TEST); ObIJsonBase *j_tree = NULL; ObIJsonBase *j_bin = NULL; - ObJsonBaseVector hit; + ObJsonSeekResult hit; ObJsonBuffer j_buf(&allocator); /* @@ -3346,7 +3368,7 @@ TEST_F(TestJsonBase, test_get_raw_binary) // 2. 
test json bin ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_arr_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); - ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(str)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(str, &allocator)); } TEST_F(TestJsonBase, test_get_key) @@ -3447,13 +3469,13 @@ TEST_F(TestJsonBase, test_get_used_size) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_obj_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_TREE, j_tree)); ASSERT_EQ(OB_SUCCESS, j_tree->get_used_size(use_size)); - ASSERT_EQ(50, use_size); + ASSERT_EQ(50 + 8 /*sizeof(ObJsonBinDocHeader)*/, use_size); // 2. test json bin ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_obj_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); ASSERT_EQ(OB_SUCCESS, j_bin->get_used_size(use_size)); - ASSERT_EQ(50, use_size); + ASSERT_EQ(50 + 8 /*sizeof(ObJsonBinDocHeader)*/, use_size); } TEST_F(TestJsonBase, test_get_free_space) @@ -3475,9 +3497,14 @@ TEST_F(TestJsonBase, test_get_free_space) // 2. 
test json bin ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_obj_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + + ObJsonBin *bin = static_cast(j_bin); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + ASSERT_EQ(OB_SUCCESS, j_bin->object_remove(key)); ASSERT_EQ(OB_SUCCESS, j_bin->get_free_space(free_space)); - ASSERT_EQ(4, free_space); + ASSERT_EQ(6, free_space); } TEST_F(TestJsonBase, test_array_append) @@ -3506,6 +3533,11 @@ TEST_F(TestJsonBase, test_array_append) ObIJsonBase *j_bin_val = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, &j_uint1, ObJsonInType::JSON_BIN, j_bin_val)); + + ObJsonBin *bin = static_cast(j_bin); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + ASSERT_EQ(OB_SUCCESS, j_bin->array_append(j_bin_val)); j_buf.reset(); ASSERT_EQ(OB_SUCCESS, j_bin->print(j_buf, false)); @@ -3538,6 +3570,11 @@ TEST_F(TestJsonBase, test_array_insert) ObIJsonBase *j_bin_val = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, &j_uint1, ObJsonInType::JSON_BIN, j_bin_val)); + + ObJsonBin *bin = static_cast(j_bin); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + ASSERT_EQ(OB_SUCCESS, j_bin->array_insert(2, j_bin_val)); j_buf.reset(); ASSERT_EQ(OB_SUCCESS, j_bin->print(j_buf, false)); @@ -3565,6 +3602,9 @@ TEST_F(TestJsonBase, test_array_remove) ObIJsonBase *j_bin = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree, ObJsonInType::JSON_BIN, j_bin)); + ObJsonBin *bin = static_cast(j_bin); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); ASSERT_EQ(OB_SUCCESS, j_bin->array_remove(3)); ASSERT_EQ(3, j_bin->element_count()); j_buf.reset(); @@ -3601,11 +3641,15 @@ TEST_F(TestJsonBase, 
test_object_add) ObIJsonBase *j_bin_val2 = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, &j_uint2, ObJsonInType::JSON_BIN, j_bin_val2)); + + ObJsonBin *bin = static_cast(j_bin); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + ASSERT_EQ(OB_SUCCESS, j_bin->object_add(key2, j_bin_val2)); j_buf.reset(); ASSERT_EQ(OB_SUCCESS, j_bin->print(j_buf, false)); - ASSERT_EQ(0, strncmp("{\"os\": \"Mac\", \"key1\": 1, \"name\": \"Safari\", \"key2\": 2}", - j_buf.ptr(), j_buf.length())); + ASSERT_EQ("{\"os\": \"Mac\", \"key1\": 1, \"key2\": 2, \"name\": \"Safari\"}", std::string(j_buf.ptr(), j_buf.length())); } TEST_F(TestJsonBase, test_object_remove) @@ -3630,6 +3674,9 @@ TEST_F(TestJsonBase, test_object_remove) ObIJsonBase *j_bin = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree, ObJsonInType::JSON_BIN, j_bin)); + ObJsonBin *bin = static_cast(j_bin); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); common::ObString key1("os"); ASSERT_EQ(OB_SUCCESS, j_bin->object_remove(key1)); j_buf.reset(); @@ -3673,19 +3720,29 @@ TEST_F(TestJsonBase, test_replace) j_tree = &j_arr; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree, ObJsonInType::JSON_BIN, j_bin)); + ObJsonBin *bin2 = static_cast(j_bin); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin2)); + bin2->set_seek_flag(false); + ObJsonBinUpdateCtx &update_ctx2 = *bin2->get_update_ctx(); ASSERT_EQ(OB_SUCCESS, j_bin->get_array_element(0, jb_bin_old_ptr)); ObIJsonBase *j_bin_new_ptr = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, &j_uint0, ObJsonInType::JSON_BIN, j_bin_new_ptr)); + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); ASSERT_EQ(OB_SUCCESS, j_bin->replace(jb_bin_old_ptr, j_bin_new_ptr)); j_buf.reset(); ASSERT_EQ(OB_SUCCESS, j_bin->print(j_buf, false)); - ASSERT_EQ(0, 
strncmp("[0]", j_buf.ptr(), j_buf.length())); + ASSERT_EQ("[0]", std::string(j_buf.ptr(), j_buf.length())); // object j_tree = &j_obj; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree, ObJsonInType::JSON_BIN, j_bin)); + bin = static_cast(j_bin); + bin->set_seek_flag(false); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); ASSERT_EQ(OB_SUCCESS, j_bin->get_object_value(key0, jb_bin_old_ptr)); ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, &j_val0, ObJsonInType::JSON_BIN, j_bin_new_ptr)); @@ -4510,6 +4567,8 @@ TEST_F(TestJsonBase, test_to_bit) int main(int argc, char** argv) { + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + OB_LOGGER.set_log_level("INFO"); ::testing::InitGoogleTest(&argc, argv); // system("rm -f test_json_base.log"); // OB_LOGGER.set_file_name("test_json_base.log"); diff --git a/unittest/share/test_json_bin.cpp b/unittest/share/test_json_bin.cpp index 6624292401..815848ea67 100644 --- a/unittest/share/test_json_bin.cpp +++ b/unittest/share/test_json_bin.cpp @@ -9,12 +9,14 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. 
*/ - +#define USING_LOG_PREFIX SHARE #include #define private public +#include "lib/lob/ob_lob_base.h" #include "lib/json_type/ob_json_tree.h" #include "lib/json_type/ob_json_bin.h" #include "lib/json_type/ob_json_parse.h" +#include "lib/json_type/ob_json_diff.h" #include "lib/timezone/ob_timezone_info.h" #undef private @@ -46,6 +48,139 @@ private: }; +static std::string buf_to_hex(const char* ptr, int len) +{ + std::stringstream ss; + ss << std::hex; + for (int i = 0; i < len; ++i) ss << static_cast(ptr[i]) << " "; + return ss.str(); +} + +static int init_update_ctx(ObIAllocator &allocator, ObJsonBin *bin) +{ + INIT_SUCC(ret); + ObJsonBinUpdateCtx *update_ctx = nullptr; + ObLobInRowUpdateCursor *cursor = nullptr; + if (OB_ISNULL(update_ctx = OB_NEWx(ObJsonBinUpdateCtx, &allocator, &allocator))) { + LOG_WARN("alloc update_ctx fail", K(ret)); + } else if (OB_ISNULL(cursor = OB_NEWx(ObLobInRowUpdateCursor, &allocator, &allocator))) { + LOG_WARN("alloc cursor fail", K(ret)); + } else if (OB_FAIL(cursor->init(bin->get_cursor()))) { + LOG_WARN("init cursor fail", K(ret)); + } else { + update_ctx->set_lob_cursor(cursor); + bin->get_cursor()->reset(); + bin->set_cursor(cursor); + bin->get_ctx()->update_ctx_ = update_ctx; + bin->get_ctx()->is_update_ctx_alloc_ = true; + } + return ret; +} + +static void check_diff_valid(ObIAllocator &allocator, const ObString& old_str, ObJsonBinUpdateCtx &update_ctx) +{ + ObJsonBuffer old_buf(&allocator); + ASSERT_EQ(OB_SUCCESS, old_buf.append(old_str)); + ASSERT_EQ(old_buf.length(), old_str.length()); + ObString new_buf; + update_ctx.current_data(new_buf); + for (int i = 0; i < update_ctx.binary_diffs_.count(); ++i) { + ObJsonBinaryDiff &diff = update_ctx.binary_diffs_[i]; + if (diff.dst_offset_ >= old_buf.length()) { + ASSERT_EQ(OB_SUCCESS, old_buf.append(new_buf.ptr() + diff.dst_offset_, diff.dst_len_)); + } else { + MEMCPY(old_buf.ptr() + diff.dst_offset_, new_buf.ptr() + diff.dst_offset_, diff.dst_len_); + } + } + 
ASSERT_EQ(old_buf.length(), new_buf.length()); + for (int i = 0; i < old_buf.length(); ++i) { + uint8_t old_c = old_buf.ptr()[i]; + uint8_t new_c = new_buf.ptr()[i]; + ASSERT_EQ(new_c, old_c) << " pos " << i << " old buf " << buf_to_hex(old_buf.ptr(), old_buf.length()) << " new buf "<< buf_to_hex(new_buf.ptr(), new_buf.length()); + } + ASSERT_EQ(0, MEMCMP(old_buf.ptr(), new_buf.ptr(), new_buf.length())); +} + +static void check_json_diff_valid(ObIAllocator &allocator, const ObString& j_text, ObJsonBinUpdateCtx &target_update_ctx, int json_diff_count) +{ + int ret = OB_SUCCESS; + ObIJsonBase *j_base = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_TREE, j_base)); + + ObJsonDiffArray &json_diffs = target_update_ctx.json_diffs_; + ObString update_buffer; + target_update_ctx.current_data(update_buffer); + + ASSERT_EQ(json_diff_count, json_diffs.count()); + + ObJsonPathCache path_cache(&allocator); + for (int i = 0; i < json_diffs.count(); ++i) { + ObJsonPath* json_path = nullptr; + ObString path_str = json_diffs[i].path_; + int path_idx = path_cache.size(); + ASSERT_EQ(OB_SUCCESS, path_cache.find_and_add_cache(json_path, path_str, path_idx)); + ASSERT_EQ(path_cache.path_stat_at(path_idx), ObPathParseStat::OK_NOT_NULL); + ObJsonSeekResult hit; + if (json_diffs[i].op_ == ObJsonDiffOp::REPLACE + || json_diffs[i].op_ == ObJsonDiffOp::REMOVE) { + ASSERT_EQ(OB_SUCCESS, j_base->seek(*json_path, json_path->path_node_cnt(), true, false, hit)); + ASSERT_EQ(1, hit.size()); + ObIJsonBase *parent = nullptr; + ObIJsonBase *json_old = hit[0]; + ObJsonBin j_new_bin(&allocator); + j_new_bin.set_seek_flag(false); + if (json_diffs[i].op_ == ObJsonDiffOp::REPLACE) { + ASSERT_EQ(OB_SUCCESS, j_new_bin.reset( + json_diffs[i].value_type_, + json_diffs[i].value_, + 0, + json_diffs[i].entry_var_type_, + nullptr)); + ASSERT_EQ(OB_SUCCESS, json_old->get_parent(parent)); + ObJsonNode *json_new = nullptr; + 
ASSERT_EQ(OB_SUCCESS, j_new_bin.to_tree(json_new)); + if(OB_NOT_NULL(parent)) { + ASSERT_EQ(OB_SUCCESS, parent->replace(json_old, json_new)); + } else { + ASSERT_EQ(OB_SUCCESS, j_base->replace(json_old, json_new)); + } + } else { + if(OB_NOT_NULL(parent)) { + parent = j_base; + } + // if (j_base->json_type() == ObJsonNodeType::J_OBJECT) { + // ASSERT_EQ(OB_SUCCESS, parent->object_remove()) + // } else { + // ASSERT_EQ(OB_SUCCESS, parent->array_remove()) + // } + } + } else if (json_diffs[i].op_ == ObJsonDiffOp::INSERT) { + ASSERT_EQ(OB_SUCCESS, j_base->seek(*json_path, json_path->path_node_cnt(), true, false, hit)); + ASSERT_EQ(0, hit.size()); + } else { + ASSERT_FALSE(true) << "invalid op" << (int)json_diffs[i].op_; + } + } + ObJsonBuffer j_src_buffer(&allocator); + ASSERT_EQ(OB_SUCCESS, j_base->print(j_src_buffer, false)); + ObJsonBin j_target_bin; + j_target_bin.set_seek_flag(false); + ObJsonBinCtx j_target_bin_ctx; + ASSERT_EQ(OB_SUCCESS, j_target_bin.reset(update_buffer, 0, &j_target_bin_ctx)); + ObJsonBuffer j_target_buffer(&allocator); + ASSERT_EQ(OB_SUCCESS, j_target_bin.print(j_target_buffer, false)); + ASSERT_EQ(std::string(j_src_buffer.ptr(), j_src_buffer.length()), std::string(j_target_buffer.ptr(), j_target_buffer.length())); + ASSERT_NE(std::string(j_src_buffer.ptr(), j_src_buffer.length()), std::string(j_text.ptr(), j_text.length())); + ASSERT_NE(std::string(j_target_buffer.ptr(), j_target_buffer.length()), std::string(j_text.ptr(), j_text.length())); + std::cout << "-------- origin" << std::endl; + std::cout << std::string(j_text.ptr(), j_text.length()) << std::endl; + std::cout << "-------- src" << std::endl; + std::cout << std::string(j_src_buffer.ptr(), j_src_buffer.length()) << std::endl; + std::cout << "-------- target" << std::endl; + std::cout << std::string(j_target_buffer.ptr(), j_target_buffer.length()) << std::endl; +} + // rapidjson 解析仅包含字符串的json text测试 // 输入: json text // 预期: 解析完整json tree @@ -58,10 +193,11 @@ TEST_F(TestJsonBin, 
test_tree_to_bin) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); - ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); // check result - char *ptr = result.ptr(); + uint64_t doc_header_len = sizeof(ObJsonBinDocHeader); + char *ptr = result.ptr() + doc_header_len; int64_t offset = 0; ObJsonBinHeader *header = reinterpret_cast(ptr); ASSERT_EQ(header->is_continuous_, 1); @@ -76,7 +212,7 @@ TEST_F(TestJsonBin, test_tree_to_bin) uint64_t obj_size; ObJsonVar::read_var(ptr + offset, header->obj_size_size_, &obj_size); - ASSERT_EQ(obj_size, result.length()); + ASSERT_EQ(doc_header_len + obj_size, result.length()); fprintf(stdout, "[test] used_size is %lu\n", obj_size); offset += ObJsonVar::get_var_size(header->obj_size_size_); @@ -122,10 +258,11 @@ TEST_F(TestJsonBin, t2b_array) ASSERT_TRUE(j_bin->is_bin()); common::ObString result; - ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); // check result - char *ptr = result.ptr(); + uint64_t doc_header_len = sizeof(ObJsonBinDocHeader); + char *ptr = result.ptr() + doc_header_len; uint64_t offset = 0; ObJsonBinHeader *header = reinterpret_cast(ptr); offset += OB_JSON_BIN_HEADER_LEN; @@ -139,7 +276,7 @@ TEST_F(TestJsonBin, t2b_array) ObJsonVar::read_var(ptr + offset, header->obj_size_size_, &array_size); offset += ObJsonVar::get_var_size(header->obj_size_size_); ASSERT_EQ(count, 6); - ASSERT_EQ(array_size, result.length()); + ASSERT_EQ(doc_header_len + array_size, result.length()); fprintf(stdout, "[test] array_size is %lu\n", array_size); uint64_t value_entry_size = ObJsonVar::get_var_size(header->entry_size_); @@ -258,7 +395,7 @@ TEST_F(TestJsonBin, deserialize_bin_to_tree) ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); common::ObString result; - ASSERT_EQ(OB_SUCCESS, 
j_bin->get_raw_binary(result)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); ObJsonBin *bin = static_cast(j_bin); ObJsonNode *new_json_tree = nullptr; @@ -298,7 +435,7 @@ TEST_F(TestJsonBin, test_bin_lookup) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); common::ObString result; - ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); // root ObJsonBin *bin = static_cast(j_bin); @@ -390,10 +527,11 @@ TEST_F(TestJsonBin, test_json_bin_load_without_common_header) ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); common::ObString result; - ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); // check result - char *ptr = result.ptr(); + uint64_t doc_header_len = sizeof(ObJsonBinDocHeader); + char *ptr = result.ptr() + doc_header_len; uint64_t offset = 0; ObJsonBinHeader *header = reinterpret_cast(ptr); ASSERT_EQ(header->type_, ObJBVerType::J_ARRAY_V0); @@ -401,10 +539,11 @@ TEST_F(TestJsonBin, test_json_bin_load_without_common_header) offset += OB_JSON_BIN_HEADER_LEN; uint64_t obj_size; ObJsonVar::read_var(ptr + offset + ObJsonVar::get_var_size(header->count_size_), header->obj_size_size_, &obj_size); - ASSERT_EQ(obj_size, result.length()); + ASSERT_EQ(doc_header_len + obj_size, result.length()); fprintf(stdout, "[test] obj_size is %lu\n", obj_size); ObJsonBin test_bin(result.ptr(), result.length(), &allocator); + test_bin.set_seek_flag(false); test_bin.reset_iter(); ASSERT_EQ(ObJsonNodeType::J_ARRAY, test_bin.json_type()); uint64_t sub_member_count = test_bin.element_count(); @@ -467,14 +606,18 @@ TEST_F(TestJsonBin, test_bin_update) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); common::ObString result; - ASSERT_EQ(OB_SUCCESS, 
j_bin->get_raw_binary(result)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); // root ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); uint64_t root_member_count = bin->element_count(); ASSERT_EQ(3, root_member_count); ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin->json_type()); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + ObString lkey("greeting"); bin->lookup(lkey); ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); @@ -497,7 +640,10 @@ TEST_F(TestJsonBin, test_bin_update) ObJsonInType::JSON_BIN, j_bin_val)); ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); ASSERT_EQ(OB_SUCCESS, bin->update(0, static_cast(j_bin_val))); - + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + ASSERT_EQ(update_ctx.is_rebuild_all_, false); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 1); bin->element(0); actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_STRING), actual_type); @@ -521,6 +667,10 @@ TEST_F(TestJsonBin, test_bin_update) ObJsonInType::JSON_BIN, j_bin_val)); ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); ASSERT_EQ(OB_SUCCESS, bin->update(1, static_cast(j_bin_val))); + ASSERT_EQ(3, update_ctx.binary_diffs_.count()); + ASSERT_EQ(update_ctx.is_rebuild_all_, false); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 2); bin->element(1); actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_DOUBLE), actual_type); @@ -542,7 +692,10 @@ TEST_F(TestJsonBin, test_bin_update) ObJsonInType::JSON_BIN, j_bin_val)); ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); ASSERT_EQ(OB_SUCCESS, bin->update(2, static_cast(j_bin_val))); - + ASSERT_EQ(5, update_ctx.binary_diffs_.count()); + ASSERT_EQ(update_ctx.is_rebuild_all_, false); + check_diff_valid(allocator, result, update_ctx); + 
check_json_diff_valid(allocator, j_text, update_ctx, 3); bin->element(2); actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_INT), actual_type); @@ -564,7 +717,10 @@ TEST_F(TestJsonBin, test_bin_update) ObJsonInType::JSON_BIN, j_bin_val)); ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); ASSERT_EQ(OB_SUCCESS, bin->update(3, static_cast(j_bin_val))); - + ASSERT_EQ(7, update_ctx.binary_diffs_.count()); + ASSERT_EQ(update_ctx.is_rebuild_all_, false); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 4); bin->element(3); actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_UINT), actual_type); @@ -596,10 +752,14 @@ TEST_F(TestJsonBin, test_bin_remove) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); common::ObString result; - ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); // root ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + uint64_t root_member_count = bin->element_count(); ASSERT_EQ(3, root_member_count); ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin->json_type()); @@ -623,6 +783,10 @@ TEST_F(TestJsonBin, test_bin_remove) { ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); ASSERT_EQ(OB_SUCCESS, bin->remove(0)); + ASSERT_EQ(1, update_ctx.binary_diffs_.count()); + ASSERT_EQ(update_ctx.is_rebuild_all_, false); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 1); bin->element(0); uint8_t actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_DOUBLE), actual_type); @@ -632,6 +796,10 @@ TEST_F(TestJsonBin, test_bin_remove) { ASSERT_EQ(OB_SUCCESS, 
bin->move_parent_iter()); ASSERT_EQ(OB_SUCCESS, bin->remove(0)); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + ASSERT_EQ(update_ctx.is_rebuild_all_, false); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 2); bin->element(0); uint8_t actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_INT), actual_type); @@ -641,6 +809,10 @@ TEST_F(TestJsonBin, test_bin_remove) { ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); ASSERT_EQ(OB_SUCCESS, bin->remove(0)); + ASSERT_EQ(3, update_ctx.binary_diffs_.count()); + ASSERT_EQ(update_ctx.is_rebuild_all_, false); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 3); bin->element(0); uint8_t actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_INT), actual_type); @@ -650,6 +822,10 @@ TEST_F(TestJsonBin, test_bin_remove) { ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); ASSERT_EQ(OB_SUCCESS, bin->remove(0)); + ASSERT_EQ(4, update_ctx.binary_diffs_.count()); + ASSERT_EQ(update_ctx.is_rebuild_all_, false); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 4); bin->element(0); uint8_t actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_BOOLEAN), actual_type); @@ -660,10 +836,15 @@ TEST_F(TestJsonBin, test_bin_remove) bin->reset_iter(); // test reset iter bin->lookup(lkey); ASSERT_EQ(OB_SUCCESS, bin->remove(0)); + ASSERT_EQ(5, update_ctx.binary_diffs_.count()); + ASSERT_EQ(update_ctx.is_rebuild_all_, false); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 5); bin->element(0); uint8_t actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_NULL), actual_type); } + ASSERT_EQ(OB_SUCCESS, bin->reset_iter()); size_t free_space; ASSERT_EQ(OB_SUCCESS, 
bin->get_free_space(free_space)); ASSERT_GT(free_space, 0); @@ -687,12 +868,20 @@ TEST_F(TestJsonBin, test_bin_update_with_obj) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text2, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin2)); + common::ObString result2; + ASSERT_EQ(OB_SUCCESS, j_bin2->get_raw_binary(result2, &allocator)); + ObJsonBin *bin = static_cast(j_bin2); + bin->set_seek_flag(false); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + // 1. update j_bin2.b.b1 from 321 ---> "updated" { common::ObJsonBuffer buf(&allocator); ASSERT_EQ(OB_SUCCESS, j_bin1->print(buf, true)); std::cout << buf.ptr() << std::endl; ObJsonBin *bin2 = static_cast(j_bin2); + bin2->set_seek_flag(false); ASSERT_EQ(OB_SUCCESS, bin2->lookup("b")); size_t idx; ASSERT_EQ(OB_SUCCESS, bin2->lookup_index("b1", &idx)); @@ -703,12 +892,17 @@ TEST_F(TestJsonBin, test_bin_update_with_obj) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, ObJsonInType::JSON_BIN, j_bin_str)); ASSERT_EQ(OB_SUCCESS, bin2->update(idx, static_cast(j_bin_str))); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + ASSERT_EQ(update_ctx.is_rebuild_all_, false); + check_diff_valid(allocator, result2, update_ctx); + check_json_diff_valid(allocator, j_text2, update_ctx, 1); ASSERT_EQ(OB_SUCCESS, bin2->element(idx)); uint8_t actual_type = (static_cast(bin2->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_STRING), actual_type); ObString new_ele("updated"); ObString data(bin2->get_data_length(), bin2->get_data()); ASSERT_EQ(0, new_ele.compare(data)); + ASSERT_EQ(OB_SUCCESS, bin2->reset_iter()); size_t free_space; ASSERT_EQ(OB_SUCCESS, bin2->get_free_space(free_space)); ASSERT_EQ(free_space, 2); // from 321 in storage not inlined, use 2 bytes @@ -716,11 +910,21 @@ TEST_F(TestJsonBin, test_bin_update_with_obj) ASSERT_EQ(OB_SUCCESS, bin2->reset_iter()); } // 2. 
update j_bin1.b with j_bin2.b + common::ObString result1; + ASSERT_EQ(OB_SUCCESS, j_bin1->get_raw_binary(result1, &allocator)); + ObJsonBin *bin1 = static_cast(j_bin1); + bin1->set_seek_flag(false); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin1)); + ObJsonBinUpdateCtx &update_ctx1 = *bin1->get_update_ctx(); + { size_t free_space; ObJsonBin *bin2 = static_cast(j_bin2); + bin2->set_seek_flag(false); ASSERT_EQ(OB_SUCCESS, bin2->get_free_space(free_space)); ObJsonBin *bin = static_cast(j_bin1); + bin->set_seek_flag(false); ASSERT_EQ(OB_SUCCESS, bin->get_free_space(free_space)); size_t idx; ASSERT_EQ(OB_SUCCESS, bin->lookup_index("b", &idx)); @@ -729,8 +933,9 @@ TEST_F(TestJsonBin, test_bin_update_with_obj) ObJsonBuffer rbuf(&allocator); ASSERT_EQ(OB_SUCCESS, bin2->rebuild_at_iter(rbuf)); - ASSERT_EQ(OB_SUCCESS, bin2->get_free_space(free_space)); + // ASSERT_EQ(OB_SUCCESS, bin2->get_free_space(free_space)); ObJsonBin testbin(rbuf.ptr(), rbuf.length(), &allocator); + testbin.set_seek_flag(false); ASSERT_EQ(OB_SUCCESS, testbin.reset_iter()); ASSERT_EQ(OB_SUCCESS, testbin.get_free_space(free_space)); ObIJsonBase *j_bin_test = &testbin; @@ -739,8 +944,12 @@ TEST_F(TestJsonBin, test_bin_update_with_obj) std::cout << buf.ptr() << std::endl; ASSERT_EQ(OB_SUCCESS, bin->update(idx, bin2)); + ASSERT_EQ(2, update_ctx1.binary_diffs_.count()); + ASSERT_EQ(update_ctx1.is_rebuild_all_, false); + check_diff_valid(allocator, result1, update_ctx1); + check_json_diff_valid(allocator, j_text, update_ctx1, 1); // print free space - + ASSERT_EQ(OB_SUCCESS, testbin.reset_iter()); ASSERT_EQ(OB_SUCCESS, bin->get_free_space(free_space)); ASSERT_GT(free_space, 0); fprintf(stdout, "[test] after update bin free_space is %zu\n", free_space); @@ -784,7 +993,7 @@ TEST_F(TestJsonBin, issue_37549565) ObIJsonBase *j_bin = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); - ObJsonBaseVector hit; + 
ObJsonSeekResult hit; int cnt = test_path.path_node_cnt(); ret = j_bin->seek(test_path, cnt, false, false, hit); ASSERT_EQ(OB_SUCCESS, ret); @@ -808,7 +1017,7 @@ TEST_F(TestJsonBin, test_bin_to_tree_after_seek) ObIJsonBase *j_bin = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); - ObJsonBaseVector hit; + ObJsonSeekResult hit; int cnt = test_path.path_node_cnt(); ret = j_bin->seek(test_path, cnt, false, false, hit); ASSERT_EQ(OB_SUCCESS, ret); @@ -842,7 +1051,7 @@ TEST_F(TestJsonBin, datetime) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_datetime, ObJsonInType::JSON_BIN, j_bin_datetime)); ObString res_str; - ASSERT_EQ(OB_SUCCESS, j_bin_datetime->get_raw_binary(res_str)); + ASSERT_EQ(OB_SUCCESS, j_bin_datetime->get_raw_binary(res_str, &allocator)); // json_cell_str ObIJsonBase *j_bin2 = NULL; @@ -961,7 +1170,7 @@ TEST_F(TestJsonBin, test_seek_member) { ObIJsonBase *j_bin = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); - ObJsonBaseVector hit; + ObJsonSeekResult hit; int cnt = test_path.path_node_cnt(); long t_seek1 =getCurrentTime(); ret = j_bin->seek(test_path, cnt, false, false, hit); @@ -996,7 +1205,7 @@ TEST_F(TestJsonBin, test_seek_member_wildcard) { ObIJsonBase *j_bin = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); - ObJsonBaseVector hit; + ObJsonSeekResult hit; int cnt = test_path.path_node_cnt(); long t_seek1 =getCurrentTime(); std::cout<<"time of seek:"<seek(test_path, cnt, true, false, hit)); ASSERT_EQ(hit.size(), 1); @@ -1038,7 +1247,7 @@ TEST_F(TestJsonBin, test_seek_array_range) { ObIJsonBase *j_bin = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); - ObJsonBaseVector 
hit; + ObJsonSeekResult hit; int cnt = test_path.path_node_cnt(); ASSERT_EQ(OB_SUCCESS, j_bin->seek(test_path, cnt, false, false, hit)); ASSERT_EQ(hit.size(), 2); @@ -1066,7 +1275,7 @@ TEST_F(TestJsonBin, test_seek_ellipsis) { ObIJsonBase *j_bin = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); - ObJsonBaseVector hit; + ObJsonSeekResult hit; int cnt = test_path.path_node_cnt(); long t_seek1 =getCurrentTime(); std::cout<<"time of seek:"<(j_bin); + bin->set_seek_flag(false); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + ObIJsonBase *j_bin1 = nullptr; + ObString lkey("greeting"); + ASSERT_EQ(OB_SUCCESS, j_bin->get_object_value(lkey, j_bin1)); + + ObJsonString j_str("hahahahahah", strlen("hahahahahah")); + ObIJsonBase *j_tree_str = &j_str; + ObIJsonBase *j_bin_str = NULL; + // TREE -> BIN + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, + ObJsonInType::JSON_BIN, j_bin_str)); + ASSERT_EQ(OB_SUCCESS, j_bin->object_add("greeting", j_bin_str)); + + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 1); + + // after j_bin has been update, j_bin1 is unavailable + ASSERT_EQ(false, j_bin->is_tree()); + size_t free_space; + ASSERT_EQ(OB_SUCCESS, j_bin->get_free_space(free_space)); + ASSERT_GT(free_space, 0); + fprintf(stdout, "[test] after update bin free_space is %zu\n", free_space); + + // append new key, will make j_bin to tree + ASSERT_EQ(OB_SUCCESS, j_bin->object_add("test_new_key", j_bin_str)); + ASSERT_EQ(false, j_bin->is_tree()); + ASSERT_TRUE(update_ctx.is_rebuild_all_); + + ObJsonBuffer buf(&allocator); + ASSERT_EQ(OB_SUCCESS, 
j_bin->print(buf, true)); + std::cout << buf.ptr() << std::endl; + EXPECT_STREQ(buf.ptr(), "{\"farewell\": 2, \"greeting\": \"hahahahahah\", \"json_text\": 3, \"test_new_key\": \"hahahahahah\"}"); +} + +TEST_F(TestJsonBin, test_bin_object_add2) +{ + common::ObString j_text("{ \"greeting\" : [\"test\", 1.1, 2, -10, true, null], \"farewell\" : 2, \"json_text\" : 3 }"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); ObIJsonBase *j_bin1 = nullptr; ObString lkey("greeting"); @@ -1105,19 +1370,26 @@ TEST_F(TestJsonBin, test_bin_object_add) ASSERT_EQ(OB_SUCCESS, j_bin->object_add("greeting", j_bin_str)); // after j_bin has been update, j_bin1 is unavailable ASSERT_EQ(false, j_bin->is_tree()); + + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 1); + size_t free_space; ASSERT_EQ(OB_SUCCESS, j_bin->get_free_space(free_space)); - ASSERT_GT(free_space, 0); + //ASSERT_GT(free_space, 0); fprintf(stdout, "[test] after update bin free_space is %zu\n", free_space); // append new key, will make j_bin to tree - ASSERT_EQ(OB_SUCCESS, j_bin->object_add("test_new_key", j_bin_str)); + ASSERT_EQ(OB_SUCCESS, j_bin->object_add("json_texa", j_bin_str)); ASSERT_EQ(false, j_bin->is_tree()); + ASSERT_TRUE(update_ctx.is_rebuild_all_); ObJsonBuffer buf(&allocator); ASSERT_EQ(OB_SUCCESS, j_bin->print(buf, true)); std::cout << buf.ptr() << std::endl; - EXPECT_STREQ(buf.ptr(), "{\"farewell\": 2, \"greeting\": 
\"hahahahahah\", \"json_text\": 3, \"test_new_key\": \"hahahahahah\"}"); + EXPECT_STREQ(buf.ptr(), "{\"farewell\": 2, \"greeting\": \"hahahahahah\", \"json_texa\": \"hahahahahah\", \"json_text\": 3}"); } TEST_F(TestJsonBin, test_bin_append) @@ -1127,7 +1399,13 @@ TEST_F(TestJsonBin, test_bin_append) ObIJsonBase *j_bin = NULL; ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); common::ObString key_str1("greeting"); common::ObString j_text1("[\"greeting\", 1.1, 2, -10, true, null]"); @@ -1135,8 +1413,10 @@ TEST_F(TestJsonBin, test_bin_append) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text1, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin1)); ObJsonBin *bin1 = static_cast(j_bin1); + bin1->set_seek_flag(false); ASSERT_EQ(OB_SUCCESS, bin->add(key_str1, bin1)); + ASSERT_TRUE(update_ctx.is_rebuild_all_); ASSERT_EQ(OB_SUCCESS, bin->lookup(key_str1)); common::ObString str2("json_string-json_string-json_string-json_string-json_string-json_string-json_string-" @@ -1148,7 +1428,9 @@ TEST_F(TestJsonBin, test_bin_append) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, ObJsonInType::JSON_BIN, j_bin_str)); ObJsonBin *bin2 = static_cast(j_bin_str); + bin2->set_seek_flag(false); ASSERT_EQ(OB_SUCCESS, bin->append(bin2)); + ASSERT_TRUE(update_ctx.is_rebuild_all_); ObJsonBuffer buf(&allocator); ASSERT_EQ(OB_SUCCESS, j_bin->print(buf, true)); std::cout << buf.ptr() << std::endl; @@ -1162,7 +1444,9 @@ TEST_F(TestJsonBin, test_bin_array_insert) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, 
j_bin)); ObJsonBin *bin = static_cast(j_bin); - + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + common::ObString str1("json_string-json_string-json_string-json_string-json_string-json_string-json_string-" "json_string-json_string-json_string-json_string-json_string-json_string-json_string-" "json_string-json_string-json_string-json_string-json_string-json_string-json_string"); @@ -1175,7 +1459,7 @@ TEST_F(TestJsonBin, test_bin_array_insert) ASSERT_EQ(OB_SUCCESS, bin->insert(bin1, 10)); ASSERT_EQ(7, bin->element_count()); - + ASSERT_TRUE(update_ctx.is_rebuild_all_); ObJsonBuffer buf(&allocator); ASSERT_EQ(OB_SUCCESS, j_bin_str->print(buf, true)); std::cout << buf.ptr() << std::endl; @@ -1251,13 +1535,13 @@ TEST_F(TestJsonBin, large_array) // create new json bin load and validate { ObString raw_bin; - j_bin_array->get_raw_binary(raw_bin); + j_bin_array->get_raw_binary(raw_bin, &allocator); ObJsonBin new_json_bin(raw_bin.ptr(), raw_bin.length(), &allocator); ASSERT_EQ(OB_SUCCESS, new_json_bin.reset_iter()); ObIJsonBase *j_base_new = &new_json_bin; validate_large_array(j_base_new, 80000); } - allocator.reset(); + //allocator.reset(); } TEST_F(TestJsonBin, large_two_depth_array) @@ -1292,7 +1576,7 @@ TEST_F(TestJsonBin, large_two_depth_array) ASSERT_EQ(OB_SUCCESS, j_bin_val->get_array_element(1, j_bin1)); validate_large_array(j_bin1, 80000); } - allocator.reset(); + //allocator.reset(); } TEST_F(TestJsonBin, large_object_array) @@ -1329,7 +1613,7 @@ TEST_F(TestJsonBin, large_object_array) EXPECT_STREQ(buf.ptr(), "c"); // large obj to raw binary and read raw binary again ObString raw_bin; - j_bin_val->get_raw_binary(raw_bin); + j_bin_val->get_raw_binary(raw_bin, &allocator); ObJsonBin new_json_bin(raw_bin.ptr(), raw_bin.length(), &allocator); ASSERT_EQ(OB_SUCCESS, new_json_bin.reset_iter()); ObIJsonBase *nnwr = &new_json_bin; @@ -1342,7 +1626,7 @@ TEST_F(TestJsonBin, large_object_array) 
ASSERT_EQ(OB_SUCCESS, j_bin1->print(buf, false)); EXPECT_STREQ(buf.ptr(), "c"); } - allocator.reset(); + //allocator.reset(); } TEST_F(TestJsonBin, large_50_depth_array) @@ -1376,7 +1660,7 @@ TEST_F(TestJsonBin, large_50_depth_array) curr_wraper = j_bin1; } validate_large_array(curr_wraper, 80000); - allocator.reset(); + //allocator.reset(); } TEST_F(TestJsonBin, large_50_depth_object) @@ -1410,7 +1694,7 @@ TEST_F(TestJsonBin, large_50_depth_object) curr_wraper = j_bin1; } validate_large_array(curr_wraper, 80000); - allocator.reset(); + //allocator.reset(); } TEST_F(TestJsonBin, large_array_update) @@ -1429,10 +1713,14 @@ TEST_F(TestJsonBin, large_array_update) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); common::ObString result; - ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); // root ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + ObString lkey("greeting"); bin->lookup(lkey); @@ -1448,13 +1736,17 @@ TEST_F(TestJsonBin, large_array_update) ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); ASSERT_EQ(OB_SUCCESS, bin->update(0, static_cast(j_bin_arr))); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); - bin->element(2); + ASSERT_EQ(OB_SUCCESS, bin->element(0)); + ASSERT_EQ(80000, bin->element_count()); ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); ASSERT_EQ(OB_SUCCESS, bin->update(1, static_cast(j_bin_arr))); - - - bin->element(2); + ASSERT_TRUE(update_ctx.is_rebuild_all_); + ASSERT_EQ(OB_SUCCESS, bin->element(1)); + ASSERT_EQ(80000, bin->element_count()); ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); actual_type = (static_cast(bin->json_type()) & 0x7F); 
ASSERT_EQ(static_cast(ObJsonNodeType::J_ARRAY), actual_type); @@ -1465,7 +1757,70 @@ TEST_F(TestJsonBin, large_array_update) bin->element(1); ASSERT_EQ(bin->element_count(), 80000); } - allocator.reset(); + //allocator.reset(); +} + +TEST_F(TestJsonBin, large_array_insert) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObJsonArray j_arr(&allocator); + large_test_make_array(allocator, j_arr); + ObIJsonBase *j_tree_arr = &j_arr; + ObIJsonBase *j_bin_arr = NULL; + // TREE -> BIN + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_arr, + ObJsonInType::JSON_BIN, j_bin_arr)); + + common::ObString j_text("{ \"greeting\" : [\"test\", 1.1, 2, -10, true, null], \"farewell\" : 2, \"json_text\" : 3 }"); + ObIJsonBase *j_bin = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + + // root + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + ObString lkey("greeting"); + bin->lookup(lkey); + + // use large array insert + { + bin->element(0); + uint8_t actual_type = (static_cast(bin->json_type()) & 0x7F); + ASSERT_EQ(static_cast(ObJsonNodeType::J_STRING), actual_type); + ObString ele0("test"); + ObString data(bin->get_data_length(), bin->get_data()); + std::cout << "data:" << data << std::endl; + ASSERT_EQ(0, ele0.compare(data)); + + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->array_insert(0, static_cast(j_bin_arr))); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); + ASSERT_EQ(OB_SUCCESS, bin->element(0)); + ASSERT_EQ(80000, bin->element_count()); + 
ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->array_insert(1, static_cast(j_bin_arr))); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); + ASSERT_TRUE(update_ctx.is_rebuild_all_); + ASSERT_EQ(OB_SUCCESS, bin->element(1)); + ASSERT_EQ(80000, bin->element_count()); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); + bin->element(0); + ASSERT_EQ(bin->element_count(), 80000); + + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + bin->element(1); + ASSERT_EQ(bin->element_count(), 80000); + } + //allocator.reset(); } void parse_string_with_length(ObIAllocator *allocator, uint64_t size) @@ -1514,10 +1869,11 @@ TEST_F(TestJsonBin, test_binary_replace) ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); common::ObString result; - ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); // root ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); uint64_t root_member_count = bin->element_count(); ASSERT_EQ(3, root_member_count); ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin->json_type()); @@ -1527,9 +1883,12 @@ TEST_F(TestJsonBin, test_binary_replace) ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); uint64_t sub_member_count = bin->element_count(); ASSERT_EQ(6, sub_member_count); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); // "test" --> "hahahahahah" { - bin->element(0); + ASSERT_EQ(OB_SUCCESS, bin->element(0)); uint8_t actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_STRING), actual_type); ObString ele0("test"); @@ -1550,9 +1909,12 @@ TEST_F(TestJsonBin, test_binary_replace) ASSERT_EQ(OB_SUCCESS, tmp_bin.reset_iter()); ObIJsonBase *j_tmp_bin = &tmp_bin; ObIJsonBase *j_bin_replace = NULL; - 
ASSERT_EQ(OB_SUCCESS, j_tmp_bin->get_array_element(0, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->get_array_element(0, j_bin_replace)); ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_str)); - + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 1); bin->element(0); actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_STRING), actual_type); @@ -1560,6 +1922,43 @@ TEST_F(TestJsonBin, test_binary_replace) ObString data1(bin->get_data_length(), bin->get_data()); ASSERT_EQ(0, new_ele.compare(data1)); } + + // "hahahahahah" --> "hahahahahahaaaa" + { + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->element(0)); + uint8_t actual_type = (static_cast(bin->json_type()) & 0x7F); + ASSERT_EQ(static_cast(ObJsonNodeType::J_STRING), actual_type); + ObString ele0("hahahahahah"); + ObString data(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, ele0.compare(data)); + + ObJsonString j_str("hahahahahahaaaa", strlen("hahahahahahaaaa")); + ObIJsonBase *j_tree_str = &j_str; + ObIJsonBase *j_bin_str = NULL; + // TREE -> BIN + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, + ObJsonInType::JSON_BIN, j_bin_str)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + + ObString objstr; + ASSERT_EQ(OB_SUCCESS, bin->raw_binary_at_iter(objstr)); + ObJsonBin tmp_bin(objstr.ptr(), objstr.length(), &allocator); + ASSERT_EQ(OB_SUCCESS, tmp_bin.reset_iter()); + ObIJsonBase *j_tmp_bin = &tmp_bin; + ObIJsonBase *j_bin_replace = NULL; + ASSERT_EQ(OB_SUCCESS, bin->get_array_element(0, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_str)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(4, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + bin->element(0); + actual_type = 
(static_cast(bin->json_type()) & 0x7F); + ASSERT_EQ(static_cast(ObJsonNodeType::J_STRING), actual_type); + ObString new_ele("hahahahahahaaaa"); + ObString data1(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, new_ele.compare(data1)); + } // 1.1 --> 3.1415 { ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); @@ -1582,9 +1981,11 @@ TEST_F(TestJsonBin, test_binary_replace) ASSERT_EQ(OB_SUCCESS, tmp_bin.reset_iter()); ObIJsonBase *j_tmp_bin = &tmp_bin; ObIJsonBase *j_bin_replace = NULL; - ASSERT_EQ(OB_SUCCESS, j_tmp_bin->get_array_element(1, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->get_array_element(1, j_bin_replace)); ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_double)); - + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(5, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); bin->element(1); actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_DOUBLE), actual_type); @@ -1612,9 +2013,11 @@ TEST_F(TestJsonBin, test_binary_replace) ASSERT_EQ(OB_SUCCESS, tmp_bin.reset_iter()); ObIJsonBase *j_tmp_bin = &tmp_bin; ObIJsonBase *j_bin_replace = NULL; - ASSERT_EQ(OB_SUCCESS, j_tmp_bin->get_array_element(2, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->get_array_element(2, j_bin_replace)); ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_int)); - + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(7, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); bin->element(2); actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_INT), actual_type); @@ -1642,9 +2045,11 @@ TEST_F(TestJsonBin, test_binary_replace) ASSERT_EQ(OB_SUCCESS, tmp_bin.reset_iter()); ObIJsonBase *j_tmp_bin = &tmp_bin; ObIJsonBase *j_bin_replace = NULL; - ASSERT_EQ(OB_SUCCESS, j_tmp_bin->get_array_element(3, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->get_array_element(3, j_bin_replace)); ASSERT_EQ(OB_SUCCESS, 
bin->replace(j_bin_replace, j_bin_uint)); - + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(9, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); bin->element(3); actual_type = (static_cast(bin->json_type()) & 0x7F); ASSERT_EQ(static_cast(ObJsonNodeType::J_UINT), actual_type); @@ -1665,11 +2070,1332 @@ TEST_F(TestJsonBin, test_double_nan) ObJsonInType::JSON_BIN, new_bin)); } +TEST_F(TestJsonBin, test_bin_construct_from_tree) +{ + ObArenaAllocator allocator(ObModIds::TEST); + { + ObString new_value_str("abcd"); + ObJsonString new_value_node(new_value_str.ptr(), new_value_str.length()); + ObIJsonBase *j_bin_str = nullptr; + // TREE -> BIN + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform( + &allocator, + &new_value_node, + ObJsonInType::JSON_BIN, + j_bin_str)); + ASSERT_EQ(ObJsonNodeType::J_STRING, j_bin_str->json_type()); + ASSERT_TRUE(j_bin_str->is_bin()); + } + + { + ObJsonNull j_null(true); + ObIJsonBase *j_tree_null = &j_null; + ObIJsonBase *j_bin_null = NULL; + // TREE -> BIN + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_null, + ObJsonInType::JSON_BIN, j_bin_null)); + ASSERT_EQ(ObJsonNodeType::J_NULL, j_bin_null->json_type()); + ASSERT_TRUE(j_bin_null->is_bin()); + //ASSERT_EQ(1, j_bin_null); + } + + { + ObJsonInt j_int(-655666); + ObIJsonBase *j_tree_int = &j_int; + ObIJsonBase *j_bin_int = NULL; + // TREE -> BIN + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_int, + ObJsonInType::JSON_BIN, j_bin_int)); + ASSERT_EQ(ObJsonNodeType::J_INT, j_bin_int->json_type()); + ASSERT_TRUE(j_bin_int->is_bin()); + } + + { + ObString data("my opaque"); + ObJsonOpaque j_opaque(data, ObObjType::ObVarcharType); + ObIJsonBase *j_tree_opaque = &j_opaque; + ObIJsonBase *j_bin_opaque = NULL; + // TREE -> BIN + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_opaque, + ObJsonInType::JSON_BIN, j_bin_opaque)); + ASSERT_EQ(ObJsonNodeType::J_OPAQUE, 
j_bin_opaque->json_type()); + ASSERT_TRUE(j_bin_opaque->is_bin()); + ASSERT_EQ(ObObjType::ObVarcharType, j_bin_opaque->field_type()); + ASSERT_EQ("my opaque", std::string(j_bin_opaque->get_data(), j_bin_opaque->get_data_length())); + + ObIJsonBase *j_tree_opaque_2 = NULL; + // BIN -> TREE + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_bin_opaque, + ObJsonInType::JSON_TREE, j_tree_opaque_2)); + ASSERT_EQ(ObJsonNodeType::J_OPAQUE, j_tree_opaque_2->json_type()); + ASSERT_TRUE(j_tree_opaque_2->is_tree()); + ASSERT_EQ("my opaque", std::string(j_tree_opaque_2->get_data(), j_tree_opaque_2->get_data_length())); + } +} + + +TEST_F(TestJsonBin, test_bin_construct_from_bin) +{ + ObArenaAllocator allocator(ObModIds::TEST); + { + common::ObString j_text("{ \"my_str\" : \"hello my_str\", \"my_inline_number\" : 2, \"my_null\" : null, \"my_number\" : 655450}"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, j_bin->json_type()); + ASSERT_TRUE(j_bin->is_bin()); + + ObIJsonBase *child_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, j_bin->get_object_value(ObString("my_inline_number"), child_bin)); + ASSERT_EQ(2, child_bin->get_int()); + ObString raw_bin; + ASSERT_EQ(OB_SUCCESS, child_bin->get_raw_binary(raw_bin, &allocator)); + } + + { + common::ObString j_text("[\"hello my_str\", 2, null, 655450]"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, j_bin->json_type()); + 
ASSERT_TRUE(j_bin->is_bin()); + } +} + +TEST_F(TestJsonBin, test_bin_construct_from_nested_bin) +{ + ObArenaAllocator allocator(ObModIds::TEST); + { + common::ObString j_text("{\"my_obj\" : { \"my_str\" : \"hello my_str\", \"my_inline_number\" : 2, \"my_null\" : null, \"my_number\" : 655450}}"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, j_bin->json_type()); + ASSERT_TRUE(j_bin->is_bin()); + } + + { + common::ObString j_text("{\"my_array\" : [\"hello my_str\", 2, null, 655450]}"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, j_bin->json_type()); + ASSERT_TRUE(j_bin->is_bin()); + } +} + +TEST_F(TestJsonBin, test_empty_object_seek) +{ + ObArenaAllocator allocator(ObModIds::TEST); + { + ObString path_str("$.\"data\".\"a\""); + ObJsonPath test_path(path_str, &allocator); + ASSERT_EQ(OB_SUCCESS, test_path.parse_path()); + + common::ObString j_text("{\"data\" : {}}"); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, + ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, j_bin->json_type()); + ASSERT_TRUE(j_bin->is_bin()); + + ObJsonSeekResult hit; + int cnt = test_path.path_node_cnt(); + ASSERT_EQ(OB_SUCCESS, j_bin->seek(test_path, cnt, false, false, hit)); + ASSERT_EQ(0, hit.size()); 
+ } +} + +TEST_F(TestJsonBin, test_empty_array_seek) +{ + ObArenaAllocator allocator(ObModIds::TEST); + { + ObString path_str("$.\"data[0]\""); + ObJsonPath test_path(path_str, &allocator); + ASSERT_EQ(OB_SUCCESS, test_path.parse_path()); + + common::ObString j_text("{\"data\" : []}"); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, + ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, j_bin->json_type()); + ASSERT_TRUE(j_bin->is_bin()); + + ObJsonSeekResult hit; + int cnt = test_path.path_node_cnt(); + ASSERT_EQ(OB_SUCCESS, j_bin->seek(test_path, cnt, false, false, hit)); + ASSERT_EQ(0, hit.size()); + } +} + + +TEST_F(TestJsonBin, test_array_array) +{ + ObArenaAllocator allocator(ObModIds::TEST); + { + const char *str = "[[\"sfsdfasdfasdfasdfasdfasdfasdfasdf\", \"sdfsdfsdfsdfsdfsdsdfsfsdfsdfsdfsd\"], [\"sfsdfasdfasdfasdfasdfasdfasdfasdf\", \"sdfsdfsdfsdfsdfsdsdfsfsdfsdfsdfsd\"], 217]"; + common::ObString j_text(str); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, + ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, j_bin->json_type()); + ASSERT_TRUE(j_bin->is_bin()); + + ObJsonBuffer print_buf(&allocator); + ASSERT_EQ(OB_SUCCESS, j_bin->print(print_buf, false)); + ASSERT_EQ(std::string(j_text.ptr(), j_text.length()), std::string(print_buf.string().ptr(), print_buf.string().length())); + + ObString raw_bin; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(raw_bin, &allocator)); + + ObJsonBin new_bin(raw_bin.ptr(), raw_bin.length(), &allocator); + ASSERT_EQ(OB_SUCCESS, new_bin.reset_iter()); + print_buf.reset(); + ASSERT_EQ(OB_SUCCESS, new_bin.print(print_buf, false)); + 
ASSERT_EQ(std::string(j_text.ptr(), j_text.length()), std::string(print_buf.string().ptr(), print_buf.string().length())); + + } +} + + +TEST_F(TestJsonBin, test_bin_insert) +{ + common::ObString j_text("{ \"greeting\" : [\"test\", 1.1, 2, -10, true, null], \"farewell\" : 2, \"json_text\" : 3 }"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + + // root + ObJsonBin& bin = *static_cast(j_bin); //(result.ptr(), result.length(), &allocator); + bin.set_seek_flag(false); + ASSERT_EQ(OB_SUCCESS, bin.reset_iter()); + ASSERT_EQ(3, bin.element_count()); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin.json_type()); + + ObString lkey("greeting"); + bin.lookup(lkey); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin.json_type()); + ASSERT_EQ(6, bin.element_count()); + + ObString new_value_str("abcd"); + ObJsonString new_value_node(new_value_str.ptr(), new_value_str.length()); + ObIJsonBase *j_bin_str = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform( + &allocator, + &new_value_node, + ObJsonInType::JSON_BIN, + j_bin_str)); + ASSERT_EQ(ObJsonNodeType::J_STRING, j_bin_str->json_type()); + ASSERT_TRUE(j_bin_str->is_bin()); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, &bin)); + ObJsonBinUpdateCtx &update_ctx = *bin.get_update_ctx(); + + ASSERT_EQ(OB_SUCCESS, bin.array_insert(3, j_bin_str)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin.json_type()); + ASSERT_EQ(7, bin.element_count()); + ASSERT_EQ(OB_SUCCESS, bin.element(3)); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin.json_type()); + ASSERT_EQ(std::string(new_value_str.ptr(), new_value_str.length()), std::string(bin.get_data(), 
bin.get_data_length())); +} + +TEST_F(TestJsonBin, test_inline_replace) +{ + common::ObString j_text("{ \"name\" : \"Mike\", \"age\" : 30, \"ids\" : [1, 200, 3 , 400, true, 100.1, null, 20000, false] }"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + + // root + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + uint64_t root_member_count = bin->element_count(); + ASSERT_EQ(3, root_member_count); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin->json_type()); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + { + ObString name_key("name"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(name_key)); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin->json_type()); + ObString ele0("Mike"); + ObString data(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, ele0.compare(data)); + + ObJsonNull j_null(true); + ObIJsonBase *j_tree_null = &j_null; + ObIJsonBase *j_bin_null = nullptr; + + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_null, + ObJsonInType::JSON_BIN, j_bin_null)); + ASSERT_EQ(ObJsonNodeType::J_NULL, j_bin_null->json_type()); + ASSERT_TRUE(j_bin_null->is_bin()); + + ObIJsonBase *j_bin_replace = nullptr; + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->get_object_value(name_key, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_null)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(1, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->lookup(name_key)); + ASSERT_EQ(ObJsonNodeType::J_NULL, bin->json_type()); + } + + { + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ObString 
age_key("age"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(age_key)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_EQ(30, bin->get_int()); + + ObJsonInt j_int(35); + ObIJsonBase *j_tree_int = &j_int; + ObIJsonBase *j_bin_int = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_int, + ObJsonInType::JSON_BIN, j_bin_int)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + + ObIJsonBase *j_bin_replace = nullptr; + ASSERT_EQ(OB_SUCCESS, bin->get_object_value(age_key, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_int)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->lookup(age_key)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_EQ(j_int.get_int(), bin->get_int()); + } + + { + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ObString ids_key("ids"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(ids_key)); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); + ASSERT_EQ(9, bin->element_count()); + + { + ASSERT_EQ(OB_SUCCESS, bin->element(0)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_EQ(1, bin->get_int()); + + ObJsonInt j_int(15); + ObIJsonBase *j_tree_int = &j_int; + ObIJsonBase *j_bin_int = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_int, + ObJsonInType::JSON_BIN, j_bin_int)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + + ObIJsonBase *j_bin_replace = nullptr; + ASSERT_EQ(OB_SUCCESS, bin->get_array_element(0, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_int)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(3, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->element(0)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_EQ(j_int.get_int(), bin->get_int()); + } + } +} + 
+TEST_F(TestJsonBin, test_inplace_replace) +{ + common::ObString j_text("{ \"name\" : \"Mike\", \"money\" : 3000.5, \"ids\" : [1, 200, 3 , 400, true, 100.1, null, 20000, false] }"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + + // root + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + uint64_t root_member_count = bin->element_count(); + ASSERT_EQ(3, root_member_count); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin->json_type()); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + { + ObString name_key("name"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(name_key)); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin->json_type()); + ObString ele0("Mike"); + ObString data(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, ele0.compare(data)); + + ObJsonString j_str("John", strlen("John")); + ObIJsonBase *j_tree_str = &j_str; + ObIJsonBase *j_bin_str = nullptr; + + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, + ObJsonInType::JSON_BIN, j_bin_str)); + ASSERT_EQ(ObJsonNodeType::J_STRING, j_bin_str->json_type()); + ASSERT_TRUE(j_bin_str->is_bin()); + + ObIJsonBase *j_bin_replace = nullptr; + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->get_object_value(name_key, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_str)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(1, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->lookup(name_key)); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin->json_type()); + ObString ele1("John"); + ObString data1(bin->get_data_length(), bin->get_data()); + 
ASSERT_EQ(0, ele1.compare(data1)); + } + + { + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ObString money_key("money"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(money_key)); + ASSERT_EQ(ObJsonNodeType::J_DOUBLE, bin->json_type()); + ASSERT_FLOAT_EQ(3000.5, bin->get_double()); + + ObJsonDouble j_double(41234.5); + ObIJsonBase *j_tree_double = &j_double; + ObIJsonBase *j_bin_double = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_double, + ObJsonInType::JSON_BIN, j_bin_double)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + + ObIJsonBase *j_bin_replace = nullptr; + ASSERT_EQ(OB_SUCCESS, bin->get_object_value(money_key, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_double)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->lookup(money_key)); + ASSERT_EQ(ObJsonNodeType::J_DOUBLE, bin->json_type()); + ASSERT_FLOAT_EQ(j_double.get_double(), bin->get_double()); + } + + { + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ObString ids_key("ids"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(ids_key)); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); + ASSERT_EQ(9, bin->element_count()); + + { + ASSERT_EQ(OB_SUCCESS, bin->element(7)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_EQ(20000, bin->get_int()); + + ObJsonInt j_int(300); + ObIJsonBase *j_tree_int = &j_int; + ObIJsonBase *j_bin_int = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_int, + ObJsonInType::JSON_BIN, j_bin_int)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + + ObIJsonBase *j_bin_replace = nullptr; + ASSERT_EQ(OB_SUCCESS, bin->get_array_element(7, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_int)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(3, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, 
update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->element(7)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_EQ(j_int.get_int(), bin->get_int()); + } + } +} + +TEST_F(TestJsonBin, test_append_replace) +{ + common::ObString j_text("{ \"name\" : \"Mike\", \"age\" : 30, \"ids\" : [1, 200, 3 , 400, true, 100.1, null, 20000, false] }"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + + // root + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + uint64_t root_member_count = bin->element_count(); + ASSERT_EQ(3, root_member_count); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin->json_type()); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + { + ObString name_key("name"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(name_key)); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin->json_type()); + ObString ele0("Mike"); + ObString data(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, ele0.compare(data)); + + ObJsonString j_str("JohnSnow", strlen("JohnSnow")); + ObIJsonBase *j_tree_str = &j_str; + ObIJsonBase *j_bin_str = nullptr; + + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, + ObJsonInType::JSON_BIN, j_bin_str)); + ASSERT_EQ(ObJsonNodeType::J_STRING, j_bin_str->json_type()); + ASSERT_TRUE(j_bin_str->is_bin()); + + ObIJsonBase *j_bin_replace = nullptr; + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->get_object_value(name_key, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_str)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, 
bin->lookup(name_key)); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin->json_type()); + ObString ele1("JohnSnow"); + ObString data1(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, ele1.compare(data1)); + } + + { + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ObString age_key("age"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(age_key)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_FLOAT_EQ(30, bin->get_int()); + + ObJsonInt j_int(3000); + ObIJsonBase *j_tree_int = &j_int; + ObIJsonBase *j_bin_int = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_int, + ObJsonInType::JSON_BIN, j_bin_int)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + + ObIJsonBase *j_bin_replace = nullptr; + ASSERT_EQ(OB_SUCCESS, bin->get_object_value(age_key, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_int)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(4, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->lookup(age_key)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_FLOAT_EQ(j_int.get_int(), bin->get_int()); + } + + { + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ObString ids_key("ids"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(ids_key)); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); + ASSERT_EQ(9, bin->element_count()); + + { + ASSERT_EQ(OB_SUCCESS, bin->element(4)); + ASSERT_EQ(ObJsonNodeType::J_BOOLEAN, bin->json_type()); + ASSERT_TRUE(bin->get_boolean()); + + ObJsonInt j_int(300); + ObIJsonBase *j_tree_int = &j_int; + ObIJsonBase *j_bin_int = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_int, + ObJsonInType::JSON_BIN, j_bin_int)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + + ObIJsonBase *j_bin_replace = nullptr; + ASSERT_EQ(OB_SUCCESS, bin->get_array_element(4, j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_int)); + 
ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(6, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->element(4)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_EQ(j_int.get_int(), bin->get_int()); + } + } +} + + +TEST_F(TestJsonBin, test_remove_diff) +{ + common::ObString j_text("{ \"name\" : \"Mike\", \"age\" : 30, \"ids\" : [1, 200, 3 , 400, true, 100.1, null, 20000, false] }"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + + // root + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + uint64_t root_member_count = bin->element_count(); + ASSERT_EQ(3, root_member_count); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin->json_type()); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + { + ObString name_key("name"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(name_key)); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin->json_type()); + ObString ele0("Mike"); + ObString data(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, ele0.compare(data)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->remove(name_key)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(1, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SEARCH_NOT_FOUND, bin->lookup(name_key)); + } + + { + ObString age_key("age"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(age_key)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_FLOAT_EQ(30, bin->get_int()); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->remove(age_key)); + 
ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SEARCH_NOT_FOUND, bin->lookup(age_key)); + } + + { + ObString ids_key("ids"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(ids_key)); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); + ASSERT_EQ(9, bin->element_count()); + + { + ASSERT_EQ(OB_SUCCESS, bin->element(4)); + ASSERT_EQ(ObJsonNodeType::J_BOOLEAN, bin->json_type()); + ASSERT_TRUE(bin->get_boolean()); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->remove(4)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(3, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->element(4)); + ASSERT_EQ(ObJsonNodeType::J_DOUBLE, bin->json_type()); + ASSERT_EQ(100.1, bin->get_double()); + } + + { + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->element(4)); + ASSERT_EQ(ObJsonNodeType::J_DOUBLE, bin->json_type()); + ASSERT_EQ(100.1, bin->get_double()); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->remove(4)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(4, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->element(4)); + ASSERT_EQ(ObJsonNodeType::J_NULL, bin->json_type()); + } + + } +} + +TEST_F(TestJsonBin, test_insert_diff) +{ + common::ObString j_text("{ \"name\" : \"Mike\", \"age\" : 30, \"ids\" : [1, 200, 3 , 400, true, 100.1, null, 20000, false] }"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + + // root + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); 
+ uint64_t root_member_count = bin->element_count(); + ASSERT_EQ(3, root_member_count); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin->json_type()); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + { + ObString sex_key("sex"); + ObJsonString j_str("male", strlen("male")); + ObIJsonBase *j_tree_str = &j_str; + ObIJsonBase *j_bin_str = nullptr; + + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, + ObJsonInType::JSON_BIN, j_bin_str)); + ASSERT_EQ(ObJsonNodeType::J_STRING, j_bin_str->json_type()); + ASSERT_TRUE(j_bin_str->is_bin()); + + ASSERT_EQ(OB_SUCCESS, bin->object_add(sex_key, j_bin_str)); + ASSERT_EQ(OB_SUCCESS, bin->lookup(sex_key)); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin->json_type()); + ObString ele0("male"); + ObString data(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, ele0.compare(data)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + //LOG_INFO("diff", K(update_ctx.cur_node_diff_)); + ASSERT_TRUE(update_ctx.is_rebuild_all_); + update_ctx.is_rebuild_all_ = false; + ObString curr_data; + ASSERT_EQ(OB_SUCCESS, update_ctx.current_data(curr_data)); + ASSERT_EQ(OB_SUCCESS, ob_write_string(allocator, curr_data, result)); + } + + { + ObString ids_key("ids"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(ids_key)); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); + ASSERT_EQ(9, bin->element_count()); + + { + ASSERT_EQ(OB_SUCCESS, bin->element(0)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_EQ(1, bin->get_int()); + + ObJsonInt j_int(300); + ObIJsonBase *j_tree_int = &j_int; + ObIJsonBase *j_bin_int = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_int, + ObJsonInType::JSON_BIN, j_bin_int)); + + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->array_insert(0, j_bin_int)); + + ASSERT_EQ(OB_SUCCESS, bin->element(0)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + 
ASSERT_EQ(j_int.get_int(), bin->get_int()); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + //LOG_INFO("diff", K(update_ctx.cur_node_diff_)); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + check_diff_valid(allocator, result, update_ctx); + } + ASSERT_EQ(10, bin->element_count()); + { + ASSERT_EQ(OB_SUCCESS, bin->element(3)); + ASSERT_EQ(ObJsonNodeType::J_INT, bin->json_type()); + ASSERT_EQ(3, bin->get_int()); + + ObJsonString j_str("weight", strlen("weight")); + ObIJsonBase *j_tree_str = &j_str; + ObIJsonBase *j_bin_str = nullptr; + + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, + ObJsonInType::JSON_BIN, j_bin_str)); + ASSERT_EQ(ObJsonNodeType::J_STRING, j_bin_str->json_type()); + ASSERT_TRUE(j_bin_str->is_bin()); + + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, bin->array_insert(3, j_bin_str)); + + ASSERT_EQ(OB_SUCCESS, bin->element(3)); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin->json_type()); + ObString ele0("weight"); + ObString data(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, ele0.compare(data)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + //LOG_INFO("diff", K(update_ctx.cur_node_diff_)); + ASSERT_EQ(4, update_ctx.binary_diffs_.count()); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + check_diff_valid(allocator, result, update_ctx); + } + ASSERT_EQ(11, bin->element_count()); + { + ASSERT_EQ(OB_SUCCESS, bin->element(10)); + ASSERT_EQ(ObJsonNodeType::J_BOOLEAN, bin->json_type()); + ASSERT_FALSE(bin->get_boolean()); + + ObJsonString j_str("weight2", strlen("weight2")); + ObIJsonBase *j_tree_str = &j_str; + ObIJsonBase *j_bin_str = nullptr; + + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, + ObJsonInType::JSON_BIN, j_bin_str)); + ASSERT_EQ(ObJsonNodeType::J_STRING, j_bin_str->json_type()); + ASSERT_TRUE(j_bin_str->is_bin()); + + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + ASSERT_EQ(OB_SUCCESS, 
bin->array_insert(10, j_bin_str)); + + ASSERT_EQ(OB_SUCCESS, bin->element(10)); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin->json_type()); + ObString ele0("weight2"); + ObString data(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, ele0.compare(data)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + //LOG_INFO("diff", K(update_ctx.cur_node_diff_)); + ASSERT_EQ(6, update_ctx.binary_diffs_.count()); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + check_diff_valid(allocator, result, update_ctx); + } + ASSERT_EQ(12, bin->element_count()); + { + ASSERT_EQ(OB_SUCCESS, bin->element(11)); + ASSERT_EQ(ObJsonNodeType::J_BOOLEAN, bin->json_type()); + ASSERT_FALSE(bin->get_boolean()); + } + } +} + +TEST_F(TestJsonBin, test_replace_parent_and_child) +{ + common::ObString j_text("{ \"name\" : \"Mike\", \"age\" : 30, \"order\" : {\"stcok\" : 1000, \"name\": \"myxxxx\"} }"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + + // root + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + uint64_t root_member_count = bin->element_count(); + ASSERT_EQ(3, root_member_count); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin->json_type()); + + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + { + ObString order_key("order"); + ASSERT_EQ(OB_SUCCESS, bin->lookup(order_key)); + ASSERT_EQ(ObJsonNodeType::J_OBJECT, bin->json_type()); + ASSERT_EQ(2, bin->element_count()); + + { + ObJsonString j_str("weightxxxxx", strlen("weightxxxxx")); + ObIJsonBase *j_tree_str = &j_str; + ObIJsonBase *j_bin_str = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, + ObJsonInType::JSON_BIN, j_bin_str)); + 
ASSERT_EQ(ObJsonNodeType::J_STRING, j_bin_str->json_type()); + ASSERT_TRUE(j_bin_str->is_bin()); + ObIJsonBase *j_bin_replace = nullptr; + ASSERT_EQ(OB_SUCCESS, bin->get_object_value(ObString("name"), j_bin_replace)); + ASSERT_EQ(OB_SUCCESS, bin->replace(j_bin_replace, j_bin_str)); + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(2, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->lookup(ObString("name"))); + ASSERT_EQ(ObJsonNodeType::J_STRING, bin->json_type()); + ObString ele0("weightxxxxx"); + ObString data(bin->get_data_length(), bin->get_data()); + ASSERT_EQ(0, ele0.compare(data)); + ASSERT_EQ(OB_SUCCESS, bin->move_parent_iter()); + } + } +} + +TEST_F(TestJsonBin, test_decimal) +{ + ObArenaAllocator allocator(ObModIds::TEST); + number::ObNumber res_nmb; + ObString nmb_str("3.14"); + ObPrecision res_precision = -1; + ObScale res_scale = -1; + ASSERT_EQ(OB_SUCCESS, res_nmb.from_sci_opt(nmb_str.ptr(), nmb_str.length(), allocator, &res_precision, &res_scale)); + + ObJsonDecimal j_dec(res_nmb, res_precision, res_scale); + ObIJsonBase *j_tree_dec = &j_dec; + ObIJsonBase *j_bin_dec = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_dec, + ObJsonInType::JSON_BIN, j_bin_dec)); + + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin_dec->get_raw_binary(result, &allocator)); + + ObJsonBin bin(result.ptr(), result.length(), &allocator); + ASSERT_EQ(OB_SUCCESS, bin.reset_iter()); +} + +TEST_F(TestJsonBin, test_double) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObJsonDouble j_double(3.1415); + ObIJsonBase *j_tree_double = &j_double; + ObIJsonBase *j_bin_double = NULL; + // TREE -> BIN + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_double, + ObJsonInType::JSON_BIN, j_bin_double)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin_double->get_raw_binary(result, &allocator)); + + ObJsonBin bin(result.ptr(), result.length(), 
&allocator); + ASSERT_EQ(OB_SUCCESS, bin.reset_iter()); +} + +TEST_F(TestJsonBin, test_get_parent) +{ + ObArenaAllocator allocator(ObModIds::TEST); + + { + common::ObString j_text("{ \"name\" : \"Mike\", \"age\" : 30, \"order\" : {\"stcok\" : 100, \"name\": \"myxxxx\"} }"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + common::ObString path_str("$.order.stcok"); + ObJsonPath test_path(path_str, &allocator); + ASSERT_EQ(OB_SUCCESS, test_path.parse_path()); + + ObJsonSeekResult hit; + int cnt = test_path.path_node_cnt(); + ASSERT_EQ(OB_SUCCESS, j_bin->seek(test_path, cnt, false, false, hit)); + ASSERT_EQ(1, hit.size()); + ObIJsonBase *j_child_bin = hit[0]; + ObIJsonBase *j_parent_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, j_child_bin->get_parent(j_parent_bin)); + ASSERT_NE(nullptr, j_parent_bin); + ASSERT_TRUE(j_parent_bin->is_bin()); + + ObJsonString j_str("weightxxxxx", strlen("weightxxxxx")); + ObIJsonBase *j_tree_str = &j_str; + ObIJsonBase *j_bin_str = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_str, + ObJsonInType::JSON_BIN, j_bin_str)); + ASSERT_EQ(ObJsonNodeType::J_STRING, j_bin_str->json_type()); + ASSERT_TRUE(j_bin_str->is_bin()); + ASSERT_EQ(OB_SUCCESS, j_parent_bin->replace(j_child_bin, j_bin_str)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + } + + { + common::ObString j_text("{ \"name\" : \"Mike\", \"age\" : 30, \"order\" : {\"stcok\" : 100, \"name\": \"myxxxx\"} }"); + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_bin = nullptr; + 
ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + common::ObString path_str("$.age"); + ObJsonPath test_path(path_str, &allocator); + ASSERT_EQ(OB_SUCCESS, test_path.parse_path()); + + ObJsonSeekResult hit; + int cnt = test_path.path_node_cnt(); + ASSERT_EQ(OB_SUCCESS, j_bin->seek(test_path, cnt, false, false, hit)); + ASSERT_EQ(1, hit.size()); + ObIJsonBase *j_child_bin = hit[0]; + ObIJsonBase *j_parent_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, j_child_bin->get_parent(j_parent_bin)); + ASSERT_NE(nullptr, j_parent_bin); + ASSERT_TRUE(j_parent_bin->is_bin()); + + ObJsonInt j_int(200); + ObIJsonBase *j_tree_int = &j_int; + ObIJsonBase *j_bin_int = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::transform(&allocator, j_tree_int, + ObJsonInType::JSON_BIN, j_bin_int)); + ASSERT_TRUE(j_bin_int->is_bin()); + ASSERT_EQ(OB_SUCCESS, j_parent_bin->replace(j_child_bin, j_bin_int)); + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + } +} + +TEST_F(TestJsonBin, test_type) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObJsonObject tree(&allocator); + ObJsonBin bin(&allocator); + // ObJsonNull j_null_true(true); + ObJsonNull j_null_false(false); + // ASSERT_EQ(OB_SUCCESS, tree.add("j_null_true", &j_null_true)); + ASSERT_EQ(OB_SUCCESS, tree.add("j_null_false", &j_null_false)); + + ObJsonBoolean j_bool_false(false); + ASSERT_EQ(OB_SUCCESS, tree.add("j_bool_false", &j_bool_false)); + ObJsonBoolean j_bool_true(false); + ASSERT_EQ(OB_SUCCESS, tree.add("j_bool_true", &j_bool_true)); + + ObJsonInt j_int_0(0); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_0", &j_int_0)); + ObJsonInt j_int_1(1); + 
ASSERT_EQ(OB_SUCCESS, tree.add("j_int_1", &j_int_1)); + ObJsonInt j_int_2(3); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_2", &j_int_2)); + ObJsonInt j_int_3(100); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_3", &j_int_3)); + ObJsonInt j_int_4(256); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_4", &j_int_4)); + ObJsonInt j_int_5(6000); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_5", &j_int_5)); + ObJsonInt j_int_6(4234234); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_6", &j_int_6)); + ObJsonInt j_int_7(4234234123214321421L); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_7", &j_int_7)); + ObJsonInt j_int_8(-1); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_8", &j_int_8)); + ObJsonInt j_int_9(-3); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_9", &j_int_9)); + ObJsonInt j_int_10(-100); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_10", &j_int_10)); + ObJsonInt j_int_11(-256); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_11", &j_int_11)); + ObJsonInt j_int_12(-6000); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_12", &j_int_12)); + ObJsonInt j_int_13(-4234234); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_13", &j_int_13)); + ObJsonInt j_int_14(-4234234123214321421L); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_14", &j_int_14)); + ObJsonOInt j_int_15(INT_MIN); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_15", &j_int_15)); + ObJsonOInt j_int_16(INT_MAX); + ASSERT_EQ(OB_SUCCESS, tree.add("j_int_16", &j_int_16)); + + ObJsonOInt j_oint_0(0); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_0", &j_oint_0)); + ObJsonOInt j_oint_1(1); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_1", &j_oint_1)); + ObJsonOInt j_oint_2(3); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_2", &j_oint_2)); + ObJsonOInt j_oint_3(100); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_3", &j_oint_3)); + ObJsonOInt j_oint_4(256); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_4", &j_oint_4)); + ObJsonOInt j_oint_5(6000); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_6", &j_oint_5)); + ObJsonOInt j_oint_6(4234234); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_6", &j_oint_6)); + ObJsonOInt 
j_oint_7(4234234123214321421L); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_7", &j_oint_7)); + ObJsonOInt j_oint_8(-1); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_8", &j_oint_8)); + ObJsonOInt j_oint_9(-3); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_9", &j_oint_9)); + ObJsonOInt j_oint_10(-100); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_10", &j_oint_10)); + ObJsonOInt j_oint_11(-256); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_11", &j_oint_11)); + ObJsonOInt j_oint_12(-6000); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_12", &j_oint_12)); + ObJsonOInt j_oint_13(-4234234); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_13", &j_oint_13)); + ObJsonOInt j_oint_14(-4234234123214321421L); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_14", &j_oint_14)); + ObJsonOInt j_oint_15(INT_MIN); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_15", &j_oint_15)); + ObJsonOInt j_oint_16(INT_MAX); + ASSERT_EQ(OB_SUCCESS, tree.add("j_oint_16", &j_oint_16)); + + ObJsonUint j_uint_0(0); + ASSERT_EQ(OB_SUCCESS, tree.add("j_uint_0", &j_uint_0)); + ObJsonUint j_uint_1(1); + ASSERT_EQ(OB_SUCCESS, tree.add("j_uint_1", &j_uint_1)); + ObJsonUint j_uint_2(3); + ASSERT_EQ(OB_SUCCESS, tree.add("j_uint_2", &j_uint_2)); + ObJsonUint j_uint_3(100); + ASSERT_EQ(OB_SUCCESS, tree.add("j_uint_3", &j_uint_3)); + ObJsonUint j_uint_4(256); + ASSERT_EQ(OB_SUCCESS, tree.add("j_uint_4", &j_uint_4)); + ObJsonUint j_uint_5(6000); + ASSERT_EQ(OB_SUCCESS, tree.add("j_uint_5", &j_uint_5)); + ObJsonUint j_uint_6(4234234); + ASSERT_EQ(OB_SUCCESS, tree.add("j_uint_6", &j_uint_6)); + ObJsonUint j_uint_7(4234234123214321421L); + ASSERT_EQ(OB_SUCCESS, tree.add("j_uint_7", &j_uint_7)); + ObJsonUint j_uint_8(UINT_MAX); + ASSERT_EQ(OB_SUCCESS, tree.add("j_uint_8", &j_uint_8)); + ObJsonUint j_uint_9(ULLONG_MAX); + ASSERT_EQ(OB_SUCCESS, tree.add("j_uint_9", &j_uint_9)); + + ObJsonOLong j_olong_0(0); + ASSERT_EQ(OB_SUCCESS, tree.add("j_olong_0", &j_olong_0)); + ObJsonOLong j_olong_1(1); + ASSERT_EQ(OB_SUCCESS, tree.add("j_olong_1", &j_olong_1)); + ObJsonOLong 
j_olong_2(3); + ASSERT_EQ(OB_SUCCESS, tree.add("j_olong_2", &j_olong_2)); + ObJsonOLong j_olong_3(100); + ASSERT_EQ(OB_SUCCESS, tree.add("j_olong_3", &j_olong_3)); + ObJsonOLong j_olong_4(256); + ASSERT_EQ(OB_SUCCESS, tree.add("j_olong_4", &j_olong_4)); + ObJsonOLong j_olong_5(6000); + ASSERT_EQ(OB_SUCCESS, tree.add("j_olong_5", &j_olong_5)); + ObJsonOLong j_olong_6(4234234); + ASSERT_EQ(OB_SUCCESS, tree.add("j_olong_6", &j_olong_6)); + ObJsonOLong j_olong_7(4234234123214321421L); + ASSERT_EQ(OB_SUCCESS, tree.add("j_olong_7", &j_olong_7)); + ObJsonOLong j_olong_8(UINT_MAX); + ASSERT_EQ(OB_SUCCESS, tree.add("j_olong_8", &j_olong_8)); + ObJsonOLong j_olong_9(ULLONG_MAX); + ASSERT_EQ(OB_SUCCESS, tree.add("j_olong_9", &j_olong_9)); + + number::ObNumber res_nmb; + ObString nmb_str("3.14"); + ObPrecision res_precision = -1; + ObScale res_scale = -1; + ASSERT_EQ(OB_SUCCESS, res_nmb.from_sci_opt(nmb_str.ptr(), nmb_str.length(), allocator, &res_precision, &res_scale)); + + ObJsonDecimal j_dec_1(res_nmb, res_precision, res_scale); + ASSERT_EQ(OB_SUCCESS, tree.add("j_dec_1", &j_dec_1)); + + nmb_str.assign_ptr("-1.1234e9", STRLEN("-1.1234e9")); + res_precision = 10; + res_scale = 4; + ASSERT_EQ(OB_SUCCESS, res_nmb.from_sci_opt(nmb_str.ptr(), nmb_str.length(), allocator, &res_precision, &res_scale)); + ObJsonDecimal j_dec_2(res_nmb, res_precision, res_scale); + ASSERT_EQ(OB_SUCCESS, tree.add("j_dec_2", &j_dec_2)); + + ObJsonDouble j_double_0(0); + ASSERT_EQ(OB_SUCCESS, tree.add("j_double_0", &j_double_0)); + ObJsonDouble j_double_1(1.12222222222222); + ASSERT_EQ(OB_SUCCESS, tree.add("j_double_1", &j_double_1)); + ObJsonDouble j_double_2(112132132131223.32132132132131232122); + ASSERT_EQ(OB_SUCCESS, tree.add("j_double_2", &j_double_2)); + ObJsonDouble j_double_3(1.121321321321321e9); + ASSERT_EQ(OB_SUCCESS, tree.add("j_double_3", &j_double_3)); + ObJsonDouble j_double_4(0.1212e-9); + ASSERT_EQ(OB_SUCCESS, tree.add("j_double_4", &j_double_4)); + ObJsonDouble 
j_double_5(-0.12213222e-9); + ASSERT_EQ(OB_SUCCESS, tree.add("j_double_5", &j_double_5)); + ObJsonDouble j_double_6(-1.21321321321321); + ASSERT_EQ(OB_SUCCESS, tree.add("j_double_6", &j_double_6)); + ObJsonDouble j_double_7(-21321321321.32132132132131232122); + ASSERT_EQ(OB_SUCCESS, tree.add("j_double_7", &j_double_7)); + ObJsonDouble j_double_8(2.12213212222); + ASSERT_EQ(OB_SUCCESS, tree.add("j_double_8", &j_double_8)); + + ObJsonOFloat j_ofloat_0(0); + ASSERT_EQ(OB_SUCCESS, tree.add("j_ofloat_0", &j_ofloat_0)); + ObJsonOFloat j_ofloat_1(1.12222222222222); + ASSERT_EQ(OB_SUCCESS, tree.add("j_ofloat_1", &j_ofloat_1)); + ObJsonOFloat j_ofloat_2(112132132131223.32132132132131232122); + ASSERT_EQ(OB_SUCCESS, tree.add("j_ofloat_2", &j_ofloat_2)); + ObJsonOFloat j_ofloat_3(1.121321321321321e9); + ASSERT_EQ(OB_SUCCESS, tree.add("j_ofloat_3", &j_ofloat_3)); + ObJsonOFloat j_ofloat_4(0.1212e-9); + ASSERT_EQ(OB_SUCCESS, tree.add("j_ofloat_4", &j_ofloat_4)); + ObJsonOFloat j_ofloat_5(-0.12213222e-9); + ASSERT_EQ(OB_SUCCESS, tree.add("j_ofloat_5", &j_ofloat_5)); + ObJsonOFloat j_ofloat_6(-1.21321321321321); + ASSERT_EQ(OB_SUCCESS, tree.add("j_ofloat_6", &j_ofloat_6)); + ObJsonOFloat j_ofloat_7(-21321321321.32132132132131232122); + ASSERT_EQ(OB_SUCCESS, tree.add("j_ofloat_7", &j_ofloat_7)); + ObJsonOFloat j_ofloat_8(2.12213212222); + ASSERT_EQ(OB_SUCCESS, tree.add("j_ofloat_8", &j_ofloat_8)); + + ObJsonOpaque j_opaque_0(ObString("my opaque"), ObObjType::ObVarcharType); + ASSERT_EQ(OB_SUCCESS, tree.add("j_opaque_0", &j_opaque_0)); + + ObTime ob_time_0(DT_TYPE_DATETIME); + ASSERT_EQ(OB_SUCCESS, ObTimeConverter::datetime_to_ob_time(946684800000000, NULL, ob_time_0)); + ObJsonDatetime j_datetime_0(ObJsonNodeType::J_DATETIME, ob_time_0); + ASSERT_EQ(OB_SUCCESS, tree.add("j_datetime_0", &j_datetime_0)); + + ObIJsonBase *j_tree = &tree; + ObIJsonBase *j_bin = &bin; + ASSERT_EQ(OB_SUCCESS, bin.parse_tree(&tree)); + ObJsonBuffer j_tree_buffer(&allocator); + ObJsonBuffer 
j_bin_buffer(&allocator); + ASSERT_EQ(OB_SUCCESS, j_tree->print(j_tree_buffer, false)); + ASSERT_EQ(OB_SUCCESS, j_bin->print(j_bin_buffer, false)); + ObString j_tree_str = j_tree_buffer.string(); + ObString j_bin_str = j_bin_buffer.string(); + ASSERT_EQ(std::string(j_tree_str.ptr(), j_tree_str.length()), std::string(j_bin_str.ptr(), j_bin_str.length())); + ObJsonBuffer j_tree_buffer_2(&allocator); + ObJsonNode *tree_2 = nullptr; + ASSERT_EQ(OB_SUCCESS, bin.to_tree(tree_2)); + ASSERT_EQ(OB_SUCCESS, tree_2->print(j_tree_buffer_2, false)); + ObString j_tree_str_2 = j_tree_buffer_2.string(); + ASSERT_EQ(std::string(j_tree_str.ptr(), j_tree_str.length()), std::string(j_tree_str_2.ptr(), j_tree_str_2.length())); + + ObJsonBuffer j_bin_buffer_2(&allocator); + ASSERT_EQ(OB_SUCCESS, bin.rebuild(j_bin_buffer_2)); + ObJsonBin bin2; + ObJsonBinCtx bin_ctx_2; + ASSERT_EQ(OB_SUCCESS, bin2.reset(j_bin_buffer_2.string(), 0, &bin_ctx_2)); + ObJsonBuffer j_bin_buffer_2_1(&allocator); + ASSERT_EQ(OB_SUCCESS, bin2.print(j_bin_buffer_2_1, false)); + ObString j_bin_str_2 = j_bin_buffer_2_1.string(); + ASSERT_EQ(std::string(j_tree_str.ptr(), j_tree_str.length()), std::string(j_bin_str_2.ptr(), j_bin_str_2.length())); + +} + +static void json_set(ObIAllocator& allocator, ObString& j_text, std::vector> &updates) +{ + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + + ObJsonPathCache path_cache(&allocator); + for (int i = 0 ; i < updates.size(); ++i) { + ObString& path = updates[i].first; + ObString& value = updates[i].second; + ObJsonPath* json_path = nullptr; + ObIJsonBase *j_new_node = nullptr; 
+ int path_idx = path_cache.size(); + ASSERT_EQ(OB_SUCCESS, path_cache.find_and_add_cache(json_path, path, path_idx)); + ASSERT_EQ(path_cache.path_stat_at(path_idx), ObPathParseStat::OK_NOT_NULL); + ObJsonSeekResult hit; + ASSERT_EQ(OB_SUCCESS, j_bin->seek(*json_path, json_path->path_node_cnt(), true, false, hit)); + ASSERT_EQ(1, hit.size()); + ObIJsonBase *j_parent = nullptr; + ObIJsonBase *j_old_node = hit[0]; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, value, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_new_node)); + ASSERT_EQ(OB_SUCCESS, j_old_node->get_parent(j_parent)); + if(OB_NOT_NULL(j_parent)) { + ASSERT_EQ(OB_SUCCESS, j_parent->replace(j_old_node, j_new_node)); + } else { + ASSERT_EQ(OB_SUCCESS, j_bin->replace(j_old_node, j_new_node)); + } + } + check_diff_valid(allocator, result, update_ctx); + check_json_diff_valid(allocator, j_text, update_ctx, 2); +} + +TEST_F(TestJsonBin, test_nested_append_update) +{ + ObArenaAllocator allocator(ObModIds::TEST); + common::ObString j_text("{ \"name\" : \"Mike\", \"age\" : 30, \"like\" : [] , \"name1\" : \"Mike1\", \"name2\" : \"Mike2\",\"name3\" : \"Mike3\",\"name4\" : \"Mike4\",\"name5\" : \"Mike5\"}"); + std::vector> updates; + updates.push_back({ObString("$.like"), ObString("[1, 2, 3]")}); + updates.push_back({ObString("$.like[1]"), ObString("{\"K\":\"V\"}")}); + json_set(allocator, j_text, updates); +} + + +TEST_F(TestJsonBin, test_nested_append_update_2) +{ + ObArenaAllocator allocator(ObModIds::TEST); + common::ObString j_text("{ \"name\" : \"Mike\", \"age\" : 30, \"like\" : [] , \"name1\" : \"Mike1\", \"name2\" : \"Mike2\",\"name3\" : \"Mike3\",\"name4\" : \"Mike4\",\"name5\" : \"Mike5\"}"); + std::vector> updates; + updates.push_back({ObString("$.like"), ObString("[\"x\", [\"a\", \"b\", \"c\"], \"z\"]")}); + updates.push_back({ObString("$.like[1][1]"), ObString("{\"K\":\"V\"}")}); + json_set(allocator, j_text, updates); +} + + +TEST_F(TestJsonBin, test_array_remove) +{ + 
ObArenaAllocator allocator(ObModIds::TEST); + + common::ObString j_text("{ \"name\" : \"Mike\", \"age\" : 30, \"like\" : [1, 2, 3] }"); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + + common::ObString path_str("$.like[1]"); + ObJsonPath test_path(path_str, &allocator); + ASSERT_EQ(OB_SUCCESS, test_path.parse_path()); + + ObJsonSeekResult hit; + int cnt = test_path.path_node_cnt(); + ASSERT_EQ(OB_SUCCESS, j_bin->seek(test_path, cnt, false, false, hit)); + ASSERT_EQ(1, hit.size()); + ObIJsonBase *j_child_bin = hit[0]; + ObIJsonBase *j_parent_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, j_child_bin->get_parent(j_parent_bin)); + ASSERT_NE(nullptr, j_parent_bin); + ASSERT_EQ(OB_SUCCESS, j_parent_bin->array_remove(1)); + + ASSERT_FALSE(update_ctx.is_rebuild_all_); + ASSERT_EQ(1, update_ctx.binary_diffs_.count()); + check_diff_valid(allocator, result, update_ctx); + ASSERT_EQ(OB_SUCCESS, bin->lookup(ObString("like"))); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, bin->json_type()); + + ObJsonBuffer j_bin_buffer(&allocator); + ASSERT_EQ(OB_SUCCESS, bin->print(j_bin_buffer, false)); + ASSERT_EQ("[1, 3]", std::string(j_bin_buffer.ptr(), j_bin_buffer.length())); +} + +TEST_F(TestJsonBin, test_array_remove_2) +{ + ObArenaAllocator allocator(ObModIds::TEST); + + common::ObString j_text("[ \"name\" , \"Mike\", \"age\" , 30, \"like\", [1, 2, 3] ]"); + ObIJsonBase *j_bin = nullptr; + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_BIN, j_bin)); + ObJsonBin *bin = static_cast(j_bin); + bin->set_seek_flag(false); + 
ASSERT_EQ(OB_SUCCESS, init_update_ctx(allocator, bin)); + ObJsonBinUpdateCtx &update_ctx = *bin->get_update_ctx(); + + common::ObString result; + ASSERT_EQ(OB_SUCCESS, j_bin->get_raw_binary(result, &allocator)); + ASSERT_EQ(OB_SUCCESS, j_bin->array_remove(1)); + + ASSERT_TRUE(update_ctx.is_rebuild_all_); + ASSERT_EQ(ObJsonNodeType::J_ARRAY, j_bin->json_type()); + + ObJsonBuffer j_bin_buffer(&allocator); + ASSERT_EQ(OB_SUCCESS, bin->print(j_bin_buffer, false)); + ASSERT_EQ("[\"name\", \"age\", 30, \"like\", [1, 2, 3]]", std::string(j_bin_buffer.ptr(), j_bin_buffer.length())); +} + + } // namespace common } // namespace oceanbase int main(int argc, char** argv) { + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + OB_LOGGER.set_log_level("INFO"); ::testing::InitGoogleTest(&argc, argv); /* system("rm -f test_json_bin.log"); diff --git a/unittest/share/test_json_schema.cpp b/unittest/share/test_json_schema.cpp new file mode 100644 index 0000000000..a5121b4797 --- /dev/null +++ b/unittest/share/test_json_schema.cpp @@ -0,0 +1,1688 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#define private public +#include "lib/json_type/ob_json_tree.h" +#include "lib/json_type/ob_json_parse.h" +#include "lib/timezone/ob_timezone_info.h" +#include "lib/json_type/ob_json_schema.h" +#undef private + +#include +using namespace std; +namespace oceanbase { +namespace common { + +class TestJsonSchema : public ::testing::Test { +public: + TestJsonSchema() + {} + ~TestJsonSchema() + {} + virtual void SetUp() + {} + virtual void TearDown() + {} + + static void SetUpTestCase() + {} + + static void TearDownTestCase() + {} + +private: + // disallow copy + DISALLOW_COPY_AND_ASSIGN(TestJsonSchema); +}; + +ObString j_schema_to_str("{\"$id\": \"httpexample.com/schemas/customer\", \"type\": \"object\", \"$defs\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"first_name\", \"last_name\"], \"properties\": {\"last_name\": {\"$ref\": \"#/$defs/name\"}, \"first_name\": {\"$ref\": \"#/$defs/name\"}}}"); +TEST_F(TestJsonSchema, test_parse_json_schema_ref) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_tree = NULL; + ObIJsonBase *j_bin = NULL; + ObJsonSeekResult hit; + ObJsonBuffer j_buf(&allocator); + common::ObString j_text("{\"$id\": \"httpexample.com/schemas/customer\"," + "\"type\": \"object\"," + "\"properties\": {\"first_name\": { \"$ref\": \"#/$defs/name\" }," + "\"last_name\": { \"$ref\": \"#/$defs/name\" }}," + "\"required\": [\"first_name\", \"last_name\"]," + "\"$defs\": {\"name\": { \"type\": \"string\" }}}"); + + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_TREE, j_tree, ObJsonParser::JSN_SCHEMA_FLAG)); + ASSERT_EQ(OB_SUCCESS, j_tree->print(j_buf, false)); + ASSERT_EQ(j_schema_to_str, ObString(j_buf.length(), j_buf.ptr())); + common::ObString j_text_wrong("{\"$id\": \"httpexample.com/schemas/customer\"," + "\"type\": \"object\"," + "\"properties\": {\"first_name\": { \"$ref\": \"#/$defs/name\" }," + "\"last_name\": { \"$ref\": \"/$defs/name\" 
}}," + "\"required\": [\"first_name\", \"last_name\"]," + "\"$defs\": {\"name\": { \"type\": \"string\" }}}"); + ASSERT_EQ(OB_ERR_UNSUPPROTED_REF_IN_JSON_SCHEMA, ObJsonBaseFactory::get_json_base(&allocator, j_text_wrong, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_TREE, j_tree, ObJsonParser::JSN_SCHEMA_FLAG)); +} + +TEST_F(TestJsonSchema, test_parse_json_schema_dup_key) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_tree = NULL; + ObIJsonBase *j_bin = NULL; + ObJsonSeekResult hit; + ObJsonBuffer j_buf(&allocator); + common::ObString j_text("{\"$id\": \"httpexample.com/schemas/customer\"," + "\"type\": \"object\"," + "\"type\": \"number\"," + "\"properties\": {\"first_name\": { \"$ref\": \"#/$defs/name\" }," + "\"last_name\": { \"$ref\": \"#/$defs/name\" }}," + "\"required\": [\"first_name\", \"last_name\"]," + "\"required\": [\"test_name\"]," + "\"$defs\": {\"name\": { \"type\": \"string\" }}}"); + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_TREE, j_tree, ObJsonParser::JSN_SCHEMA_FLAG)); + ASSERT_EQ(OB_SUCCESS,j_tree->print(j_buf, false)); + ASSERT_EQ(j_schema_to_str, ObString(j_buf.length(), j_buf.ptr())); + common::ObString j_text_wrong("{\"$id\": \"httpexample.com/schemas/customer\"," + "\"type\": \"object\"," + "\"properties\": {\"first_name\": { \"$ref\": \"#/$defs/name\" }," + "\"last_name\": { \"$ref\": \"#/$defs/name\" }}," + "\"last_name\": { \"$ref\": \"/$defs/name\" }}," + "\"required\": [\"first_name\", \"last_name\"]," + "\"$defs\": {\"name\": { \"type\": \"string\" }}}"); + ASSERT_EQ(OB_ERR_UNSUPPROTED_REF_IN_JSON_SCHEMA, ObJsonBaseFactory::get_json_base(&allocator, j_text_wrong, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_TREE, j_tree, ObJsonParser::JSN_SCHEMA_FLAG)); +} + +# define STRING_TYPE_COUNT 2 +ObString string_type_str[STRING_TYPE_COUNT] = { + "{\"schema\": {\"type\": 4, \"pattern\": \"^S_\", \"maxLength\": 3, \"minLength\": 2}}", + "{}" +}; 
+TEST_F(TestJsonSchema, test_parse_string_type) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObIJsonBase *j_tree = NULL; + ObIJsonBase *j_bin = NULL; + ObJsonSeekResult hit; + ObJsonBuffer j_buf(&allocator); + common::ObString j_text("{\"type\": \"string\",\"pattern\":\"^S_\",\"minLength\": 2,\"maxLength\": 3}"); + ASSERT_EQ(OB_SUCCESS, ObJsonBaseFactory::get_json_base(&allocator, j_text, + ObJsonInType::JSON_TREE, ObJsonInType::JSON_TREE, j_tree, ObJsonParser::JSN_SCHEMA_FLAG)); + ObJsonSchemaTree json_schema(&allocator); + ASSERT_EQ(OB_SUCCESS, json_schema.build_schema_tree(j_tree)); + int schema_count = json_schema.schema_map_->element_count(); + ASSERT_EQ(STRING_TYPE_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(NUMBER_TYPE_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(NUMBER_TYPE_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(NULL_TYPE_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(NULL_TYPE_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + 
ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(PRO_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(DEP_REUQUIRED_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(DEP_UNNESTED_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(DEP_NESTED_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(PATTERN_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(PATTERN_AND_PRO, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, 
value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(ADD_PRO_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(PATTERN_PRO_ADD, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(ITEMS_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(TUPLE_ITEMS_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(ADD_ITEMS_COUNT, schema_count); + for (int i = 0; i < schema_count; ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(ALLOF_UNNESTED_COUNT, schema_count); + for (int i = 0; i < schema_count; i++) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(ALLOF_NESTED_COUNT, schema_count); + for (int i = 0; i < 
schema_count; i++) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(NOT_NESTED_COUNT, schema_count); + for (int i = 0; i < schema_count; i++) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<element_count(); + ASSERT_EQ(COUNT_COMPLEX_SCHEMA, schema_count); + for (int i = 0; i < schema_count; i++) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + 
ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + j_buf.reset(); + j_value->print(j_buf, false); + // cout<element_count(); ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + cout<<"json_schema "<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS,schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS, schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS, schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS, schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS, schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS, schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS, schema_validator.schema_validator(j_value, is_valid)); + //cout<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS, schema_validator.schema_validator(j_value, is_valid)); + //cout<element_count(); ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + //cout<<"json_schema "<get_array_element(i, j_value)); + ASSERT_EQ(OB_SUCCESS, schema_validator.schema_validator(j_value, is_valid)); + //cout<element_count(); ++i) { + ObIJsonBase *value = nullptr; + ASSERT_EQ(OB_SUCCESS, json_schema.schema_map_->get_array_element(i, value)); + j_buf.reset(); + ASSERT_EQ(OB_SUCCESS, value->print(j_buf, false)); + 
//cout<<"json_schema "< @@ -889,21 +891,21 @@ TEST_F(TestXmlBin, reader) ObString sub_value3("sub_value3"); ObXmlElement sub1_1(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1_1.set_key(sub_key1); + sub1_1.set_xml_key(sub_key1); sub1_1.set_prefix(sub_value1); ObXmlElement sub1_2(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1_2.set_key(sub_key2); + sub1_2.set_xml_key(sub_key2); sub1_2.set_prefix(sub_value2); ObXmlElement sub1_3(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1_3.set_key(sub_key3); + sub1_3.set_xml_key(sub_key3); sub1_3.set_prefix(sub_value3); ObXmlElement sub1(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1.set_key(key1); + sub1.set_xml_key(key1); sub1.set_prefix(value1); // sub children @@ -914,27 +916,27 @@ TEST_F(TestXmlBin, reader) ObXmlElement sub2(ObMulModeNodeType::M_DOCUMENT, ctx); - sub2.set_key(key2); + sub2.set_xml_key(key2); sub2.set_prefix(value2); ObXmlElement sub3_1(ObMulModeNodeType::M_DOCUMENT, ctx); - sub3_1.set_key(key3); + sub3_1.set_xml_key(key3); sub3_1.set_prefix(value3_1); ObXmlElement sub3_2(ObMulModeNodeType::M_DOCUMENT, ctx); - sub3_2.set_key(key3); + sub3_2.set_xml_key(key3); sub3_2.set_prefix(value3_2); ObXmlElement sub3(ObMulModeNodeType::M_DOCUMENT, ctx); - sub3.set_key(key3); + sub3.set_xml_key(key3); sub3.set_prefix(value3); ObXmlElement sub4(ObMulModeNodeType::M_DOCUMENT, ctx); - sub4.set_key(key4); + sub4.set_xml_key(key4); sub4.set_prefix(value4); ObXmlElement sub5(ObMulModeNodeType::M_DOCUMENT, ctx); - sub5.set_key(key5); + sub5.set_xml_key(key5); sub5.set_prefix(value5); @@ -947,15 +949,15 @@ TEST_F(TestXmlBin, reader) ObString value5_3("value5_3"); ObXmlElement sub5_1(ObMulModeNodeType::M_DOCUMENT, ctx); - sub5_1.set_key(key5_1); + sub5_1.set_xml_key(key5_1); sub5_1.set_prefix(value5_1); ObXmlElement sub5_2(ObMulModeNodeType::M_DOCUMENT, ctx); - sub5_2.set_key(key5_2); + sub5_2.set_xml_key(key5_2); sub5_2.set_prefix(value5_2); ObXmlElement sub5_3(ObMulModeNodeType::M_DOCUMENT, ctx); - sub5_3.set_key(key5_3); + 
sub5_3.set_xml_key(key5_3); sub5_3.set_prefix(value5_3); ASSERT_EQ(sub5.add_element(&sub5_1), OB_SUCCESS); @@ -964,7 +966,7 @@ TEST_F(TestXmlBin, reader) ObXmlElement element(ObMulModeNodeType::M_DOCUMENT, ctx); - element.set_key(element_key); + element.set_xml_key(element_key); element.set_prefix(element_value); ASSERT_EQ(element.add_element(&sub1), OB_SUCCESS); @@ -1128,6 +1130,7 @@ TEST_F(TestXmlBin, reader) TEST_F(TestXmlBin, test_simple_print_document) { + set_compat_mode(oceanbase::lib::Worker::CompatMode::ORACLE); int ret = 0; ObCollationType type = CS_TYPE_UTF8MB4_GENERAL_CI; common::ObString xml_text( @@ -1599,6 +1602,185 @@ TEST_F(TestXmlBin, read_by_key) } +# define NS_TEST_COUNT 3 +ObString ns_key[NS_TEST_COUNT] = {"", "f", "h"}; +ObString ns_value[NS_TEST_COUNT] = {"ns1", "ns2", "ns3"}; +TEST_F(TestXmlBin, test_add_extend) +{ + int ret = 0; + ObCollationType type = CS_TYPE_UTF8MB4_GENERAL_CI; + common::ObString xml_text( + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""); + ObString xml_text_entend(""); + + // parse xml text + ObArenaAllocator allocator(ObModIds::TEST); + ObXmlDocument* doc = nullptr; + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ObXmlBin xbin(ctx); + ASSERT_EQ(xbin.parse_tree(doc), 0); + + // seek + ObString str0 = "/a/f:b"; + ObString str1 = "/ns1:a/ns2:b"; + ObString default_ns(ns_value[0]); + ObPathVarObject pass(allocator); + for (int i = 1; i < NS_TEST_COUNT && OB_SUCC(ret); ++i) { + ObDatum* data = static_cast(allocator.alloc(sizeof(ObDatum))); + data = new(data) ObDatum(); + data->set_string(ns_value[i]); // default ns value + ASSERT_EQ(true, OB_NOT_NULL(data)); + ret = pass.add(ns_key[i], data); + } + ObJsonBuffer buf(&allocator); + ObPathExprIter pathiter_bin(&allocator); + pathiter_bin.init(ctx, str0, default_ns, &xbin, &pass, false); + ret = pathiter_bin.open(); + ret = 
pathiter_bin.path_node_->node_to_string(buf); + ObString str2(buf.ptr()); + ASSERT_EQ(str1, str2); + buf.reset(); + int idx = 0; + ObIMulModeBase* res; + ret = pathiter_bin.get_next_node(res); + ASSERT_EQ(OB_SUCCESS, ret); + res->print(buf, true); + + ObXmlBin* bin_res = static_cast(res); + + // ns_element + // element + ObXmlElement element_ns(ObMulModeNodeType::M_ELEMENT, ctx); + element_ns.init(); + ASSERT_EQ(ObMulModeNodeType::M_ELEMENT, element_ns.type()); + for (int i = 0; i < NS_TEST_COUNT && OB_SUCC(ret); ++i) { + ObXmlAttribute* ns1 = static_cast(allocator.alloc(sizeof(ObXmlAttribute))); + ns1 = new(ns1) ObXmlAttribute(ObMulModeNodeType::M_NAMESPACE, ctx); + ASSERT_EQ(true, OB_NOT_NULL(ns1)); + if (i != 0 ) { + ns1->set_xml_key(ns_key[i]); + } else { + ns1->set_xml_key("xmlns"); + } + ns1->set_value(ns_value[i]); + ASSERT_EQ(OB_SUCCESS, element_ns.add_attribute(ns1)); + } + + ASSERT_EQ(OB_SUCCESS, bin_res->append_extend(&element_ns)); + ASSERT_EQ(true, bin_res->check_extend()); + ASSERT_EQ(true, res->check_extend()); + buf.reset(); + bin_res->print(buf, true); + std::cout<<"extend str :"<" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""); + ObString xml_text_entend(""); + + // parse xml text + ObArenaAllocator allocator(ObModIds::TEST); + ObXmlDocument* doc = nullptr; + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ObXmlBin xbin(ctx); + ASSERT_EQ(xbin.parse_tree(doc), 0); + + // seek + ObString str0 = "/a/f:b"; + ObString str1 = "/ns1:a/ns2:b"; + ObString default_ns(ns_value[0]); + ObPathVarObject pass(allocator); + for (int i = 1; i < NS_TEST_COUNT && OB_SUCC(ret); ++i) { + ObDatum* data = static_cast(allocator.alloc(sizeof(ObDatum))); + data = new(data) ObDatum(); + data->set_string(ns_value[i]); // default ns value + ASSERT_EQ(true, OB_NOT_NULL(data)); + ret = pass.add(ns_key[i], data); + } + ObJsonBuffer 
buf(&allocator); + ObPathExprIter pathiter_bin(&allocator); + pathiter_bin.init(ctx, str0, default_ns, &xbin, &pass, false); + ret = pathiter_bin.open(); + ret = pathiter_bin.path_node_->node_to_string(buf); + ObString str2(buf.ptr()); + ASSERT_EQ(str1, str2); + buf.reset(); + int idx = 0; + ObIMulModeBase* res; + ret = pathiter_bin.get_next_node(res); + ASSERT_EQ(OB_SUCCESS, ret); + res->print(buf, true); + + ObXmlBin* bin_res = static_cast(res); + + // ns_element + // element + ObXmlElement element_ns(ObMulModeNodeType::M_ELEMENT, ctx); + element_ns.init(); + ASSERT_EQ(ObMulModeNodeType::M_ELEMENT, element_ns.type()); + for (int i = 0; i < NS_TEST_COUNT && OB_SUCC(ret); ++i) { + ObXmlAttribute* ns1 = static_cast(allocator.alloc(sizeof(ObXmlAttribute))); + ns1 = new(ns1) ObXmlAttribute(ObMulModeNodeType::M_NAMESPACE, ctx); + ASSERT_EQ(true, OB_NOT_NULL(ns1)); + if (i != 0 ) { + ns1->set_xml_key(ns_key[i]); + } else { + ns1->set_xml_key("xmlns"); + } + ns1->set_value(ns_value[i]); + ASSERT_EQ(OB_SUCCESS, element_ns.add_attribute(ns1)); + } + + ASSERT_EQ(OB_SUCCESS, bin_res->append_extend(&element_ns)); + ASSERT_EQ(true, bin_res->check_extend()); + ASSERT_EQ(true, res->check_extend()); + buf.reset(); + bin_res->print(buf, true); + ObString exptend_res(buf.ptr()); + ASSERT_EQ(exptend_res, xml_text_entend); + + ObXmlBin bin_merge(ctx); + ASSERT_EQ(OB_SUCCESS, bin_res->merge_extend(bin_merge)); + buf.reset(); + ret = bin_merge.print(buf, true); + ASSERT_EQ(OB_SUCCESS, ret); + ObString merge_res(buf.ptr()); + std::cout<<"extend res:"<set_key(emelent1); - element2->set_key(emelent2); + element1->set_xml_key(emelent1); + element2->set_xml_key(emelent2); ObString atr1= "sttr1_name"; ObString atr_value1= "sttr1_value"; - attr1->set_key(atr1); + attr1->set_xml_key(atr1); attr1->set_value(atr_value1); element1->add_element(text1); diff --git a/unittest/share/test_xml_tree.cpp b/unittest/share/test_xml_tree.cpp index cdfd3e249e..e77bb09d17 100644 --- 
a/unittest/share/test_xml_tree.cpp +++ b/unittest/share/test_xml_tree.cpp @@ -79,7 +79,7 @@ TEST_F(TestXmlNodeBase, test_xml_node_init) element1.init(); ASSERT_EQ(ObMulModeNodeType::M_ELEMENT, element1.type()); ObString emelent1= "emelent1"; - element1.set_key(emelent1); + element1.set_xml_key(emelent1); ASSERT_EQ(element1.get_key(), "emelent1"); ASSERT_EQ(element1.get_key(), "emelent1"); ObString cdata1= "cdata1"; @@ -87,15 +87,15 @@ TEST_F(TestXmlNodeBase, test_xml_node_init) ASSERT_EQ(cdata.get_text(), "cdata1"); ObString key1= "key1"; ObString value1= "value1"; - pi.set_key(key1); + pi.set_xml_key(key1); pi.set_value(value1); ObString n1= "n1"; ObString ns1= "namespace1"; - ns.set_key(n1); + ns.set_xml_key(n1); ns.set_value(ns1); ObString atr1= "sttr1_name"; ObString atr_value1= "sttr1_value"; - attr.set_key(atr1); + attr.set_xml_key(atr1); attr.set_value(atr_value1); attr.set_ns(&ns); ObString val_ns; @@ -136,30 +136,30 @@ TEST_F(TestXmlNodeBase, test_xml_node_element_add_child) ASSERT_EQ(ObMulModeNodeType::M_ELEMENT, element1.type()); ObString emelent1= "emelent1"; ObString emelent2= "emelent2"; - element1.set_key(emelent1); - element2.set_key(emelent2); + element1.set_xml_key(emelent1); + element2.set_xml_key(emelent2); ASSERT_EQ(element1.get_key(), "emelent1"); ObString cdata1= "cdata1"; cdata.set_text(cdata1); ASSERT_EQ(cdata.get_text(), "cdata1"); ObString key1= "key1"; ObString value1= "value1"; - pi.set_key(key1); + pi.set_xml_key(key1); pi.set_value(value1); ObString n1= "n1"; ObString ns1= "namespace1"; - ns.set_key(n1); + ns.set_xml_key(n1); ns.set_value(ns1); ObString atr1= "sttr1_name"; ObString atr_value1= "sttr1_value"; - attr1.set_key(atr1); + attr1.set_xml_key(atr1); attr1.set_prefix(n1); attr1.set_value(atr_value1); attr1.set_ns(&ns); ObString atr2= "sttr2_name"; ObString atr_value2= "sttr2_value"; - attr2.set_key(atr2); + attr2.set_xml_key(atr2); attr2.set_value(atr_value2); attr2.set_ns(&ns); ObString val_ns; @@ -216,27 +216,27 @@ 
TEST_F(TestXmlNodeBase, test_xml_node_element_ns_valid) element1.init(); element2.init(); - element1.set_key(emelent1); - element2.set_key(emelent2); + element1.set_xml_key(emelent1); + element2.set_xml_key(emelent2); ObString n1= "n1"; ObString ns1= "namespace1"; - ns.set_key(n1); + ns.set_xml_key(n1); ns.set_value(ns1); ObString n2= "n2"; ObString nstr2= "namespace2"; - ns2.set_key(n2); + ns2.set_xml_key(n2); ns2.set_value(nstr2); ObString prefix = "n2"; ObString atr1= "sttr1_name"; ObString atr_value1= "sttr1_value"; - attr1.set_key(atr1); + attr1.set_xml_key(atr1); attr1.set_prefix(prefix); attr1.set_value(atr_value1); attr1.set_ns(&ns); ObString atr2= "sttr2_name"; ObString atr_value2= "sttr2_value"; - attr2.set_key(atr2); + attr2.set_xml_key(atr2); attr2.set_value(atr_value2); attr2.set_ns(&ns); element1.add_attribute(&ns); @@ -316,27 +316,27 @@ TEST_F(TestXmlNodeBase, test_path_interface) ASSERT_EQ(true, ObXmlUtil::is_element(element1.type())); ASSERT_EQ(element3_str, element3.get_key()); ObString emelent1= "emelent1"; - element1.set_key(emelent1); + element1.set_xml_key(emelent1); ASSERT_EQ(true, element1.is_element(emelent1)); ObString cdata1= "cdata1"; cdata.set_text(cdata1); ASSERT_EQ(true, ObXmlUtil::is_text(cdata.type())); ObString key1= "key1"; ObString value1= "value1"; - pi.set_key(key1); + pi.set_xml_key(key1); pi.set_value(value1); ASSERT_EQ(true, ObXmlUtil::is_pi(pi.type())); ASSERT_EQ(true, pi.is_pi(key1)); ObString n1= "n1"; ObString ns1= "namespace1"; - ns.set_key(n1); + ns.set_xml_key(n1); ns.set_value(ns1); ASSERT_EQ(false, ObXmlUtil::is_node(ns.type())); ASSERT_EQ(true, ObXmlUtil::is_comment(comment.type())); ObString atr1= "sttr1_name"; ObString atr_value1= "sttr1_value"; - attr1.set_key(atr1); + attr1.set_xml_key(atr1); attr1.set_prefix(n1); attr1.set_value(atr_value1); attr1.set_ns(&ns); @@ -677,20 +677,20 @@ TEST_F(TestXmlNodeBase, test_xml_node_element_add_well_from) ObString emelent1= "emelent1"; ObString emelent2= "emelent2"; 
ASSERT_EQ(content.attribute_size(), 0); - element1.set_key(emelent1); - element2.set_key(emelent2); + element1.set_xml_key(emelent1); + element2.set_xml_key(emelent2); ASSERT_EQ(element1.get_key(), "emelent1"); ObString cdata1= "cdata1"; cdata.set_text(cdata1); ASSERT_EQ(cdata.get_text(), "cdata1"); ObString key1= "key1"; ObString value1= "value1"; - pi.set_key(key1); + pi.set_xml_key(key1); pi.set_value(value1); ObString atr1= "sttr1_name"; ObString atr_value1= "sttr1_value"; - attr1.set_key(atr1); + attr1.set_xml_key(atr1); attr1.set_value(atr_value1); diff --git a/unittest/share/test_xml_tree_base.cpp b/unittest/share/test_xml_tree_base.cpp index 244512ae70..922ab8bdbc 100644 --- a/unittest/share/test_xml_tree_base.cpp +++ b/unittest/share/test_xml_tree_base.cpp @@ -99,15 +99,14 @@ public: return ret; } - int get_ns_value(ObStack& stk, ObString &ns_value) + int get_ns_value(ObStack& stk, ObString &ns_value, ObIMulModeBase* extend) { return 0; } - int get_ns_value(const ObString& prefix, ObString& ns_value) { + int get_ns_value(const ObString& prefix, ObString& ns_value, int& ans_idx) { return 0; } - bool is_equal_node(const ObIMulModeBase* other) { return false; } @@ -116,6 +115,9 @@ public: return false; } + virtual bool check_extend() { return false; } + virtual bool check_if_defined_ns() { return false; } + int get_value(ObString& value, int64_t index = -1) override { INIT_SUCC(ret); @@ -139,6 +141,10 @@ public: virtual int64_t attribute_size() { return 0; } virtual int64_t attribute_count() { return 0; } + virtual uint16_t get_encoding_flag() {return 0;}; + virtual uint16_t has_xml_decl() {return 0;}; + virtual uint16_t is_unparse() {return 0;}; + virtual ObIMulModeBase* get_attribute_handle() {return nullptr;} ObString get_version() { return ObString(); } ObString get_prefix() { return ObString(); } ObString get_encoding() { return ObString(); } @@ -878,21 +884,21 @@ TEST_F(TestXmlTreeBase, reader) 
ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); ObXmlElement sub1_1(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1_1.set_key(sub_key1); + sub1_1.set_xml_key(sub_key1); sub1_1.set_prefix(sub_value1); ObXmlElement sub1_2(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1_2.set_key(sub_key2); + sub1_2.set_xml_key(sub_key2); sub1_2.set_prefix(sub_value2); ObXmlElement sub1_3(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1_3.set_key(sub_key3); + sub1_3.set_xml_key(sub_key3); sub1_3.set_prefix(sub_value3); ObXmlElement sub1(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1.set_key(key1); + sub1.set_xml_key(key1); sub1.set_prefix(value1); // sub children @@ -903,32 +909,32 @@ TEST_F(TestXmlTreeBase, reader) ObXmlElement sub2(ObMulModeNodeType::M_DOCUMENT, ctx); - sub2.set_key(key2); + sub2.set_xml_key(key2); sub2.set_prefix(value2); ObXmlElement sub3_1(ObMulModeNodeType::M_DOCUMENT, ctx); - sub3_1.set_key(key3); + sub3_1.set_xml_key(key3); sub3_1.set_prefix(value3_1); ObXmlElement sub3_2(ObMulModeNodeType::M_DOCUMENT, ctx); - sub3_2.set_key(key3); + sub3_2.set_xml_key(key3); sub3_2.set_prefix(value3_2); ObXmlElement sub3(ObMulModeNodeType::M_DOCUMENT, ctx); - sub3.set_key(key3); + sub3.set_xml_key(key3); sub3.set_prefix(value3); ObXmlElement sub4(ObMulModeNodeType::M_DOCUMENT, ctx); - sub4.set_key(key4); + sub4.set_xml_key(key4); sub4.set_prefix(value4); ObXmlElement sub5(ObMulModeNodeType::M_DOCUMENT, ctx); - sub5.set_key(key5); + sub5.set_xml_key(key5); sub5.set_prefix(value5); ObXmlElement element(ObMulModeNodeType::M_DOCUMENT, ctx); - sub5.set_key(element_key); + sub5.set_xml_key(element_key); sub5.set_prefix(element_value); ASSERT_EQ(element.add_element(&sub1), OB_SUCCESS); @@ -1098,22 +1104,22 @@ TEST_F(TestXmlTreeBase, lazy_sort) ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); ObXmlElement sub1_1(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1_1.set_key(sub_key1); + sub1_1.set_xml_key(sub_key1); sub1_1.set_prefix(sub_value1); 
ObXmlElement sub1_2(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1_2.set_key(sub_key2); + sub1_2.set_xml_key(sub_key2); sub1_2.set_prefix(sub_value2); ObXmlElement sub1_3(ObMulModeNodeType::M_DOCUMENT, ctx); - sub1_3.set_key(sub_key3); + sub1_3.set_xml_key(sub_key3); sub1_3.set_prefix(sub_value3); ObXmlElement sub1(ObMulModeNodeType::M_DOCUMENT, ctx); sub1.alter_member_sort_policy(false); - sub1.set_key(key1); + sub1.set_xml_key(key1); sub1.set_prefix(value1); // sub children @@ -1129,33 +1135,33 @@ TEST_F(TestXmlTreeBase, lazy_sort) ObXmlElement sub2(ObMulModeNodeType::M_DOCUMENT, ctx); - sub2.set_key(key2); + sub2.set_xml_key(key2); sub2.set_prefix(value2); ObXmlElement sub3_1(ObMulModeNodeType::M_DOCUMENT, ctx); - sub3_1.set_key(key3); + sub3_1.set_xml_key(key3); sub3_1.set_prefix(value3_1); ObXmlElement sub3_2(ObMulModeNodeType::M_DOCUMENT, ctx); - sub3_2.set_key(key3); + sub3_2.set_xml_key(key3); sub3_2.set_prefix(value3_2); ObXmlElement sub3(ObMulModeNodeType::M_DOCUMENT, ctx); - sub3.set_key(key3); + sub3.set_xml_key(key3); sub3.set_prefix(value3); ObXmlElement sub4(ObMulModeNodeType::M_DOCUMENT, ctx); - sub4.set_key(key4); + sub4.set_xml_key(key4); sub4.set_prefix(value4); ObXmlElement sub5(ObMulModeNodeType::M_DOCUMENT, ctx); - sub5.set_key(key5); + sub5.set_xml_key(key5); sub5.set_prefix(value5); ObXmlElement element(ObMulModeNodeType::M_DOCUMENT, ctx); element.alter_member_sort_policy(false); - sub5.set_key(element_key); + sub5.set_xml_key(element_key); sub5.set_prefix(element_value); ASSERT_EQ(element.add_element(&sub5), OB_SUCCESS); diff --git a/unittest/share/test_xpath.cpp b/unittest/share/test_xpath.cpp index 07d19ce840..4b7a8dce41 100644 --- a/unittest/share/test_xpath.cpp +++ b/unittest/share/test_xpath.cpp @@ -712,6 +712,7 @@ ObString seek_suite_ellipsis[2] = {"name=\"b\"", "name=\"d\""}; TEST_F(TestXPath, test_seek_suite_ellipsis) // tested { // 用于解析 + set_compat_mode(oceanbase::lib::Worker::CompatMode::ORACLE); ObString str0 = "//@name"; ObString 
xml_text(""); int ret = OB_SUCCESS; @@ -2626,7 +2627,7 @@ TEST_F(TestXPath, test_seek_root_ancestor) // tested ASSERT_EQ(OB_SUCCESS, ret); ObPathExprIter pathiter(&allocator); - pathiter.init(ctx,str0, default_ns, doc, &pass); + pathiter.init(ctx, str0, default_ns, doc, &pass); // 解析 ret = pathiter.open(); ASSERT_EQ(OB_SUCCESS, ret); @@ -2696,6 +2697,490 @@ TEST_F(TestXPath, test_seek_root_parent) // tested ASSERT_EQ(OB_ITER_END, ret); } +# define NS_TEST_COUNT 3 +# define ADD_NS_RES_COUNT 6 +ObString ns_key[NS_TEST_COUNT] = {"", "f", "h"}; +ObString ns_value[NS_TEST_COUNT] = {"ns1", "ns2", "ns3"}; +TEST_F(TestXPath, test_basic_add_ns) +{ + set_compat_mode(oceanbase::lib::Worker::CompatMode::ORACLE); + int ret = OB_SUCCESS; + ObArenaAllocator allocator(ObModIds::TEST); + ObString xml_text( + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""); + ObString xml_with_entend(""); + ObXmlDocument* doc = nullptr; + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ASSERT_EQ(OB_SUCCESS, ret); + ObXmlBin xbin(ctx); + ASSERT_EQ(xbin.parse_tree(doc), OB_SUCCESS); + + // seek + ObString str0 = "/a/f:b"; + ObString str1 = "/ns1:a/ns2:b"; + ObString default_ns(ns_value[0]); + ObPathVarObject pass(allocator); + for (int i = 1; i < NS_TEST_COUNT && OB_SUCC(ret); ++i) { + ObDatum* data = static_cast(allocator.alloc(sizeof(ObDatum))); + data = new(data) ObDatum(); + data->set_string(ns_value[i]); // default ns value + ASSERT_EQ(true, OB_NOT_NULL(data)); + ret = pass.add(ns_key[i], data); + } + ObJsonBuffer buf(&allocator); + ObPathExprIter pathiter_bin(&allocator); + pathiter_bin.init(ctx, str0, default_ns, &xbin, &pass); + ret = pathiter_bin.open(); + ret = pathiter_bin.path_node_->node_to_string(buf); + ObString str2(buf.ptr()); + ASSERT_EQ(str1, str2); + buf.reset(); + int idx = 0; + ObIMulModeBase* res; + int i = 0; + while (OB_SUCC(ret)) { + 
buf.reset(); + ObIMulModeBase* res; + ret = pathiter_bin.get_next_node(res); + if (i < 1) { + ASSERT_EQ(OB_SUCCESS, ret); + res->print(buf,true); + ObString s(buf.ptr()); + ASSERT_EQ(s, xml_with_entend); + } else { + ASSERT_EQ(OB_ITER_END, ret); + } + ++i; + } +} +ObString add_ns_res[ADD_NS_RES_COUNT] = { + "", + "", + "", + "", + "", + ""}; +TEST_F(TestXPath, test_add_ns) +{ + int ret = OB_SUCCESS; + ObArenaAllocator allocator(ObModIds::TEST); + ObString xml_text( + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""); + ObXmlDocument* doc = nullptr; + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ASSERT_EQ(OB_SUCCESS, ret); + ObXmlBin xbin(ctx); + ASSERT_EQ(xbin.parse_tree(doc), OB_SUCCESS); + + // seek + ObString str0 = "//*"; + ObString str1 = "//*"; + ObString default_ns(ns_value[0]); + ObPathVarObject pass(allocator); + for (int i = 1; i < NS_TEST_COUNT && OB_SUCC(ret); ++i) { + ObDatum* data = static_cast(allocator.alloc(sizeof(ObDatum))); + data = new(data) ObDatum(); + data->set_string(ns_value[i]); // default ns value + ASSERT_EQ(true, OB_NOT_NULL(data)); + ret = pass.add(ns_key[i], data); + } + ObJsonBuffer buf(&allocator); + ObPathExprIter pathiter_bin(&allocator); + pathiter_bin.init(ctx, str0, default_ns, &xbin, &pass, true); + ret = pathiter_bin.open(); + ret = pathiter_bin.path_node_->node_to_string(buf); + ObString str2(buf.ptr()); + ASSERT_EQ(str1, str2); + buf.reset(); + int idx = 0; + ObIMulModeBase* res; + int i = 0; + while (OB_SUCC(ret)) { + buf.reset(); + ObIMulModeBase* res; + ret = pathiter_bin.get_next_node(res); + if (i < ADD_NS_RES_COUNT) { + ASSERT_EQ(OB_SUCCESS, ret); + res->print(buf,true); + ObString s(buf.ptr()); + std::cout<" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""); + ObXmlDocument* doc = nullptr; + ObMulModeMemCtx* ctx = nullptr; + 
ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ASSERT_EQ(OB_SUCCESS, ret); + ObXmlBin xbin(ctx); + ASSERT_EQ(xbin.parse_tree(doc), OB_SUCCESS); + + // seek + ObString str0 = "//*"; + ObString str1 = "//*"; + ObString default_ns(ns_value[0]); + ObPathVarObject pass(allocator); + for (int i = 1; i < NS_TEST_COUNT && OB_SUCC(ret); ++i) { + ObDatum* data = static_cast(allocator.alloc(sizeof(ObDatum))); + data = new(data) ObDatum(); + data->set_string(ns_value[i]); // default ns value + ASSERT_EQ(true, OB_NOT_NULL(data)); + ret = pass.add(ns_key[i], data); + } + ObJsonBuffer buf(&allocator); + ObPathExprIter pathiter_bin(&allocator); + pathiter_bin.init(ctx, str0, default_ns, &xbin, &pass, true); + ret = pathiter_bin.open(); + ret = pathiter_bin.path_node_->node_to_string(buf); + ObString str2(buf.ptr()); + ASSERT_EQ(str1, str2); + buf.reset(); + int idx = 0; + ObIMulModeBase* res; + int i = 0; + while (OB_SUCC(ret)) { + buf.reset(); + ObIMulModeBase* res; + ret = pathiter_bin.get_next_node(res); + if (i == 0) { // first ans without extend + ASSERT_EQ(OB_SUCCESS, ret); + res->print(buf,true); + ObString s(buf.ptr()); + std::cout<(res); + ASSERT_EQ(bin_res->check_extend(), true); + ObXmlBin merge_res(ctx); + ASSERT_EQ(OB_SUCCESS, bin_res->merge_extend(merge_res)); + merge_res.print(buf, true); + ObString s(buf.ptr()); + std::cout<", + "", + "", + "", + "", + ""}; +TEST_F(TestXPath, test_add_prefix_ns_and_merge) +{ + int ret = OB_SUCCESS; + ObArenaAllocator allocator(ObModIds::TEST); + ObString xml_text( + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""); + ObXmlDocument* doc = nullptr; + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ASSERT_EQ(OB_SUCCESS, ret); + ObXmlBin xbin(ctx); + ASSERT_EQ(xbin.parse_tree(doc), OB_SUCCESS); + + 
// seek + ObString str0 = "//*"; + ObString str1 = "//*"; + ObString default_ns(ns_value[0]); + ObPathVarObject pass(allocator); + for (int i = 1; i < NS_TEST_COUNT && OB_SUCC(ret); ++i) { + ObDatum* data = static_cast(allocator.alloc(sizeof(ObDatum))); + data = new(data) ObDatum(); + data->set_string(ns_value[i]); // default ns value + ASSERT_EQ(true, OB_NOT_NULL(data)); + ret = pass.add(ns_key[i], data); + } + ObJsonBuffer buf(&allocator); + ObPathExprIter pathiter_bin(&allocator); + pathiter_bin.init(ctx, str0, default_ns, &xbin, &pass, true); + ret = pathiter_bin.open(); + ret = pathiter_bin.path_node_->node_to_string(buf); + ObString str2(buf.ptr()); + ASSERT_EQ(str1, str2); + buf.reset(); + int idx = 0; + ObIMulModeBase* res; + int i = 0; + while (OB_SUCC(ret)) { + buf.reset(); + ObIMulModeBase* res; + ret = pathiter_bin.get_next_node(res); + if (i == 0) { // first ans without extend + ASSERT_EQ(OB_SUCCESS, ret); + res->print(buf,true); + ObString s(buf.ptr()); + std::cout<(res); + ASSERT_EQ(bin_res->check_extend(), true); + ObXmlBin merge_res(ctx); + ASSERT_EQ(OB_SUCCESS, bin_res->merge_extend(merge_res)); + ASSERT_EQ(OB_SUCCESS, merge_res.print(buf, true)); + ObString s(buf.ptr()); + std::cout<", + "", + "", + "", + "", + ""}; +TEST_F(TestXPath, test_new_prefix_ns_and_merge) +{ + int ret = OB_SUCCESS; + ObArenaAllocator allocator(ObModIds::TEST); + ObString xml_text( + "" + "" + "" + "" + "" + "" + "" + "" + "" + "" + ""); + ObXmlDocument* doc = nullptr; + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ASSERT_EQ(OB_SUCCESS, ret); + ObXmlBin xbin(ctx); + ASSERT_EQ(xbin.parse_tree(doc), OB_SUCCESS); + + // seek + ObString str0 = "//*"; + ObString str1 = "//*"; + ObString default_ns(ns_value[0]); + ObPathVarObject pass(allocator); + for (int i = 1; i < NS_TEST_COUNT && OB_SUCC(ret); ++i) { + ObDatum* data = 
static_cast(allocator.alloc(sizeof(ObDatum))); + data = new(data) ObDatum(); + data->set_string(ns_value[i]); // default ns value + ASSERT_EQ(true, OB_NOT_NULL(data)); + ret = pass.add(ns_key[i], data); + } + ObJsonBuffer buf(&allocator); + ObPathExprIter pathiter_bin(&allocator); + pathiter_bin.init(ctx, str0, default_ns, &xbin, &pass, true); + ret = pathiter_bin.open(); + ret = pathiter_bin.path_node_->node_to_string(buf); + ObString str2(buf.ptr()); + ASSERT_EQ(str1, str2); + buf.reset(); + int idx = 0; + ObIMulModeBase* res; + int i = 0; + while (OB_SUCC(ret)) { + buf.reset(); + ObIMulModeBase* res; + ret = pathiter_bin.get_next_node(res); + if (i == 0) { // first ans without extend + ASSERT_EQ(OB_SUCCESS, ret); + res->print(buf,true); + ObString s(buf.ptr()); + std::cout<(res); + ASSERT_EQ(bin_res->check_extend(), true); + ObXmlBin merge_res(ctx); + ASSERT_EQ(OB_SUCCESS, bin_res->merge_extend(merge_res)); + merge_res.print(buf, true); + ObString s(buf.ptr()); + std::cout<gggcccdddddd", + "ggg", + "ggg", + "ggg", + "ggg", + "ggg", + "ggg", + "ccc", + "ccc", + "ccc", + "", + "ddd", + "ddd", + "ddd", + "ddd", + "ddd", + "", + "ddd", + "ddd", + "ddd"}; +TEST_F(TestXPath, test_merge_ns) +{ + int ret = OB_SUCCESS; + ObArenaAllocator allocator(ObModIds::TEST); + ObString xml_text( + "" + "" + "" + "" + " ggg" + "" + "" + "" + "" + "ccc" + "" + "" + "" + "" + "ddd" + "" + "" + "" + "" + "" + "ddd" + "" + "" + ""); + ObXmlDocument* doc = nullptr; + ObMulModeMemCtx* ctx = nullptr; + ASSERT_EQ(ObXmlUtil::create_mulmode_tree_context(&allocator, ctx), OB_SUCCESS); + ret = ObXmlParserUtils::parse_document_text(ctx, xml_text, doc); + ASSERT_EQ(OB_SUCCESS, ret); + ObXmlBin xbin(ctx); + ASSERT_EQ(xbin.parse_tree(doc), OB_SUCCESS); + + // seek + ObString str0 = "//node()"; + ObString str1 = "//node()"; + ObString default_ns(ns_value[0]); + ObPathVarObject pass(allocator); + for (int i = 1; i < NS_TEST_COUNT && OB_SUCC(ret); ++i) { + ObDatum* data = 
static_cast(allocator.alloc(sizeof(ObDatum))); + data = new(data) ObDatum(); + data->set_string(ns_value[i]); // default ns value + ASSERT_EQ(true, OB_NOT_NULL(data)); + ret = pass.add(ns_key[i], data); + } + ObJsonBuffer buf(&allocator); + ObPathExprIter pathiter_bin(&allocator); + pathiter_bin.init(ctx, str0, default_ns, &xbin, &pass, true); + ret = pathiter_bin.open(); + ret = pathiter_bin.path_node_->node_to_string(buf); + ObString str2(buf.ptr()); + ASSERT_EQ(str1, str2); + buf.reset(); + int idx = 0; + ObIMulModeBase* res; + int i = 0; + while (OB_SUCC(ret)) { + buf.reset(); + ObIMulModeBase* res; + ret = pathiter_bin.get_next_node(res); + if (i < MERGE_NS_RES_COUNT) { + ASSERT_EQ(OB_SUCCESS, ret); + ObXmlBin* bin_res = static_cast(res); + if (bin_res->check_extend()) { + ObXmlBin merge_res(ctx); + ASSERT_EQ(OB_SUCCESS, bin_res->merge_extend(merge_res)); + merge_res.print(buf, true); + } else { + bin_res->print(buf, true); + } + ObString s(buf.ptr()); + std::cout<