diff --git a/deps/oblib/src/lib/json_type/ob_json_bin.cpp b/deps/oblib/src/lib/json_type/ob_json_bin.cpp index 97d4b05b6f..27bba867ce 100644 --- a/deps/oblib/src/lib/json_type/ob_json_bin.cpp +++ b/deps/oblib/src/lib/json_type/ob_json_bin.cpp @@ -986,7 +986,7 @@ int ObJsonBin::to_tree(ObJsonNode *&json_tree) return ret; } -int ObJsonBin::deserialize_json_value(const char *data, +int ObJsonBin:: deserialize_json_value(const char *data, uint64_t length, uint8_t type, uint64_t value_offset, @@ -1484,7 +1484,7 @@ int ObJsonBin::deserialize_json_object_v0(const char *data, uint64_t length, ObJ ObJsonNode *node = NULL; ret = deserialize_json_value(val, length - value_offset, val_type, value_offset, node, type); if (OB_SUCC(ret)) { - if (OB_FAIL(object->add(key, node))) { + if (OB_FAIL(object->add(key, node, false, true, false))) { /* is_lazy_sort=true, need_overwrite=false: add() appends the pair without sorting or looking for an existing key */ LOG_WARN("failed to add node to obj", K(ret)); } } else { diff --git a/deps/oblib/src/lib/json_type/ob_json_parse.cpp b/deps/oblib/src/lib/json_type/ob_json_parse.cpp index cd1b443fb1..2b1db81cf5 100644 --- a/deps/oblib/src/lib/json_type/ob_json_parse.cpp +++ b/deps/oblib/src/lib/json_type/ob_json_parse.cpp @@ -256,7 +256,7 @@ bool ObRapidJsonHandler::seeing_value(ObJsonNode *value) INIT_SUCC(ret); next_state_ = ObJsonExpectNextState::EXPECT_OBJECT_KEY; ObJsonObject *object = dynamic_cast(current_element_); - if (OB_FAIL(object->add(key_, value, with_unique_key_))) { + if (OB_FAIL(object->add(key_, value, with_unique_key_, true, false))) { /* need_overwrite=false: the visible add() body only sets OB_ERR_DUPLICATE_KEY inside its need_overwrite branch, so repeated keys are detected later, when the object is closed */ LOG_WARN("fail to add element to json object", K(ret)); if (ret == OB_ERR_DUPLICATE_KEY) { with_duplicate_key_ = true; @@ -317,7 +317,13 @@ bool ObRapidJsonHandler::is_end_object_or_array() // Sort the key-value pairs of the ObJsonObject at the current level. 
ObJsonObject *obj = static_cast(current_element_); obj->update_serialize_size(); - obj->sort(); + obj->stable_sort(); /* stable: pairs whose keys compare equivalent keep their relative (append) order */ + int64_t origin_num = obj->element_count(); + obj->unique(); /* keeps the last pair of every run of equal keys; NOTE(review): update_serialize_size() above runs before these pairs are removed - confirm the size does not keep counting the dropped duplicates */ + if (with_unique_key_ && obj->element_count() < origin_num) { /* the element count shrank, so at least one key was repeated */ + is_continue = false; + with_duplicate_key_ = true; + } } else { // current is array current_element_->update_serialize_size(); } diff --git a/deps/oblib/src/lib/json_type/ob_json_tree.cpp b/deps/oblib/src/lib/json_type/ob_json_tree.cpp index 53b693af6a..e625782a6d 100644 --- a/deps/oblib/src/lib/json_type/ob_json_tree.cpp +++ b/deps/oblib/src/lib/json_type/ob_json_tree.cpp @@ -669,7 +669,7 @@ int ObJsonObject::replace(const ObJsonNode *old_node, ObJsonNode *new_node) // When constructing a JSON tree, if two keys have the same value, // the latter one will overwrite the former one -int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_unique_key) +int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_unique_key, bool is_lazy_sort, bool need_overwrite) { INIT_SUCC(ret); @@ -680,20 +680,28 @@ int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_ value->set_parent(this); ObJsonObjectPair pair(key, value); - ObJsonKeyCompare cmp; - ObJsonObjectArray::iterator low_iter = std::lower_bound(object_array_.begin(), - object_array_.end(), pair, cmp); - if (low_iter != object_array_.end() && low_iter->get_key() == key) { // Found and covered - if (with_unique_key) { - ret = OB_ERR_DUPLICATE_KEY; - LOG_WARN("Found duplicate key inserted before!", K(key), K(ret)); - } else { - low_iter->set_value(value); + if (need_overwrite) { /* binary search for an existing equal key; NOTE(review): std::lower_bound needs object_array_ ordered by ObJsonKeyCompare, but with is_lazy_sort=true this branch appends without sorting - confirm no caller combines need_overwrite=true with is_lazy_sort=true */ + ObJsonKeyCompare cmp; + ObJsonObjectArray::iterator low_iter = std::lower_bound(object_array_.begin(), + object_array_.end(), pair, cmp); + if (low_iter != object_array_.end() && low_iter->get_key() == key) { // Found and covered + if (with_unique_key) { + ret = OB_ERR_DUPLICATE_KEY; + LOG_WARN("Found duplicate key inserted before!", 
K(key), K(ret)); + } else { + low_iter->set_value(value); + } + } else if (OB_FAIL(object_array_.push_back(pair))) { + LOG_WARN("failed to store in object array.", K(ret)); + } else if (!is_lazy_sort) { + sort(); + } + } else { /* need_overwrite == false: always append, no lookup for an existing key and no duplicate-key error */ + if (OB_FAIL(object_array_.push_back(pair))) { + LOG_WARN("failed to store in object array.", K(ret)); + } else if (!is_lazy_sort) { + sort(); } - } else { // not found, push back, sort - object_array_.push_back(pair); - // sort again. - sort(); } set_serialize_delta_size(value->get_serialize_size()); } @@ -730,6 +738,41 @@ void ObJsonObject::sort() std::sort(object_array_.begin(), object_array_.end(), cmp); } +void ObJsonObject::stable_sort() +{ /* orders object_array_ with ObJsonKeyCompare through std::stable_sort, so pairs whose keys compare equivalent keep their relative order */ + ObJsonKeyCompare cmp; + std::stable_sort(object_array_.begin(), object_array_.end(), cmp); +} + /* unique(): compacts object_array_ in place. Only neighbouring pairs are compared, so equal keys must already be adjacent (the callers in this patch run stable_sort() first). For each run of equal keys the last pair of the run is the one kept. */ +void ObJsonObject::unique() +{ + int64_t pos = 1; + int64_t cur = 0; + int64_t last = object_array_.count(); + + for (; pos < last; pos++) { + ObJsonObjectPair& cur_ref = object_array_[cur]; + ObJsonObjectPair& pos_ref = object_array_[pos]; + + common::ObString cur_key = cur_ref.get_key(); + common::ObString pos_key = pos_ref.get_key(); + + if (cur_key.length() == pos_key.length() && cur_key.compare(pos_key) == 0) { + cur_ref = pos_ref; /* same key: the later pair overwrites the kept slot */ + } else { + cur++; /* different key: advance the write position */ + if (cur != pos) { + object_array_[cur] = pos_ref; + } + } + } + /* slots after index cur are leftovers of the compaction; pop one element for each of them */ + while (++cur < last) { + object_array_.pop_back(); + } + +} + void ObJsonObject::clear() { object_array_.destroy(); diff --git a/deps/oblib/src/lib/json_type/ob_json_tree.h b/deps/oblib/src/lib/json_type/ob_json_tree.h index a8d6707d83..3d0c30f3f2 100644 --- a/deps/oblib/src/lib/json_type/ob_json_tree.h +++ b/deps/oblib/src/lib/json_type/ob_json_tree.h @@ -205,6 +205,9 @@ public: } return max_child + 1; } + + void unique(); /* removes pairs with a repeated key, comparing neighbouring pairs only */ + void stable_sort(); /* sorts the pairs by key, keeping the relative order of equivalent keys */ OB_INLINE uint64_t get_serialize_size() { if (serialize_size_ == 0) { @@ -256,7 +259,7 @@ public: // @param [in] key The key. // @param [in] value The Json node. // @return Returns OB_SUCCESS on success, error code otherwise. 
- int add(const common::ObString &key, ObJsonNode *value, bool with_unique_key = false); + int add(const common::ObString &key, ObJsonNode *value, bool with_unique_key = false, bool is_lazy_sort = false, bool need_overwrite = true); /* is_lazy_sort: skip the sort() that otherwise follows an append. need_overwrite: look for an existing equal key first (OB_ERR_DUPLICATE_KEY when with_unique_key is set, value replacement otherwise); when false the pair is always appended. The defaults keep the three-argument behaviour of searching first and sorting after an append. */ // Rename key in current object if exist. // diff --git a/src/sql/engine/aggregate/ob_aggregate_processor.cpp b/src/sql/engine/aggregate/ob_aggregate_processor.cpp index e91e675801..5f6fb06299 100644 --- a/src/sql/engine/aggregate/ob_aggregate_processor.cpp +++ b/src/sql/engine/aggregate/ob_aggregate_processor.cpp @@ -6626,7 +6626,7 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info, } if (OB_FAIL(ret)) { - } else if (OB_FAIL(json_object.object_add(key_data, json_val))) { + } else if (OB_FAIL(json_object.add(key_data, static_cast(json_val), false, true, false))) { LOG_WARN("failed: json object add json value", K(ret)); } else if (json_object.get_serialize_size() > OB_MAX_PACKET_LENGTH) { ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT; @@ -6645,6 +6645,8 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info, } else { ret = OB_SUCCESS; ObString str; + json_object.stable_sort(); /* pairs were appended with is_lazy_sort=true and need_overwrite=false, so ordering and duplicate removal happen once here, before the binary is produced */ + json_object.unique(); // output res if (OB_FAIL(json_object.get_raw_binary(str, &aggr_alloc_))) { LOG_WARN("get result binary failed", K(ret)); diff --git a/src/sql/engine/expr/ob_expr_json_object.cpp b/src/sql/engine/expr/ob_expr_json_object.cpp index 67fe75928e..cbf3ce211e 100644 --- a/src/sql/engine/expr/ob_expr_json_object.cpp +++ b/src/sql/engine/expr/ob_expr_json_object.cpp @@ -224,7 +224,7 @@ int ObExprJsonObject::eval_json_object(const ObExpr &expr, ObEvalCtx &ctx, ObDat } else if (OB_FAIL(ObJsonExprHelper::get_json_val(expr, ctx, &temp_allocator, i+1, j_val))) { ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT_IN_PARAM); - } else if (OB_FAIL(j_base->object_add(key, j_val))) { + } else if (OB_FAIL(j_obj.add(key, static_cast(j_val), false, true, false))) { if (ret == 
OB_ERR_JSON_DOCUMENT_NULL_KEY) { LOG_USER_ERROR(OB_ERR_JSON_DOCUMENT_NULL_KEY); } @@ -235,6 +235,8 @@ int ObExprJsonObject::eval_json_object(const ObExpr &expr, ObEvalCtx &ctx, ObDat if (OB_SUCC(ret)) { ObString raw_bin; + j_obj.stable_sort(); /* the pairs above were appended unsorted and unchecked (is_lazy_sort=true, need_overwrite=false); sort and drop repeated keys once before serializing */ + j_obj.unique(); if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { LOG_WARN("failed: get json raw binary", K(ret)); } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { diff --git a/src/sql/engine/expr/ob_expr_json_type.cpp b/src/sql/engine/expr/ob_expr_json_type.cpp index 8d12e51b58..ad574271b3 100644 --- a/src/sql/engine/expr/ob_expr_json_type.cpp +++ b/src/sql/engine/expr/ob_expr_json_type.cpp @@ -156,7 +156,8 @@ int ObExprJsonType::calc(ObEvalCtx &ctx, const ObDatum &data, ObDatumMeta meta, } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(allocator, j_str, j_in_type, j_in_type, j_base))) { LOG_WARN("fail to get json base", K(ret), K(type), K(j_str), K(j_in_type)); - if (ret == OB_ERR_INVALID_JSON_TEXT) { + if (ret == OB_ERR_INVALID_JSON_TEXT_IN_PARAM) { + ret = OB_ERR_INVALID_JSON_TEXT; /* the parameter-level error code is replaced by OB_ERR_INVALID_JSON_TEXT, which is also the code reported to the user below */ LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT); } } else { diff --git a/unittest/share/test_json_tree.cpp b/unittest/share/test_json_tree.cpp index aeaa8390a8..ec787b78ec 100644 --- a/unittest/share/test_json_tree.cpp +++ b/unittest/share/test_json_tree.cpp @@ -2231,6 +2231,78 @@ TEST_F(TestJsonTree, oracle_sub_type) ASSERT_EQ(OB_SUCCESS, o_float.to_int(i_value)); } +TEST_F(TestJsonTree, test_sort) +{ /* parses an object in which the keys a and b each appear twice and expects the printed tree to contain each key once, with the later value */ + // correct json text + common::ObString json_text("{ \"a\" : \"value1\", \"a\" : \"value2\", \ + \"b\" : \"value3\", \"b\" : \"value4\" }"); + common::ObArenaAllocator allocator(ObModIds::TEST); + const char *syntaxerr = NULL; + ObJsonNode *json_tree = NULL; + ASSERT_EQ(OB_SUCCESS, ObJsonParser::parse_json_text(&allocator, json_text.ptr(), + json_text.length(), syntaxerr, NULL, json_tree)); + ASSERT_TRUE(json_tree != NULL); + + ObJsonBuffer j_buf(&allocator); + ASSERT_EQ(json_tree->print(j_buf, 
false), 0); + + std::string tmp_res(j_buf.ptr()); + std::string result("{\"a\": \"value2\", \"b\": \"value4\"}"); + ASSERT_EQ(result, tmp_res); +} + +TEST_F(TestJsonTree, test_big_json) +{ /* builds one JSON object with 20000 members (32 random hex characters per key, the loop index as value), parses it, and prints the timestamps taken before and after the parse; the elapsed time itself is not asserted */ + common::ObArenaAllocator allocator(ObModIds::TEST); + ObJsonBuffer j_buf(&allocator); + ASSERT_EQ(j_buf.reserve(1024 * 1024), 0); + ASSERT_EQ(j_buf.append("{"), 0); + + + static char origin[] = "0123456789abcdef"; + char key_buffer[33] = {0}; + char value_buffer[16] = {0}; + int idx = 0; + + for (int64_t pos = 0; pos < 20000; ++pos) { + for (int i = 0; i < 32; ++i) { + idx = ObRandom::rand(0, 15); + key_buffer[i] = origin[idx]; + } + + ASSERT_EQ(j_buf.append("\""), 0); + ASSERT_EQ(j_buf.append(key_buffer, 32), 0); + ASSERT_EQ(j_buf.append("\""), 0); + + ASSERT_EQ(j_buf.append(": "), 0); + snprintf(value_buffer, 16, "%ld", pos); + ASSERT_EQ(j_buf.append(value_buffer), 0); + + ASSERT_EQ(j_buf.append(", "), 0); + } + + j_buf.set_length(j_buf.length() - 2); /* cut the two-character separator appended by the last loop iteration */ + ASSERT_EQ(j_buf.append("}"), 0); + + // correct json text + common::ObString json_text(j_buf.length(), j_buf.ptr()); + + const char *syntaxerr = NULL; + ObJsonNode *json_tree = NULL; + + struct timeval time_start, time_end; + gettimeofday(&time_start, nullptr); + ASSERT_EQ(OB_SUCCESS, ObJsonParser::parse_json_text(&allocator, json_text.ptr(), + json_text.length(), syntaxerr, NULL, json_tree)); + ASSERT_TRUE(json_tree != NULL); + + gettimeofday(&time_end, nullptr); + + cout << "time start : " << " sec = " << time_start.tv_sec << ", usec = " << time_start.tv_usec << endl; + cout << "time end : " << " sec = " << time_end.tv_sec << ", usec = " << time_end.tv_usec << endl; + +} + } // namespace common } // namespace oceanbase