fix: parsing large JSON text is too slow when the parser constructs the JSON tree

This commit is contained in:
obdev
2023-06-16 03:12:19 +00:00
committed by ob-robot
parent 9ce10fcef5
commit a6367c9813
8 changed files with 151 additions and 22 deletions

View File

@ -986,7 +986,7 @@ int ObJsonBin::to_tree(ObJsonNode *&json_tree)
return ret; return ret;
} }
int ObJsonBin::deserialize_json_value(const char *data, int ObJsonBin:: deserialize_json_value(const char *data,
uint64_t length, uint64_t length,
uint8_t type, uint8_t type,
uint64_t value_offset, uint64_t value_offset,
@ -1484,7 +1484,7 @@ int ObJsonBin::deserialize_json_object_v0(const char *data, uint64_t length, ObJ
ObJsonNode *node = NULL; ObJsonNode *node = NULL;
ret = deserialize_json_value(val, length - value_offset, val_type, value_offset, node, type); ret = deserialize_json_value(val, length - value_offset, val_type, value_offset, node, type);
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (OB_FAIL(object->add(key, node))) { if (OB_FAIL(object->add(key, node, false, true, false))) {
LOG_WARN("failed to add node to obj", K(ret)); LOG_WARN("failed to add node to obj", K(ret));
} }
} else { } else {

View File

@ -256,7 +256,7 @@ bool ObRapidJsonHandler::seeing_value(ObJsonNode *value)
INIT_SUCC(ret); INIT_SUCC(ret);
next_state_ = ObJsonExpectNextState::EXPECT_OBJECT_KEY; next_state_ = ObJsonExpectNextState::EXPECT_OBJECT_KEY;
ObJsonObject *object = dynamic_cast<ObJsonObject *>(current_element_); ObJsonObject *object = dynamic_cast<ObJsonObject *>(current_element_);
if (OB_FAIL(object->add(key_, value, with_unique_key_))) { if (OB_FAIL(object->add(key_, value, with_unique_key_, true, false))) {
LOG_WARN("fail to add element to json object", K(ret)); LOG_WARN("fail to add element to json object", K(ret));
if (ret == OB_ERR_DUPLICATE_KEY) { if (ret == OB_ERR_DUPLICATE_KEY) {
with_duplicate_key_ = true; with_duplicate_key_ = true;
@ -317,7 +317,13 @@ bool ObRapidJsonHandler::is_end_object_or_array()
// Sort the key-value pairs of the ObJsonObject at the current level. // Sort the key-value pairs of the ObJsonObject at the current level.
ObJsonObject *obj = static_cast<ObJsonObject *>(current_element_); ObJsonObject *obj = static_cast<ObJsonObject *>(current_element_);
obj->update_serialize_size(); obj->update_serialize_size();
obj->sort(); obj->stable_sort();
int64_t origin_num = obj->element_count();
obj->unique();
if (with_unique_key_ && obj->element_count() < origin_num) {
is_continue = false;
with_duplicate_key_ = true;
}
} else { // current is array } else { // current is array
current_element_->update_serialize_size(); current_element_->update_serialize_size();
} }

View File

@ -669,7 +669,7 @@ int ObJsonObject::replace(const ObJsonNode *old_node, ObJsonNode *new_node)
// When constructing a JSON tree, if two keys have the same value, // When constructing a JSON tree, if two keys have the same value,
// the latter one will overwrite the former one // the latter one will overwrite the former one
int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_unique_key) int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_unique_key, bool is_lazy_sort, bool need_overwrite)
{ {
INIT_SUCC(ret); INIT_SUCC(ret);
@ -680,20 +680,28 @@ int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_
value->set_parent(this); value->set_parent(this);
ObJsonObjectPair pair(key, value); ObJsonObjectPair pair(key, value);
ObJsonKeyCompare cmp; if (need_overwrite) {
ObJsonObjectArray::iterator low_iter = std::lower_bound(object_array_.begin(), ObJsonKeyCompare cmp;
object_array_.end(), pair, cmp); ObJsonObjectArray::iterator low_iter = std::lower_bound(object_array_.begin(),
if (low_iter != object_array_.end() && low_iter->get_key() == key) { // Found and covered object_array_.end(), pair, cmp);
if (with_unique_key) { if (low_iter != object_array_.end() && low_iter->get_key() == key) { // Found and covered
ret = OB_ERR_DUPLICATE_KEY; if (with_unique_key) {
LOG_WARN("Found duplicate key inserted before!", K(key), K(ret)); ret = OB_ERR_DUPLICATE_KEY;
} else { LOG_WARN("Found duplicate key inserted before!", K(key), K(ret));
low_iter->set_value(value); } else {
low_iter->set_value(value);
}
} else if (OB_FAIL(object_array_.push_back(pair))) {
LOG_WARN("failed to store in object array.", K(ret));
} else if (!is_lazy_sort) {
sort();
}
} else {
if (OB_FAIL(object_array_.push_back(pair))) {
LOG_WARN("failed to store in object array.", K(ret));
} else if (!is_lazy_sort) {
sort();
} }
} else { // not found, push back, sort
object_array_.push_back(pair);
// sort again.
sort();
} }
set_serialize_delta_size(value->get_serialize_size()); set_serialize_delta_size(value->get_serialize_size());
} }
@ -730,6 +738,41 @@ void ObJsonObject::sort()
std::sort(object_array_.begin(), object_array_.end(), cmp); std::sort(object_array_.begin(), object_array_.end(), cmp);
} }
// Orders the key/value pairs of this object with std::stable_sort under
// ObJsonKeyCompare. Because the sort is stable, pairs that the comparator
// treats as equivalent keep their current relative order.
void ObJsonObject::stable_sort()
{
  std::stable_sort(object_array_.begin(), object_array_.end(), ObJsonKeyCompare());
}
void ObJsonObject::unique()
{
int64_t pos = 1;
int64_t cur = 0;
int64_t last = object_array_.count();
for (; pos < last; pos++) {
ObJsonObjectPair& cur_ref = object_array_[cur];
ObJsonObjectPair& pos_ref = object_array_[pos];
common::ObString cur_key = cur_ref.get_key();
common::ObString pos_key = pos_ref.get_key();
if (cur_key.length() == pos_key.length() && cur_key.compare(pos_key) == 0) {
cur_ref = pos_ref;
} else {
cur++;
if (cur != pos) {
object_array_[cur] = pos_ref;
}
}
}
while (++cur < last) {
object_array_.pop_back();
}
}
void ObJsonObject::clear() void ObJsonObject::clear()
{ {
object_array_.destroy(); object_array_.destroy();

View File

@ -205,6 +205,9 @@ public:
} }
return max_child + 1; return max_child + 1;
} }
void unique();
void stable_sort();
OB_INLINE uint64_t get_serialize_size() OB_INLINE uint64_t get_serialize_size()
{ {
if (serialize_size_ == 0) { if (serialize_size_ == 0) {
@ -256,7 +259,7 @@ public:
// @param [in] key The key. // @param [in] key The key.
// @param [in] value The Json node. // @param [in] value The Json node.
// @return Returns OB_SUCCESS on success, error code otherwise. // @return Returns OB_SUCCESS on success, error code otherwise.
int add(const common::ObString &key, ObJsonNode *value, bool with_unique_key = false); int add(const common::ObString &key, ObJsonNode *value, bool with_unique_key = false, bool is_lazy_sort = false, bool need_overwrite = true);
// Rename key in current object if exist. // Rename key in current object if exist.
// //

View File

@ -6626,7 +6626,7 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info,
} }
if (OB_FAIL(ret)) { if (OB_FAIL(ret)) {
} else if (OB_FAIL(json_object.object_add(key_data, json_val))) { } else if (OB_FAIL(json_object.add(key_data, static_cast<ObJsonNode*>(json_val), false, true, false))) {
LOG_WARN("failed: json object add json value", K(ret)); LOG_WARN("failed: json object add json value", K(ret));
} else if (json_object.get_serialize_size() > OB_MAX_PACKET_LENGTH) { } else if (json_object.get_serialize_size() > OB_MAX_PACKET_LENGTH) {
ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT; ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT;
@ -6645,6 +6645,8 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info,
} else { } else {
ret = OB_SUCCESS; ret = OB_SUCCESS;
ObString str; ObString str;
json_object.stable_sort();
json_object.unique();
// output res // output res
if (OB_FAIL(json_object.get_raw_binary(str, &aggr_alloc_))) { if (OB_FAIL(json_object.get_raw_binary(str, &aggr_alloc_))) {
LOG_WARN("get result binary failed", K(ret)); LOG_WARN("get result binary failed", K(ret));

View File

@ -224,7 +224,7 @@ int ObExprJsonObject::eval_json_object(const ObExpr &expr, ObEvalCtx &ctx, ObDat
} else if (OB_FAIL(ObJsonExprHelper::get_json_val(expr, ctx, &temp_allocator, i+1, j_val))) { } else if (OB_FAIL(ObJsonExprHelper::get_json_val(expr, ctx, &temp_allocator, i+1, j_val))) {
ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM; ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM;
LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT_IN_PARAM); LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT_IN_PARAM);
} else if (OB_FAIL(j_base->object_add(key, j_val))) { } else if (OB_FAIL(j_obj.add(key, static_cast<ObJsonNode*>(j_val), false, true, false))) {
if (ret == OB_ERR_JSON_DOCUMENT_NULL_KEY) { if (ret == OB_ERR_JSON_DOCUMENT_NULL_KEY) {
LOG_USER_ERROR(OB_ERR_JSON_DOCUMENT_NULL_KEY); LOG_USER_ERROR(OB_ERR_JSON_DOCUMENT_NULL_KEY);
} }
@ -235,6 +235,8 @@ int ObExprJsonObject::eval_json_object(const ObExpr &expr, ObEvalCtx &ctx, ObDat
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
ObString raw_bin; ObString raw_bin;
j_obj.stable_sort();
j_obj.unique();
if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) { if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) {
LOG_WARN("failed: get json raw binary", K(ret)); LOG_WARN("failed: get json raw binary", K(ret));
} else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) { } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) {

View File

@ -156,7 +156,8 @@ int ObExprJsonType::calc(ObEvalCtx &ctx, const ObDatum &data, ObDatumMeta meta,
} else if (OB_FAIL(ObJsonBaseFactory::get_json_base(allocator, j_str, j_in_type, } else if (OB_FAIL(ObJsonBaseFactory::get_json_base(allocator, j_str, j_in_type,
j_in_type, j_base))) { j_in_type, j_base))) {
LOG_WARN("fail to get json base", K(ret), K(type), K(j_str), K(j_in_type)); LOG_WARN("fail to get json base", K(ret), K(type), K(j_str), K(j_in_type));
if (ret == OB_ERR_INVALID_JSON_TEXT) { if (ret == OB_ERR_INVALID_JSON_TEXT_IN_PARAM) {
ret = OB_ERR_INVALID_JSON_TEXT;
LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT); LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT);
} }
} else { } else {

View File

@ -2231,6 +2231,78 @@ TEST_F(TestJsonTree, oracle_sub_type)
ASSERT_EQ(OB_SUCCESS, o_float.to_int(i_value)); ASSERT_EQ(OB_SUCCESS, o_float.to_int(i_value));
} }
// Parses an object whose keys "a" and "b" each occur twice and checks that the
// printed tree contains each key once, carrying the value that came last in
// the text.
TEST_F(TestJsonTree, test_sort)
{
  // Valid JSON text with repeated keys.
  common::ObString json_text("{ \"a\" : \"value1\", \"a\" : \"value2\", "
                             "\"b\" : \"value3\", \"b\" : \"value4\" }");
  common::ObArenaAllocator allocator(ObModIds::TEST);
  const char *syntaxerr = NULL;
  ObJsonNode *json_tree = NULL;
  ASSERT_EQ(OB_SUCCESS, ObJsonParser::parse_json_text(&allocator, json_text.ptr(),
      json_text.length(), syntaxerr, NULL, json_tree));
  ASSERT_TRUE(json_tree != NULL);
  ObJsonBuffer j_buf(&allocator);
  ASSERT_EQ(json_tree->print(j_buf, false), 0);
  // Build the string from the buffer's explicit length instead of relying on
  // the buffer being NUL-terminated.
  std::string tmp_res(j_buf.ptr(), j_buf.length());
  std::string result("{\"a\": \"value2\", \"b\": \"value4\"}");
  ASSERT_EQ(result, tmp_res);
}
// Builds a JSON object text with 20000 members (random 32-character hex keys,
// integer values), parses it into a tree, and prints the wall-clock time taken
// by the parse. The test asserts only that parsing succeeds; the timing is
// informational.
TEST_F(TestJsonTree, test_big_json)
{
  common::ObArenaAllocator allocator(ObModIds::TEST);
  ObJsonBuffer j_buf(&allocator);
  ASSERT_EQ(j_buf.reserve(1024 * 1024), 0);
  ASSERT_EQ(j_buf.append("{"), 0);
  static const char origin[] = "0123456789abcdef";
  char key_buffer[33] = {0};
  char value_buffer[16] = {0};
  int idx = 0;
  for (int64_t pos = 0; pos < 20000; ++pos) {
    // Random 32-character key drawn from the hex alphabet.
    for (int i = 0; i < 32; ++i) {
      idx = ObRandom::rand(0, 15);
      key_buffer[i] = origin[idx];
    }
    ASSERT_EQ(j_buf.append("\""), 0);
    ASSERT_EQ(j_buf.append(key_buffer, 32), 0);
    ASSERT_EQ(j_buf.append("\""), 0);
    ASSERT_EQ(j_buf.append(": "), 0);
    // "%ld" expects a long; cast explicitly so the argument matches the
    // format on every platform (pos is always below 20000).
    snprintf(value_buffer, sizeof(value_buffer), "%ld", static_cast<long>(pos));
    ASSERT_EQ(j_buf.append(value_buffer), 0);
    ASSERT_EQ(j_buf.append(", "), 0);
  }
  // Drop the trailing ", " left by the last member before closing the object.
  j_buf.set_length(j_buf.length() - 2);
  ASSERT_EQ(j_buf.append("}"), 0);
  // Valid JSON text.
  common::ObString json_text(j_buf.length(), j_buf.ptr());
  const char *syntaxerr = NULL;
  ObJsonNode *json_tree = NULL;
  struct timeval time_start, time_end;
  gettimeofday(&time_start, nullptr);
  ASSERT_EQ(OB_SUCCESS, ObJsonParser::parse_json_text(&allocator, json_text.ptr(),
      json_text.length(), syntaxerr, NULL, json_tree));
  ASSERT_TRUE(json_tree != NULL);
  gettimeofday(&time_end, nullptr);
  const int64_t elapsed_us =
      (static_cast<int64_t>(time_end.tv_sec) - static_cast<int64_t>(time_start.tv_sec)) * 1000000
      + (static_cast<int64_t>(time_end.tv_usec) - static_cast<int64_t>(time_start.tv_usec));
  cout << "time start : " << " sec = " << time_start.tv_sec << ", usec = " << time_start.tv_usec << endl;
  cout << "time end : " << " sec = " << time_end.tv_sec << ", usec = " << time_end.tv_usec << endl;
  cout << "time elapsed : " << " usec = " << elapsed_us << endl;
}
} // namespace common } // namespace common
} // namespace oceanbase } // namespace oceanbase