fix: parsing a large JSON text into a tree is too slow
This commit is contained in:
4
deps/oblib/src/lib/json_type/ob_json_bin.cpp
vendored
4
deps/oblib/src/lib/json_type/ob_json_bin.cpp
vendored
@ -986,7 +986,7 @@ int ObJsonBin::to_tree(ObJsonNode *&json_tree)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ObJsonBin::deserialize_json_value(const char *data,
|
int ObJsonBin:: deserialize_json_value(const char *data,
|
||||||
uint64_t length,
|
uint64_t length,
|
||||||
uint8_t type,
|
uint8_t type,
|
||||||
uint64_t value_offset,
|
uint64_t value_offset,
|
||||||
@ -1484,7 +1484,7 @@ int ObJsonBin::deserialize_json_object_v0(const char *data, uint64_t length, ObJ
|
|||||||
ObJsonNode *node = NULL;
|
ObJsonNode *node = NULL;
|
||||||
ret = deserialize_json_value(val, length - value_offset, val_type, value_offset, node, type);
|
ret = deserialize_json_value(val, length - value_offset, val_type, value_offset, node, type);
|
||||||
if (OB_SUCC(ret)) {
|
if (OB_SUCC(ret)) {
|
||||||
if (OB_FAIL(object->add(key, node))) {
|
if (OB_FAIL(object->add(key, node, false, true, false))) {
|
||||||
LOG_WARN("failed to add node to obj", K(ret));
|
LOG_WARN("failed to add node to obj", K(ret));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
10
deps/oblib/src/lib/json_type/ob_json_parse.cpp
vendored
10
deps/oblib/src/lib/json_type/ob_json_parse.cpp
vendored
@ -256,7 +256,7 @@ bool ObRapidJsonHandler::seeing_value(ObJsonNode *value)
|
|||||||
INIT_SUCC(ret);
|
INIT_SUCC(ret);
|
||||||
next_state_ = ObJsonExpectNextState::EXPECT_OBJECT_KEY;
|
next_state_ = ObJsonExpectNextState::EXPECT_OBJECT_KEY;
|
||||||
ObJsonObject *object = dynamic_cast<ObJsonObject *>(current_element_);
|
ObJsonObject *object = dynamic_cast<ObJsonObject *>(current_element_);
|
||||||
if (OB_FAIL(object->add(key_, value, with_unique_key_))) {
|
if (OB_FAIL(object->add(key_, value, with_unique_key_, true, false))) {
|
||||||
LOG_WARN("fail to add element to json object", K(ret));
|
LOG_WARN("fail to add element to json object", K(ret));
|
||||||
if (ret == OB_ERR_DUPLICATE_KEY) {
|
if (ret == OB_ERR_DUPLICATE_KEY) {
|
||||||
with_duplicate_key_ = true;
|
with_duplicate_key_ = true;
|
||||||
@ -317,7 +317,13 @@ bool ObRapidJsonHandler::is_end_object_or_array()
|
|||||||
// Sort the key-value pairs of the ObJsonObject at the current level.
|
// Sort the key-value pairs of the ObJsonObject at the current level.
|
||||||
ObJsonObject *obj = static_cast<ObJsonObject *>(current_element_);
|
ObJsonObject *obj = static_cast<ObJsonObject *>(current_element_);
|
||||||
obj->update_serialize_size();
|
obj->update_serialize_size();
|
||||||
obj->sort();
|
obj->stable_sort();
|
||||||
|
int64_t origin_num = obj->element_count();
|
||||||
|
obj->unique();
|
||||||
|
if (with_unique_key_ && obj->element_count() < origin_num) {
|
||||||
|
is_continue = false;
|
||||||
|
with_duplicate_key_ = true;
|
||||||
|
}
|
||||||
} else { // current is array
|
} else { // current is array
|
||||||
current_element_->update_serialize_size();
|
current_element_->update_serialize_size();
|
||||||
}
|
}
|
||||||
|
71
deps/oblib/src/lib/json_type/ob_json_tree.cpp
vendored
71
deps/oblib/src/lib/json_type/ob_json_tree.cpp
vendored
@ -669,7 +669,7 @@ int ObJsonObject::replace(const ObJsonNode *old_node, ObJsonNode *new_node)
|
|||||||
|
|
||||||
// When constructing a JSON tree, if two keys have the same value,
|
// When constructing a JSON tree, if two keys have the same value,
|
||||||
// the latter one will overwrite the former one
|
// the latter one will overwrite the former one
|
||||||
int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_unique_key)
|
int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_unique_key, bool is_lazy_sort, bool need_overwrite)
|
||||||
{
|
{
|
||||||
INIT_SUCC(ret);
|
INIT_SUCC(ret);
|
||||||
|
|
||||||
@ -680,20 +680,28 @@ int ObJsonObject::add(const common::ObString &key, ObJsonNode *value, bool with_
|
|||||||
value->set_parent(this);
|
value->set_parent(this);
|
||||||
ObJsonObjectPair pair(key, value);
|
ObJsonObjectPair pair(key, value);
|
||||||
|
|
||||||
ObJsonKeyCompare cmp;
|
if (need_overwrite) {
|
||||||
ObJsonObjectArray::iterator low_iter = std::lower_bound(object_array_.begin(),
|
ObJsonKeyCompare cmp;
|
||||||
object_array_.end(), pair, cmp);
|
ObJsonObjectArray::iterator low_iter = std::lower_bound(object_array_.begin(),
|
||||||
if (low_iter != object_array_.end() && low_iter->get_key() == key) { // Found and covered
|
object_array_.end(), pair, cmp);
|
||||||
if (with_unique_key) {
|
if (low_iter != object_array_.end() && low_iter->get_key() == key) { // Found and covered
|
||||||
ret = OB_ERR_DUPLICATE_KEY;
|
if (with_unique_key) {
|
||||||
LOG_WARN("Found duplicate key inserted before!", K(key), K(ret));
|
ret = OB_ERR_DUPLICATE_KEY;
|
||||||
} else {
|
LOG_WARN("Found duplicate key inserted before!", K(key), K(ret));
|
||||||
low_iter->set_value(value);
|
} else {
|
||||||
|
low_iter->set_value(value);
|
||||||
|
}
|
||||||
|
} else if (OB_FAIL(object_array_.push_back(pair))) {
|
||||||
|
LOG_WARN("failed to store in object array.", K(ret));
|
||||||
|
} else if (!is_lazy_sort) {
|
||||||
|
sort();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (OB_FAIL(object_array_.push_back(pair))) {
|
||||||
|
LOG_WARN("failed to store in object array.", K(ret));
|
||||||
|
} else if (!is_lazy_sort) {
|
||||||
|
sort();
|
||||||
}
|
}
|
||||||
} else { // not found, push back, sort
|
|
||||||
object_array_.push_back(pair);
|
|
||||||
// sort again.
|
|
||||||
sort();
|
|
||||||
}
|
}
|
||||||
set_serialize_delta_size(value->get_serialize_size());
|
set_serialize_delta_size(value->get_serialize_size());
|
||||||
}
|
}
|
||||||
@ -730,6 +738,41 @@ void ObJsonObject::sort()
|
|||||||
std::sort(object_array_.begin(), object_array_.end(), cmp);
|
std::sort(object_array_.begin(), object_array_.end(), cmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ObJsonObject::stable_sort()
|
||||||
|
{
|
||||||
|
ObJsonKeyCompare cmp;
|
||||||
|
std::stable_sort(object_array_.begin(), object_array_.end(), cmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ObJsonObject::unique()
|
||||||
|
{
|
||||||
|
int64_t pos = 1;
|
||||||
|
int64_t cur = 0;
|
||||||
|
int64_t last = object_array_.count();
|
||||||
|
|
||||||
|
for (; pos < last; pos++) {
|
||||||
|
ObJsonObjectPair& cur_ref = object_array_[cur];
|
||||||
|
ObJsonObjectPair& pos_ref = object_array_[pos];
|
||||||
|
|
||||||
|
common::ObString cur_key = cur_ref.get_key();
|
||||||
|
common::ObString pos_key = pos_ref.get_key();
|
||||||
|
|
||||||
|
if (cur_key.length() == pos_key.length() && cur_key.compare(pos_key) == 0) {
|
||||||
|
cur_ref = pos_ref;
|
||||||
|
} else {
|
||||||
|
cur++;
|
||||||
|
if (cur != pos) {
|
||||||
|
object_array_[cur] = pos_ref;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (++cur < last) {
|
||||||
|
object_array_.pop_back();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void ObJsonObject::clear()
|
void ObJsonObject::clear()
|
||||||
{
|
{
|
||||||
object_array_.destroy();
|
object_array_.destroy();
|
||||||
|
5
deps/oblib/src/lib/json_type/ob_json_tree.h
vendored
5
deps/oblib/src/lib/json_type/ob_json_tree.h
vendored
@ -205,6 +205,9 @@ public:
|
|||||||
}
|
}
|
||||||
return max_child + 1;
|
return max_child + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void unique();
|
||||||
|
void stable_sort();
|
||||||
OB_INLINE uint64_t get_serialize_size()
|
OB_INLINE uint64_t get_serialize_size()
|
||||||
{
|
{
|
||||||
if (serialize_size_ == 0) {
|
if (serialize_size_ == 0) {
|
||||||
@ -256,7 +259,7 @@ public:
|
|||||||
// @param [in] key The key.
|
// @param [in] key The key.
|
||||||
// @param [in] value The Json node.
|
// @param [in] value The Json node.
|
||||||
// @return Returns OB_SUCCESS on success, error code otherwise.
|
// @return Returns OB_SUCCESS on success, error code otherwise.
|
||||||
int add(const common::ObString &key, ObJsonNode *value, bool with_unique_key = false);
|
int add(const common::ObString &key, ObJsonNode *value, bool with_unique_key = false, bool is_lazy_sort = false, bool need_overwrite = true);
|
||||||
|
|
||||||
// Rename key in current object if exist.
|
// Rename key in current object if exist.
|
||||||
//
|
//
|
||||||
|
@ -6626,7 +6626,7 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (OB_FAIL(ret)) {
|
if (OB_FAIL(ret)) {
|
||||||
} else if (OB_FAIL(json_object.object_add(key_data, json_val))) {
|
} else if (OB_FAIL(json_object.add(key_data, static_cast<ObJsonNode*>(json_val), false, true, false))) {
|
||||||
LOG_WARN("failed: json object add json value", K(ret));
|
LOG_WARN("failed: json object add json value", K(ret));
|
||||||
} else if (json_object.get_serialize_size() > OB_MAX_PACKET_LENGTH) {
|
} else if (json_object.get_serialize_size() > OB_MAX_PACKET_LENGTH) {
|
||||||
ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT;
|
ret = OB_ERR_TOO_LONG_STRING_IN_CONCAT;
|
||||||
@ -6645,6 +6645,8 @@ int ObAggregateProcessor::get_json_objectagg_result(const ObAggrInfo &aggr_info,
|
|||||||
} else {
|
} else {
|
||||||
ret = OB_SUCCESS;
|
ret = OB_SUCCESS;
|
||||||
ObString str;
|
ObString str;
|
||||||
|
json_object.stable_sort();
|
||||||
|
json_object.unique();
|
||||||
// output res
|
// output res
|
||||||
if (OB_FAIL(json_object.get_raw_binary(str, &aggr_alloc_))) {
|
if (OB_FAIL(json_object.get_raw_binary(str, &aggr_alloc_))) {
|
||||||
LOG_WARN("get result binary failed", K(ret));
|
LOG_WARN("get result binary failed", K(ret));
|
||||||
|
@ -224,7 +224,7 @@ int ObExprJsonObject::eval_json_object(const ObExpr &expr, ObEvalCtx &ctx, ObDat
|
|||||||
} else if (OB_FAIL(ObJsonExprHelper::get_json_val(expr, ctx, &temp_allocator, i+1, j_val))) {
|
} else if (OB_FAIL(ObJsonExprHelper::get_json_val(expr, ctx, &temp_allocator, i+1, j_val))) {
|
||||||
ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM;
|
ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM;
|
||||||
LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT_IN_PARAM);
|
LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT_IN_PARAM);
|
||||||
} else if (OB_FAIL(j_base->object_add(key, j_val))) {
|
} else if (OB_FAIL(j_obj.add(key, static_cast<ObJsonNode*>(j_val), false, true, false))) {
|
||||||
if (ret == OB_ERR_JSON_DOCUMENT_NULL_KEY) {
|
if (ret == OB_ERR_JSON_DOCUMENT_NULL_KEY) {
|
||||||
LOG_USER_ERROR(OB_ERR_JSON_DOCUMENT_NULL_KEY);
|
LOG_USER_ERROR(OB_ERR_JSON_DOCUMENT_NULL_KEY);
|
||||||
}
|
}
|
||||||
@ -235,6 +235,8 @@ int ObExprJsonObject::eval_json_object(const ObExpr &expr, ObEvalCtx &ctx, ObDat
|
|||||||
|
|
||||||
if (OB_SUCC(ret)) {
|
if (OB_SUCC(ret)) {
|
||||||
ObString raw_bin;
|
ObString raw_bin;
|
||||||
|
j_obj.stable_sort();
|
||||||
|
j_obj.unique();
|
||||||
if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) {
|
if (OB_FAIL(j_base->get_raw_binary(raw_bin, &temp_allocator))) {
|
||||||
LOG_WARN("failed: get json raw binary", K(ret));
|
LOG_WARN("failed: get json raw binary", K(ret));
|
||||||
} else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) {
|
} else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, raw_bin))) {
|
||||||
|
@ -156,7 +156,8 @@ int ObExprJsonType::calc(ObEvalCtx &ctx, const ObDatum &data, ObDatumMeta meta,
|
|||||||
} else if (OB_FAIL(ObJsonBaseFactory::get_json_base(allocator, j_str, j_in_type,
|
} else if (OB_FAIL(ObJsonBaseFactory::get_json_base(allocator, j_str, j_in_type,
|
||||||
j_in_type, j_base))) {
|
j_in_type, j_base))) {
|
||||||
LOG_WARN("fail to get json base", K(ret), K(type), K(j_str), K(j_in_type));
|
LOG_WARN("fail to get json base", K(ret), K(type), K(j_str), K(j_in_type));
|
||||||
if (ret == OB_ERR_INVALID_JSON_TEXT) {
|
if (ret == OB_ERR_INVALID_JSON_TEXT_IN_PARAM) {
|
||||||
|
ret = OB_ERR_INVALID_JSON_TEXT;
|
||||||
LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT);
|
LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -2231,6 +2231,78 @@ TEST_F(TestJsonTree, oracle_sub_type)
|
|||||||
ASSERT_EQ(OB_SUCCESS, o_float.to_int(i_value));
|
ASSERT_EQ(OB_SUCCESS, o_float.to_int(i_value));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses an object in which "a" and "b" each appear twice and checks that the
// printed tree lists each key once, in key order, with the value that came
// last in the text.
TEST_F(TestJsonTree, test_sort)
{
  // Syntactically valid JSON text with repeated keys.
  common::ObString dup_key_text("{ \"a\" : \"value1\", \"a\" : \"value2\", "
                                "\"b\" : \"value3\", \"b\" : \"value4\" }");
  common::ObArenaAllocator allocator(ObModIds::TEST);
  ObJsonNode *parsed = NULL;
  const char *syntax_error = NULL;
  ASSERT_EQ(OB_SUCCESS, ObJsonParser::parse_json_text(&allocator, dup_key_text.ptr(),
      dup_key_text.length(), syntax_error, NULL, parsed));
  ASSERT_TRUE(parsed != NULL);

  // Render the tree back to text without quoting the whole document.
  ObJsonBuffer printed(&allocator);
  ASSERT_EQ(parsed->print(printed, false), 0);

  std::string actual(printed.ptr());
  std::string expected("{\"a\": \"value2\", \"b\": \"value4\"}");
  ASSERT_EQ(expected, actual);
}
|
||||||
|
|
||||||
|
// Builds one flat JSON object with 20000 members, each a random 32-character
// hex key mapped to its insertion index, parses it, and prints the wall-clock
// timestamps taken immediately before and after the parse. Only parse success
// is asserted; no time bound is enforced.
TEST_F(TestJsonTree, test_big_json)
{
  common::ObArenaAllocator allocator(ObModIds::TEST);
  ObJsonBuffer text_buf(&allocator);
  ASSERT_EQ(text_buf.reserve(1024 * 1024), 0);
  ASSERT_EQ(text_buf.append("{"), 0);

  static char hex_digits[] = "0123456789abcdef";
  char key_text[33] = {0};
  char value_text[16] = {0};

  for (int64_t member = 0; member < 20000; ++member) {
    // Fill the key with 32 random hex digits.
    for (int k = 0; k < 32; ++k) {
      key_text[k] = hex_digits[ObRandom::rand(0, 15)];
    }

    // Emit `"<key>": <member>, `.
    ASSERT_EQ(text_buf.append("\""), 0);
    ASSERT_EQ(text_buf.append(key_text, 32), 0);
    ASSERT_EQ(text_buf.append("\""), 0);
    ASSERT_EQ(text_buf.append(": "), 0);
    snprintf(value_text, sizeof(value_text), "%ld", member);
    ASSERT_EQ(text_buf.append(value_text), 0);
    ASSERT_EQ(text_buf.append(", "), 0);
  }

  // Strip the trailing ", " left by the last member, then close the object.
  text_buf.set_length(text_buf.length() - 2);
  ASSERT_EQ(text_buf.append("}"), 0);

  // Correct JSON text.
  common::ObString json_text(text_buf.length(), text_buf.ptr());

  ObJsonNode *parsed = NULL;
  const char *syntax_error = NULL;

  struct timeval before, after;
  gettimeofday(&before, nullptr);
  ASSERT_EQ(OB_SUCCESS, ObJsonParser::parse_json_text(&allocator, json_text.ptr(),
      json_text.length(), syntax_error, NULL, parsed));
  ASSERT_TRUE(parsed != NULL);
  gettimeofday(&after, nullptr);

  cout << "time start : " << " sec = " << before.tv_sec << ", usec = " << before.tv_usec << endl;
  cout << "time end : " << " sec = " << after.tv_sec << ", usec = " << after.tv_usec << endl;
}
|
||||||
|
|
||||||
} // namespace common
|
} // namespace common
|
||||||
} // namespace oceanbase
|
} // namespace oceanbase
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user