From 40299d280d72cd291bebdbad36b7d8b94cb35389 Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Fri, 21 Jul 2023 17:02:01 +0800 Subject: [PATCH] =?UTF-8?q?[Fix](json=20reader)=20fix=20rapidjson=20`array?= =?UTF-8?q?->PushBack`=20may=20take=20ownership=E2=80=A6=20(#21988)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the JSON paths `["$.data","$.data.datatimestamp"]`, calling `array_obj->PushBack` transfers ownership of the `data` value to `array_obj`, which leads to null values for the JSON path `$.data.datatimestamp`. The fix pushes a copy of the value (made with `CopyFrom`) instead of the original. Rapidjson doc: ``` //! Append a GenericValue at the end of the array. \note The ownership of \c value will be transferred to this array on success. */ GenericValue& PushBack(GenericValue& value, Allocator& allocator); ``` --- be/src/exprs/json_functions.cpp | 6 +++-- .../load_p0/stream_load/test_json_load.out | 3 +++ .../load_p0/stream_load/with_jsonpath.json | 1 + .../load_p0/stream_load/test_json_load.groovy | 27 +++++++++++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 regression-test/data/load_p0/stream_load/with_jsonpath.json diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp index 107217ed4f..1dce452e34 100644 --- a/be/src/exprs/json_functions.cpp +++ b/be/src/exprs/json_functions.cpp @@ -172,11 +172,13 @@ rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( rapidjson::Value* root = match_value(parsed_paths, document, mem_allocator, true); if (root == nullptr || root == document) { // not found return nullptr; - } else if (!root->IsArray()) { + } else if (!root->IsArray() && wrap_explicitly) { rapidjson::Value* array_obj = nullptr; array_obj = static_cast(mem_allocator.Malloc(sizeof(rapidjson::Value))); array_obj->SetArray(); - array_obj->PushBack(*root, mem_allocator); + rapidjson::Value copy; + copy.CopyFrom(*root, mem_allocator); + array_obj->PushBack(std::move(copy), mem_allocator); // set `wrap_explicitly` to 
true, so that the caller knows that this Array is wrapped actively. *wrap_explicitly = true; return array_obj; diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out b/regression-test/data/load_p0/stream_load/test_json_load.out index 6296f37099..ad68ffa129 100644 --- a/regression-test/data/load_p0/stream_load/test_json_load.out +++ b/regression-test/data/load_p0/stream_load/test_json_load.out @@ -202,3 +202,6 @@ -- !select1 -- John 30 New York {"email":"john@example.com","phone":"+1-123-456-7890"} +-- !select22 -- +11324 1321313082437 1678834024274 20230315 {"base_mac_value_null":24,"base_1_value_respiratoryrate":11,"base_3_value_heartrate":51,"base_3_status_onoroutofbed":3,"base_null_count_circulation":84,"base_1_status_onoroutofbed":3,"base_1_value_heartrate":51,"base_3_value_respiratoryrate":11,"base_3_value_bodyactivityenergy":43652,"base_2_value_respiratoryrate":11,"base_2_value_bodyactivityenergy":28831,"base_2_status_onoroutofbed":3,"base_1_value_bodyactivityenergy":56758,"base_2_value_heartrate":51,"tsltype":"properties","datatimestamp":1678834024274,"command":"0105","macaddress":"405EE1805029"} + diff --git a/regression-test/data/load_p0/stream_load/with_jsonpath.json b/regression-test/data/load_p0/stream_load/with_jsonpath.json new file mode 100644 index 0000000000..11d14310af --- /dev/null +++ b/regression-test/data/load_p0/stream_load/with_jsonpath.json @@ -0,0 +1 @@ 
+{"data":{"base_mac_value_null":24,"base_1_value_respiratoryrate":11,"base_3_value_heartrate":51,"base_3_status_onoroutofbed":3,"base_null_count_circulation":84,"base_1_status_onoroutofbed":3,"base_1_value_heartrate":51,"base_3_value_respiratoryrate":11,"base_3_value_bodyactivityenergy":43652,"base_2_value_respiratoryrate":11,"base_2_value_bodyactivityenergy":28831,"base_2_status_onoroutofbed":3,"base_1_value_bodyactivityenergy":56758,"base_2_value_heartrate":51,"tsltype":"properties","datatimestamp":1678834024274,"command":"0105","macaddress":"405EE1805029"},"deviceid":1321313082437,"productid":11324} diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy b/regression-test/suites/load_p0/stream_load/test_json_load.groovy index 7a69114d11..c982196bb1 100644 --- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy +++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy @@ -588,6 +588,33 @@ suite("test_json_load", "p0") { try_sql("DROP TABLE IF EXISTS ${testTable}") } + // case22: nested and it's member with jsonpath + try { + testTable = "test_json_load" + sql "DROP TABLE IF EXISTS ${testTable}" + sql """CREATE TABLE IF NOT EXISTS ${testTable} + ( + `productid` bigint NOT NULL COMMENT "productid", + `deviceid` bigint NOT NULL COMMENT "deviceid", + `datatimestamp` string NULL COMMENT "datatimestamp", + `dt` int NULL COMMENT "dt", + `data` string + ) + DUPLICATE KEY(`productid`, `deviceid`) + DISTRIBUTED BY RANDOM BUCKETS auto + properties( + "replication_num" = "1" + ); + """ +1 + load_json_data.call("${testTable}", 'with_jsonpath', '', 'true', 'json', """productid, deviceid, data, datatimestamp, dt=from_unixtime(substr(datatimestamp,1,10),'%Y%m%d')""", + '["$.productid","$.deviceid","$.data","$.data.datatimestamp"]', '', '', '', 'with_jsonpath.json') + qt_select22 "select * from ${testTable}" + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + } + // if 'enableHdfs' in regression-conf.groovy has been 
set to true, // the test will run these case as below. if (enableHdfs()) {