From 5e1caea2b1859a4244accbe6166b21e479513f06 Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Thu, 13 Jan 2022 15:30:38 +0800 Subject: [PATCH] [fix](lateral-view) Fix some bugs about lateral view (#7721) 1. fix core dump when using multi explode_bitmap #7716 2. fix bug that json array extract by json path is wrong #7717 3. fix bug that after lateral view, the null value become non-null value #7718 4. fix bug that lateral view may return error: couldn't resolve slot descriptor 1. #7719 5. fix error result when using lateral view with where predicate #7720 --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- be/src/exec/json_scanner.cpp | 6 ++-- be/src/exec/table_function_node.cpp | 15 ++++---- be/src/exprs/json_functions.cpp | 9 +++-- be/src/exprs/json_functions.h | 5 +-- .../exprs/table_function/explode_bitmap.cpp | 1 + be/test/exprs/json_function_test.cpp | 35 +++++++++++++++---- .../apache/doris/analysis/LateralViewRef.java | 6 ++-- 8 files changed, 53 insertions(+), 26 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index b179379e78..6f56b55bac 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,7 +6,7 @@ Issue Number: close #xxx Describe the overview of changes. -## Checklist(Requried) +## Checklist(Required) 1. Does it affect the original behavior: (Yes/No/I Don't know) 2. Has unit tests been added: (Yes/No/No Need) diff --git a/be/src/exec/json_scanner.cpp b/be/src/exec/json_scanner.cpp index 991cee6044..134d646c8f 100644 --- a/be/src/exec/json_scanner.cpp +++ b/be/src/exec/json_scanner.cpp @@ -669,9 +669,10 @@ bool JsonReader::_write_values_by_jsonpath(rapidjson::Value& objectValue, MemPoo for (size_t i = 0; i < column_num; i++) { rapidjson::Value* json_values = nullptr; + bool wrap_explicitly = false; if (LIKELY(i < _parsed_jsonpaths.size())) { json_values = JsonFunctions::get_json_array_from_parsed_json( - _parsed_jsonpaths[i], &objectValue, _origin_json_doc.GetAllocator()); + _parsed_jsonpaths[i], &objectValue, _origin_json_doc.GetAllocator(), &wrap_explicitly); } if (json_values == nullptr) { @@ -690,8 +691,7 @@ bool JsonReader::_write_values_by_jsonpath(rapidjson::Value& objectValue, MemPoo } } else { CHECK(json_values->IsArray()); - CHECK(json_values->Size() >= 1); - if (json_values->Size() == 1) { + if (json_values->Size() == 1 && wrap_explicitly) { // NOTICE1: JsonFunctions::get_json_array_from_parsed_json() will wrap the single json object with an array. // so here we unwrap the array to get the real element. // if json_values' size > 1, it means we just match an array, not a wrapped one, so no need to unwrap. diff --git a/be/src/exec/table_function_node.cpp b/be/src/exec/table_function_node.cpp index d192354bb2..22010c413a 100644 --- a/be/src/exec/table_function_node.cpp +++ b/be/src/exec/table_function_node.cpp @@ -196,7 +196,6 @@ Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, boo uint8_t* tuple_buffer = nullptr; Tuple* tuple_ptr = nullptr; Tuple* pre_tuple_ptr = nullptr; - int row_idx = 0; while (true) { RETURN_IF_CANCELLED(state); @@ -252,7 +251,7 @@ Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, boo pre_tuple_ptr = tuple_ptr; // The tuples order in parent row batch should be // child1, child2, tf1, tf2, ... - TupleRow* parent_tuple_row = row_batch->get_row(row_idx++); + TupleRow* parent_tuple_row = row_batch->get_row(row_batch->add_row()); // 1. copy child tuples int tuple_idx = 0; for (int i = 0; i < _child_tuple_desc_size; tuple_idx++, i++) { @@ -263,8 +262,9 @@ Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, boo SlotDescriptor* child_slot_desc = child_tuple_desc->slots()[j]; SlotDescriptor* parent_slot_desc = parent_tuple_desc->slots()[j]; - if (_output_slot_ids[parent_slot_desc->id()]) { - Tuple* child_tuple = _cur_child_tuple_row->get_tuple(child_rowdesc.get_tuple_idx(child_tuple_desc->id())); + Tuple* child_tuple = _cur_child_tuple_row->get_tuple(child_rowdesc.get_tuple_idx(child_tuple_desc->id())); + if (_output_slot_ids[parent_slot_desc->id()] && !child_tuple->is_null(child_slot_desc->null_indicator_offset())) { + // only write child slot if it is selected and not null. void* dest_slot = tuple_ptr->get_slot(parent_slot_desc->tuple_offset()); RawValue::write(child_tuple->get_slot(child_slot_desc->tuple_offset()), dest_slot, parent_slot_desc->type(), row_batch->tuple_data_pool()); tuple_ptr->set_not_null(parent_slot_desc->null_indicator_offset()); @@ -273,15 +273,14 @@ Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, boo } } parent_tuple_row->set_tuple(tuple_idx, tuple_ptr); - tuple_ptr = reinterpret_cast(reinterpret_cast(tuple_ptr) + parent_tuple_desc->byte_size()); + tuple_ptr = reinterpret_cast(reinterpret_cast(tuple_ptr) + child_tuple_desc->byte_size()); } - // 2. copy funtion result + // 2. copy function result for (int i = 0; tuple_idx < _parent_tuple_desc_size; tuple_idx++, i++) { TupleDescriptor* parent_tuple_desc = parent_rowdesc.tuple_descriptors()[tuple_idx]; SlotDescriptor* parent_slot_desc = parent_tuple_desc->slots()[0]; void* dest_slot = tuple_ptr->get_slot(parent_slot_desc->tuple_offset()); - // if (_fn_values[i] != nullptr && _output_slot_ids[parent_slot_desc->id()]) { if (_fn_values[i] != nullptr) { RawValue::write(_fn_values[i], dest_slot, parent_slot_desc->type(), row_batch->tuple_data_pool()); tuple_ptr->set_not_null(parent_slot_desc->null_indicator_offset()); @@ -311,7 +310,7 @@ Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, boo *eos = false; break; } - } + } // end while true if (row_batch->at_capacity()) { break; diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp index 3727553b28..55e11aa204 100644 --- a/be/src/exprs/json_functions.cpp +++ b/be/src/exprs/json_functions.cpp @@ -364,15 +364,16 @@ rapidjson::Value* JsonFunctions::get_json_object(FunctionContext* context, rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( const std::string& json_path, rapidjson::Value* document, - rapidjson::Document::AllocatorType& mem_allocator) { + rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly) { std::vector vec; parse_json_paths(json_path, &vec); - return get_json_array_from_parsed_json(vec, document, mem_allocator); + return get_json_array_from_parsed_json(vec, document, mem_allocator, wrap_explicitly); } rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( const std::vector& parsed_paths, rapidjson::Value* document, - rapidjson::Document::AllocatorType& mem_allocator) { + rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly) { + *wrap_explicitly = false; if (!parsed_paths[0].is_valid) { return nullptr; } @@ -395,6 +396,8 @@ rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( array_obj = static_cast(mem_allocator.Malloc(sizeof(rapidjson::Value))); array_obj->SetArray(); array_obj->PushBack(*root, mem_allocator); + // set `wrap_explicitly` to true, so that the caller knows that this Array is wrapped actively. + *wrap_explicitly = true; return array_obj; } return root; diff --git a/be/src/exprs/json_functions.h b/be/src/exprs/json_functions.h index ab0d7e48e1..0253ac5e83 100644 --- a/be/src/exprs/json_functions.h +++ b/be/src/exprs/json_functions.h @@ -103,16 +103,17 @@ public: /** * The `document` parameter must be has parsed. * return Value Is Array object + * wrap_explicitly is set to true when the returned Array is wrapped actively. */ static rapidjson::Value* get_json_array_from_parsed_json( const std::vector& parsed_paths, rapidjson::Value* document, - rapidjson::Document::AllocatorType& mem_allocator); + rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly); // this is only for test, it will parse the json path inside, // so that we can easily pass a json path as string. static rapidjson::Value* get_json_array_from_parsed_json( const std::string& jsonpath, rapidjson::Value* document, - rapidjson::Document::AllocatorType& mem_allocator); + rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly); static rapidjson::Value* get_json_object_from_parsed_json( const std::vector& parsed_paths, rapidjson::Value* document, diff --git a/be/src/exprs/table_function/explode_bitmap.cpp b/be/src/exprs/table_function/explode_bitmap.cpp index 67fe561346..1d1ca4d0b9 100644 --- a/be/src/exprs/table_function/explode_bitmap.cpp +++ b/be/src/exprs/table_function/explode_bitmap.cpp @@ -80,6 +80,7 @@ void ExplodeBitmapTableFunction::_reset_iterator() { } _cur_iter = new BitmapValueIterator(*_cur_bitmap); _cur_value = **_cur_iter; + _cur_offset = 0; } Status ExplodeBitmapTableFunction::reset() { diff --git a/be/test/exprs/json_function_test.cpp b/be/test/exprs/json_function_test.cpp index 354f36a8fb..3273c7415a 100644 --- a/be/test/exprs/json_function_test.cpp +++ b/be/test/exprs/json_function_test.cpp @@ -251,6 +251,7 @@ TEST_F(JsonFunctionTest, special_char) { } TEST_F(JsonFunctionTest, json_path1) { + bool wrap_explicitly; std::string json_raw_data( "[{\"k1\":\"v1\",\"keyname\":{\"ip\":\"10.10.0.1\",\"value\":20}},{\"k1\":\"v1-1\"," "\"keyname\":{\"ip\":\"10.20.10.1\",\"value\":20}}]"); @@ -260,20 +261,20 @@ TEST_F(JsonFunctionTest, json_path1) { } rapidjson::Value* res3; res3 = JsonFunctions::get_json_array_from_parsed_json("$.[*].keyname.ip", &jsonDoc, - jsonDoc.GetAllocator()); + jsonDoc.GetAllocator(), &wrap_explicitly); ASSERT_TRUE(res3->IsArray()); for (int i = 0; i < res3->Size(); i++) { std::cout << (*res3)[i].GetString() << std::endl; } res3 = JsonFunctions::get_json_array_from_parsed_json("$.[*].k1", &jsonDoc, - jsonDoc.GetAllocator()); + jsonDoc.GetAllocator(), &wrap_explicitly); ASSERT_TRUE(res3->IsArray()); for (int i = 0; i < res3->Size(); i++) { std::cout << (*res3)[i].GetString() << std::endl; } - res3 = JsonFunctions::get_json_array_from_parsed_json("$", &jsonDoc, jsonDoc.GetAllocator()); + res3 = JsonFunctions::get_json_array_from_parsed_json("$", &jsonDoc, jsonDoc.GetAllocator(), &wrap_explicitly); ASSERT_TRUE(res3->IsArray()); rapidjson::StringBuffer buffer; buffer.Clear(); @@ -283,6 +284,7 @@ TEST_F(JsonFunctionTest, json_path1) { } TEST_F(JsonFunctionTest, json_path_get_nullobject) { + bool wrap_explicitly; std::string json_raw_data( "[{\"a\":\"a1\", \"b\":\"b1\", \"c\":\"c1\"},{\"a\":\"a2\", " "\"c\":\"c2\"},{\"a\":\"a3\", \"b\":\"b3\", \"c\":\"c3\"}]"); @@ -292,7 +294,7 @@ TEST_F(JsonFunctionTest, json_path_get_nullobject) { } rapidjson::Value* res3 = JsonFunctions::get_json_array_from_parsed_json("$.[*].b", &jsonDoc, - jsonDoc.GetAllocator()); + jsonDoc.GetAllocator(), &wrap_explicitly); ASSERT_TRUE(res3->IsArray()); ASSERT_EQ(res3->Size(), 3); for (int i = 0; i < res3->Size(); i++) { @@ -306,6 +308,7 @@ TEST_F(JsonFunctionTest, json_path_get_nullobject) { } TEST_F(JsonFunctionTest, json_path_test) { + bool wrap_explicitly; { std::string json_raw_data("[{\"a\":\"a1\", \"b\":\"b1\"}, {\"a\":\"a2\", \"b\":\"b2\"}]"); rapidjson::Document jsonDoc; @@ -314,7 +317,7 @@ TEST_F(JsonFunctionTest, json_path_test) { } rapidjson::Value* res3 = JsonFunctions::get_json_array_from_parsed_json( - "$.[*].a", &jsonDoc, jsonDoc.GetAllocator()); + "$.[*].a", &jsonDoc, jsonDoc.GetAllocator(), &wrap_explicitly); ASSERT_TRUE(res3->IsArray()); ASSERT_EQ(res3->Size(), 2); for (int i = 0; i < res3->Size(); i++) { @@ -327,14 +330,14 @@ TEST_F(JsonFunctionTest, json_path_test) { std::cout << " " << std::endl; } { - std::string json_raw_data("{\"a\":[\"a1\",\"a2\"], \"b\":[\"b1\",\"b2\"]}"); + std::string json_raw_data("{\"a\":[\"a1\",\"a2\"], \"b\":[\"b1\",\"b2\"], \"c\":[\"c1\"], \"d\":[], \"e\":\"e1\"}"); rapidjson::Document jsonDoc; if (jsonDoc.Parse(json_raw_data.c_str()).HasParseError()) { ASSERT_TRUE(false); } rapidjson::Value* res3 = JsonFunctions::get_json_array_from_parsed_json( - "$.a", &jsonDoc, jsonDoc.GetAllocator()); + "$.a", &jsonDoc, jsonDoc.GetAllocator(), &wrap_explicitly); ASSERT_TRUE(res3->IsArray()); ASSERT_EQ(res3->Size(), 2); for (int i = 0; i < res3->Size(); i++) { @@ -345,6 +348,24 @@ TEST_F(JsonFunctionTest, json_path_test) { } } std::cout << " " << std::endl; + + rapidjson::Value* res4 = JsonFunctions::get_json_array_from_parsed_json( + "$.c", &jsonDoc, jsonDoc.GetAllocator(), &wrap_explicitly); + ASSERT_TRUE(res4->IsArray()); + ASSERT_EQ(res4->Size(), 1); + ASSERT_FALSE(wrap_explicitly); + + rapidjson::Value* res5 = JsonFunctions::get_json_array_from_parsed_json( + "$.d", &jsonDoc, jsonDoc.GetAllocator(), &wrap_explicitly); + ASSERT_TRUE(res5->IsArray()); + ASSERT_EQ(res5->Size(), 0); + ASSERT_FALSE(wrap_explicitly); + + rapidjson::Value* res6 = JsonFunctions::get_json_array_from_parsed_json( + "$.e", &jsonDoc, jsonDoc.GetAllocator(), &wrap_explicitly); + ASSERT_TRUE(res6->IsArray()); + ASSERT_EQ(res6->Size(), 1); + ASSERT_TRUE(wrap_explicitly); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java index 8326854937..5736d233c9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java @@ -90,8 +90,10 @@ public class LateralViewRef extends TableRef { fnExpr.setTableFnCall(true); checkAndSupplyDefaultTableName(fnExpr); fnExpr.analyze(analyzer); - checkScalarFunction(fnExpr.getChild(0)); - fnExpr.getChild(0).collect(SlotRef.class, originSlotRefList); + for (Expr expr : fnExpr.getChildren()) { + checkScalarFunction(expr); + } + fnExpr.collect(SlotRef.class, originSlotRefList); } @Override