diff --git a/be/src/exec/table_function_node.cpp b/be/src/exec/table_function_node.cpp index de852d1abe..1eb94aac0e 100644 --- a/be/src/exec/table_function_node.cpp +++ b/be/src/exec/table_function_node.cpp @@ -300,13 +300,22 @@ Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, boo SlotDescriptor* child_slot_desc = child_tuple_desc->slots()[j]; SlotDescriptor* parent_slot_desc = parent_tuple_desc->slots()[j]; - if (_output_slot_ids[parent_slot_desc->id()] && - !child_tuple->is_null(child_slot_desc->null_indicator_offset()) && - child_slot_desc->type().is_string_type()) { + if (child_tuple->is_null(child_slot_desc->null_indicator_offset())) { + continue; + } + if (child_slot_desc->type().is_string_type()) { void* dest_slot = tuple_ptr->get_slot(parent_slot_desc->tuple_offset()); - RawValue::write(child_tuple->get_slot(child_slot_desc->tuple_offset()), - dest_slot, parent_slot_desc->type(), - row_batch->tuple_data_pool()); + if (_output_slot_ids[parent_slot_desc->id()]) { + // deep copy + RawValue::write( + child_tuple->get_slot(child_slot_desc->tuple_offset()), + dest_slot, parent_slot_desc->type(), + row_batch->tuple_data_pool()); + } else { + // clear for unused slot + StringValue* dest = reinterpret_cast<StringValue*>(dest_slot); + dest->replace(nullptr, 0); + } } } parent_tuple_row->set_tuple(tuple_idx, tuple_ptr); diff --git a/be/src/vec/exec/vtable_function_node.cpp b/be/src/vec/exec/vtable_function_node.cpp index 54551bdbce..0852d9d966 100644 --- a/be/src/vec/exec/vtable_function_node.cpp +++ b/be/src/vec/exec/vtable_function_node.cpp @@ -166,6 +166,10 @@ Status VTableFunctionNode::get_expanded_block(RuntimeState* state, Block* output // 1. copy data from child_block. 
for (int i = 0; i < _child_slots.size(); i++) { + if (!slot_need_copy(i)) { + columns[i]->insert_default(); + continue; + } auto src_column = _child_block->get_by_position(i).column; columns[i]->insert_from(*src_column, _cur_child_offset); } @@ -231,4 +235,4 @@ Status VTableFunctionNode::_process_next_child_row() { return Status::OK(); } -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/exec/vtable_function_node.h b/be/src/vec/exec/vtable_function_node.h index 1913cd1d35..72108fa1d9 100644 --- a/be/src/vec/exec/vtable_function_node.h +++ b/be/src/vec/exec/vtable_function_node.h @@ -33,6 +33,26 @@ public: private: Status _process_next_child_row() override; + /* Now the output tuples for table function node are base_table_tuple + tf1 + tf2 + ... + But not all slots are used, the real used slots are inside table_function_node.outputSlotIds. + For case like explode_bitmap: + SELECT a2,count(*) as a3 FROM A WHERE a1 IN + (SELECT c1 FROM B LATERAL VIEW explode_bitmap(b1) C as c1) + GROUP BY a2 ORDER BY a3; + Actually we only need to output column c1, no need to output columns in bitmap table B. + Copying large bitmap columns is very expensive and slow. + + Here we check if the slot is really used, otherwise we avoid copying it and just insert a default value. + + A better solution is: + 1. FE: create a new output tuple based on the real output slots; + 2. 
BE: refactor (V)TableFunctionNode output rows based on the new tuple; + */ + inline bool slot_need_copy(SlotId slot_id) const { + auto id = _output_slots[slot_id]->id(); + return (id < _output_slot_ids.size()) && (_output_slot_ids[id]); + } + using TableFunctionNode::get_next; Status get_expanded_block(RuntimeState* state, Block* output_block, bool* eos); @@ -42,4 +62,4 @@ private: std::vector _output_slots; }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/TableFunctionNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/TableFunctionNode.java index 17cc72030d..58f1092d55 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/TableFunctionNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/TableFunctionNode.java @@ -42,7 +42,6 @@ import java.util.Set; import java.util.stream.Collectors; public class TableFunctionNode extends PlanNode { - private List lateralViewRefs; private ArrayList fnCallExprList; private List lateralViewTupleIds; @@ -100,11 +99,23 @@ public class TableFunctionNode extends PlanNode { for (Expr resultExpr : baseTblResultExprs) { // find all slotRef bound by tupleIds in resultExpr resultExpr.getSlotRefsBoundByTupleIds(tupleIds, outputSlotRef); + + // For vec engine while lateral view involves subquery + Expr dst = outputSmap.get(resultExpr); + if (dst != null) { + dst.getSlotRefsBoundByTupleIds(tupleIds, outputSlotRef); + } } // case2 List remainConjuncts = analyzer.getRemainConjuncts(tupleIds); for (Expr expr : remainConjuncts) { expr.getSlotRefsBoundByTupleIds(tupleIds, outputSlotRef); + + // For vec engine while lateral view involves subquery + Expr dst = outputSmap.get(expr); + if (dst != null) { + dst.getSlotRefsBoundByTupleIds(tupleIds, outputSlotRef); + } } // set output slot ids for (SlotRef slotRef : outputSlotRef) { diff --git 
a/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out b/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out index 4170e82089..978be26cf5 100644 --- a/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out +++ b/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out @@ -140,3 +140,17 @@ true 400 Dan 50 4 Street 4 22.214 b 400 Dan 50 4 Street 4 214.1 b +-- !outer_join_explode_json_array11 -- +\N \N 1 +\N \N 3 +\N \N b +\N 30 1 +\N 30 3 +\N 30 b +\N 50 1 +\N 50 3 +\N 50 b +\N 80 1 +\N 80 3 +\N 80 b + diff --git a/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy b/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy index ebc573cbe2..ee08b6d9e8 100644 --- a/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy +++ b/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy @@ -37,6 +37,7 @@ suite("explode_json_array") { (400, 'Dan', 50, 4, 'Street 4') """ // not vectorized + sql """ set enable_vectorized_engine = false """ qt_explode_json_array1 """ SELECT * FROM ${tableName} LATERAL VIEW EXPLODE_JSON_ARRAY_INT('[30, 60]') t1 as c_age LATERAL VIEW EXPLODE_JSON_ARRAY_INT('[40, 80]') t2 as d_age @@ -83,4 +84,7 @@ suite("explode_json_array") { LATERAL VIEW EXPLODE_JSON_ARRAY_DOUBLE('[1.23, 22.214, 214.1]') t2 as d ORDER BY id, c, d """ + qt_outer_join_explode_json_array11 """SELECT id, age, e1 FROM (SELECT id, age, e1 FROM (SELECT b.id, a.age FROM + ${tableName} a LEFT JOIN ${tableName} b ON a.id=b.age)T LATERAL VIEW EXPLODE_JSON_ARRAY_STRING('[1, "b", 3]') + TMP AS e1) AS T ORDER BY age, e1""" }