From 8c237e82a3c7329dbc2a06380c7d9e3a1d112cd2 Mon Sep 17 00:00:00 2001 From: HappenLee Date: Sat, 11 May 2024 11:27:56 +0800 Subject: [PATCH] [Bug](exec) fix intersections/differences bug (#34675) --- be/src/pipeline/exec/set_sink_operator.cpp | 27 +++++------ be/src/pipeline/exec/set_source_operator.cpp | 9 +--- be/src/pipeline/pipeline_x/dependency.h | 12 ++++- be/src/vec/exec/vset_operation_node.cpp | 47 +++++++------------ .../correctness_p0/test_set_operation.out | 1 + .../correctness_p0/test_set_operation.groovy | 2 + 6 files changed, 43 insertions(+), 55 deletions(-) diff --git a/be/src/pipeline/exec/set_sink_operator.cpp b/be/src/pipeline/exec/set_sink_operator.cpp index 2042e3eb1a..b5774276fd 100644 --- a/be/src/pipeline/exec/set_sink_operator.cpp +++ b/be/src/pipeline/exec/set_sink_operator.cpp @@ -140,19 +140,13 @@ Status SetSinkOperatorX::_extract_build_column( block.get_by_position(result_col_id).column = block.get_by_position(result_col_id).column->convert_to_full_column_if_const(); - const auto* column = block.get_by_position(result_col_id).column.get(); - - if (const auto* nullable = check_and_get_column(*column)) { - const auto& col_nested = nullable->get_nested_column(); - if (local_state._shared_state->build_not_ignore_null[i]) { - raw_ptrs[i] = nullable; - } else { - raw_ptrs[i] = &col_nested; - } - - } else { - raw_ptrs[i] = column; + if (local_state._shared_state->build_not_ignore_null[i]) { + block.get_by_position(result_col_id).column = + make_nullable(block.get_by_position(result_col_id).column); } + + const auto* column = block.get_by_position(result_col_id).column.get(); + raw_ptrs[i] = column; DCHECK_GE(result_col_id, 0); local_state._shared_state->build_col_idx.insert({result_col_id, i}); } @@ -191,11 +185,14 @@ Status SetSinkLocalState::open(RuntimeState* state) { auto& parent = _parent->cast(); DCHECK(parent._cur_child_id == 0); auto& child_exprs_lists = _shared_state->child_exprs_lists; - + _shared_state->build_not_ignore_null.resize(child_exprs_lists[parent._cur_child_id].size()); _shared_state->hash_table_variants = std::make_unique(); - for (const auto& ctx : child_exprs_lists[parent._cur_child_id]) { - _shared_state->build_not_ignore_null.push_back(ctx->root()->is_nullable()); + for (const auto& ctl : child_exprs_lists) { + for (int i = 0; i < ctl.size(); ++i) { + _shared_state->build_not_ignore_null[i] = + _shared_state->build_not_ignore_null[i] || ctl[i]->root()->is_nullable(); + } } _shared_state->hash_table_init(); return Status::OK(); diff --git a/be/src/pipeline/exec/set_source_operator.cpp b/be/src/pipeline/exec/set_source_operator.cpp index 88d38d325a..61fd428b49 100644 --- a/be/src/pipeline/exec/set_source_operator.cpp +++ b/be/src/pipeline/exec/set_source_operator.cpp @@ -177,14 +177,7 @@ void SetSourceOperatorX::_add_result_columns( auto it = value.begin(); for (auto idx = build_col_idx.begin(); idx != build_col_idx.end(); ++idx) { auto& column = *build_block.get_by_position(idx->first).column; - if (local_state._mutable_cols[idx->second]->is_nullable() ^ column.is_nullable()) { - DCHECK(local_state._mutable_cols[idx->second]->is_nullable()); - ((vectorized::ColumnNullable*)(local_state._mutable_cols[idx->second].get())) - ->insert_from_not_nullable(column, it->row_num); - - } else { - local_state._mutable_cols[idx->second]->insert_from(column, it->row_num); - } + local_state._mutable_cols[idx->second]->insert_from(column, it->row_num); } block_size++; } diff --git a/be/src/pipeline/pipeline_x/dependency.h b/be/src/pipeline/pipeline_x/dependency.h index 19099b32f8..693bde10f3 100644 --- a/be/src/pipeline/pipeline_x/dependency.h +++ b/be/src/pipeline/pipeline_x/dependency.h @@ -689,8 +689,18 @@ public: } return; } + + // here need to change type to nullable, because some case eg: + // (select 0) intersect (select null) the build side hash table should not + // ignore null value. + std::vector data_types; + for (const auto& ctx : child_exprs_lists[0]) { + data_types.emplace_back(build_not_ignore_null[0] + ? make_nullable(ctx->root()->data_type()) + : ctx->root()->data_type()); + } if (!try_get_hash_map_context_fixed( - *hash_table_variants, child_exprs_lists[0])) { + *hash_table_variants, data_types)) { hash_table_variants->emplace(); } } diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index c4802bee4c..209e755636 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -155,21 +155,20 @@ Status VSetOperationNode::prepare(RuntimeState* state) { auto column_nums = _child_expr_lists[0].size(); DCHECK_EQ(output_data_types.size(), column_nums) << output_data_types.size() << " " << column_nums; - // the nullable is not depend on child, it's should use _row_descriptor from FE plan - // some case all not nullable column from children, but maybe need output nullable. - vector nullable_flags(column_nums, false); - for (int i = 0; i < column_nums; ++i) { - nullable_flags[i] = output_data_types[i]->is_nullable(); - } - for (int i = 0; i < _child_expr_lists.size(); ++i) { RETURN_IF_ERROR(VExpr::prepare(_child_expr_lists[i], state, child(i)->row_desc())); } for (int i = 0; i < _child_expr_lists[0].size(); ++i) { const auto& ctx = _child_expr_lists[0][i]; - _build_not_ignore_null.push_back(ctx->root()->is_nullable()); - _left_table_data_types.push_back(nullable_flags[i] ? make_nullable(ctx->root()->data_type()) - : ctx->root()->data_type()); + // the nullable is not depend on child, it's should use _row_descriptor from FE plan + // some case all not nullable column from children, but maybe need output nullable. + if (output_data_types[i]->is_nullable()) { + _build_not_ignore_null.push_back(true); + _left_table_data_types.push_back(make_nullable(ctx->root()->data_type())); + } else { + _build_not_ignore_null.push_back(false); + _left_table_data_types.push_back(ctx->root()->data_type()); + } } hash_table_init(); @@ -213,7 +212,7 @@ void VSetOperationNode::hash_table_init() { return; } if (!try_get_hash_map_context_fixed( - *_hash_table_variants, _child_expr_lists[0])) { + *_hash_table_variants, _left_table_data_types)) { _hash_table_variants->emplace(); } } @@ -337,14 +336,7 @@ void VSetOperationNode::add_result_columns(RowRefListWithFlags& va auto it = value.begin(); for (auto idx = _build_col_idx.begin(); idx != _build_col_idx.end(); ++idx) { const auto& column = *_build_block.get_by_position(idx->first).column; - if (_mutable_cols[idx->second]->is_nullable() ^ column.is_nullable()) { - DCHECK(_mutable_cols[idx->second]->is_nullable()); - ((ColumnNullable*)(_mutable_cols[idx->second].get())) - ->insert_from_not_nullable(column, it->row_num); - - } else { - _mutable_cols[idx->second]->insert_from(column, it->row_num); - } + _mutable_cols[idx->second]->insert_from(column, it->row_num); } block_size++; } @@ -427,19 +419,12 @@ Status VSetOperationNode::extract_build_column(Block& block, block.get_by_position(result_col_id).column = block.get_by_position(result_col_id).column->convert_to_full_column_if_const(); - const auto* column = block.get_by_position(result_col_id).column.get(); - - if (const auto* nullable = check_and_get_column(*column)) { - const auto& col_nested = nullable->get_nested_column(); - if (_build_not_ignore_null[i]) { - raw_ptrs[i] = nullable; - } else { - raw_ptrs[i] = &col_nested; - } - - } else { - raw_ptrs[i] = column; + if (_build_not_ignore_null[i]) { + block.get_by_position(result_col_id).column = + make_nullable(block.get_by_position(result_col_id).column); } + const auto* column = block.get_by_position(result_col_id).column.get(); + raw_ptrs[i] = column; DCHECK_GE(result_col_id, 0); _build_col_idx.insert({result_col_id, i}); } diff --git a/regression-test/data/correctness_p0/test_set_operation.out b/regression-test/data/correctness_p0/test_set_operation.out index f6d9f32d4c..09fa831406 100644 --- a/regression-test/data/correctness_p0/test_set_operation.out +++ b/regression-test/data/correctness_p0/test_set_operation.out @@ -10,3 +10,4 @@ bbbb aaaa bbbb +-- !select1 -- diff --git a/regression-test/suites/correctness_p0/test_set_operation.groovy b/regression-test/suites/correctness_p0/test_set_operation.groovy index e3e28a9e3a..5ee6348a03 100644 --- a/regression-test/suites/correctness_p0/test_set_operation.groovy +++ b/regression-test/suites/correctness_p0/test_set_operation.groovy @@ -125,4 +125,6 @@ suite("test_set_operation") { """ qt_select1 """ SELECT DISTINCT * FROM((SELECT sku_code FROM test_B) INTERSECT (SELECT sku_code FROM test_B) UNION (SELECT sku_code FROM test_A)) as t order by 1; """ + qt_select1 """ (select 0) intersect (select null); """ + }