From fb9363f04210e6a082da236ac5bce2d25ce75bb5 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Thu, 30 May 2024 10:33:13 +0800 Subject: [PATCH] [fix](set) incorrect result of set operator (#35607) If there are duplicated expressions in the select list, the result will be incorrect. ## Proposed changes Issue Number: close #28438 --- be/src/pipeline/exec/set_sink_operator.cpp | 2 +- be/src/pipeline/exec/set_source_operator.cpp | 4 ++-- be/src/pipeline/pipeline_x/dependency.h | 4 ++-- be/src/vec/exec/vset_operation_node.cpp | 6 +++--- be/src/vec/exec/vset_operation_node.h | 4 ++-- .../data/query_p0/operator/test_set_operator.out | 12 ++++++++++++ .../query_p0/operator/test_set_operator.groovy | 8 ++++++++ 7 files changed, 30 insertions(+), 10 deletions(-) diff --git a/be/src/pipeline/exec/set_sink_operator.cpp b/be/src/pipeline/exec/set_sink_operator.cpp index b5774276fd..aceeac2559 100644 --- a/be/src/pipeline/exec/set_sink_operator.cpp +++ b/be/src/pipeline/exec/set_sink_operator.cpp @@ -148,7 +148,7 @@ Status SetSinkOperatorX::_extract_build_column( const auto* column = block.get_by_position(result_col_id).column.get(); raw_ptrs[i] = column; DCHECK_GE(result_col_id, 0); - local_state._shared_state->build_col_idx.insert({result_col_id, i}); + local_state._shared_state->build_col_idx.insert({i, result_col_id}); } return Status::OK(); } diff --git a/be/src/pipeline/exec/set_source_operator.cpp b/be/src/pipeline/exec/set_source_operator.cpp index 61fd428b49..0b3ec80211 100644 --- a/be/src/pipeline/exec/set_source_operator.cpp +++ b/be/src/pipeline/exec/set_source_operator.cpp @@ -176,8 +176,8 @@ void SetSourceOperatorX::_add_result_columns( auto it = value.begin(); for (auto idx = build_col_idx.begin(); idx != build_col_idx.end(); ++idx) { - auto& column = *build_block.get_by_position(idx->first).column; - local_state._mutable_cols[idx->second]->insert_from(column, it->row_num); + auto& column = *build_block.get_by_position(idx->second).column; + local_state._mutable_cols[idx->first]->insert_from(column, it->row_num); } block_size++; } diff --git a/be/src/pipeline/pipeline_x/dependency.h b/be/src/pipeline/pipeline_x/dependency.h index 1d14c66a5b..72763d03c8 100644 --- a/be/src/pipeline/pipeline_x/dependency.h +++ b/be/src/pipeline/pipeline_x/dependency.h @@ -634,8 +634,8 @@ public: vectorized::Block build_block; // build to source //record element size in hashtable int64_t valid_element_in_hash_tbl = 0; - //first:column_id, could point to origin column or cast column - //second:idx mapped to column types + //first: idx mapped to column types + //second: column_id, could point to origin column or cast column std::unordered_map build_col_idx; //// shared static states (shared, decided in prepare/open...) diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index 209e755636..294ac58482 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -335,8 +335,8 @@ void VSetOperationNode::add_result_columns(RowRefListWithFlags& va int& block_size) { auto it = value.begin(); for (auto idx = _build_col_idx.begin(); idx != _build_col_idx.end(); ++idx) { - const auto& column = *_build_block.get_by_position(idx->first).column; - _mutable_cols[idx->second]->insert_from(column, it->row_num); + const auto& column = *_build_block.get_by_position(idx->second).column; + _mutable_cols[idx->first]->insert_from(column, it->row_num); } block_size++; } @@ -426,7 +426,7 @@ Status VSetOperationNode::extract_build_column(Block& block, const auto* column = block.get_by_position(result_col_id).column.get(); raw_ptrs[i] = column; DCHECK_GE(result_col_id, 0); - _build_col_idx.insert({result_col_id, i}); + _build_col_idx.insert({i, result_col_id}); } return Status::OK(); } diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h index ce5a8eb1db..f14d1515c5 100644 --- a/be/src/vec/exec/vset_operation_node.h +++ b/be/src/vec/exec/vset_operation_node.h @@ -112,8 +112,8 @@ private: std::vector _child_expr_lists; //record build column type DataTypes _left_table_data_types; - //first:column_id, could point to origin column or cast column - //second:idx mapped to column types + //first: idx mapped to column types + //second: column_id, could point to origin column or cast column std::unordered_map _build_col_idx; //record insert column id during probe std::vector _probe_column_inserted_id; diff --git a/regression-test/data/query_p0/operator/test_set_operator.out b/regression-test/data/query_p0/operator/test_set_operator.out index 1d8bc5ef93..48eb4a0c9b 100644 --- a/regression-test/data/query_p0/operator/test_set_operator.out +++ b/regression-test/data/query_p0/operator/test_set_operator.out @@ -13,3 +13,15 @@ 9 9 +-- !select_minus -- +3 3 +4 4 +5 5 +7 7 + +-- !select_except -- +3 3 +4 4 +5 5 +7 7 + diff --git a/regression-test/suites/query_p0/operator/test_set_operator.groovy b/regression-test/suites/query_p0/operator/test_set_operator.groovy index 1bc9cc29e4..7d6219585e 100644 --- a/regression-test/suites/query_p0/operator/test_set_operator.groovy +++ b/regression-test/suites/query_p0/operator/test_set_operator.groovy @@ -89,4 +89,12 @@ suite("test_set_operators", "query,p0,arrow_flight_sql") { t3 on t2.col1=t3.col1; """ + + order_qt_select_minus """ + select col1, col1 from t1 minus select col1, col1 from t2; + """ + + order_qt_select_except """ + select col1, col1 from t1 except select col1, col1 from t2; + """ }