diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h index fa23105c1e..ba49134c03 100644 --- a/be/src/vec/exec/join/process_hash_table_probe_impl.h +++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h @@ -1058,8 +1058,24 @@ Status ProcessHashTableProbe::process_data_in_hashtable(HashTableTyp }; if (visited_iter.ok()) { - for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { - insert_from_hash_table(visited_iter->block_offset, visited_iter->row_num); + if constexpr (std::is_same_v) { + for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { + insert_from_hash_table(visited_iter->block_offset, visited_iter->row_num); + } + } else { + for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { + if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { + if (visited_iter->visited) { + insert_from_hash_table(visited_iter->block_offset, + visited_iter->row_num); + } + } else { + if (!visited_iter->visited) { + insert_from_hash_table(visited_iter->block_offset, + visited_iter->row_num); + } + } + } } if (!visited_iter.ok()) { ++iter; @@ -1070,16 +1086,16 @@ Status ProcessHashTableProbe::process_data_in_hashtable(HashTableTyp auto& mapped = iter->get_second(); if constexpr (std::is_same_v) { if (mapped.visited) { - visited_iter = mapped.begin(); - for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { - if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { + if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { + visited_iter = mapped.begin(); + for (; visited_iter.ok() && block_size < _batch_size; ++visited_iter) { insert_from_hash_table(visited_iter->block_offset, visited_iter->row_num); } - } - if (visited_iter.ok()) { - // block_size >= _batch_size, quit for loop - break; + if (visited_iter.ok()) { + // block_size >= _batch_size, quit for loop + break; + } } } else { if constexpr (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN) { diff --git a/regression-test/data/tpcds_sf1_p1/functions_test/test_right_join.out b/regression-test/data/tpcds_sf1_p1/functions_test/test_right_join.out new file mode 100644 index 0000000000..94939a0607 --- /dev/null +++ b/regression-test/data/tpcds_sf1_p1/functions_test/test_right_join.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql_right_outer_join_with_other_conjuncts -- +27796 + +-- !sql_full_join_with_other_conjuncts -- +27796 + +-- !sql_right_semi_join_with_other_conjuncts -- +8153 + +-- !sql_right_anti_join_with_other_conjuncts -- +19643 + diff --git a/regression-test/suites/tpcds_sf1_p1/functions_test/test_right_join.groovy b/regression-test/suites/tpcds_sf1_p1/functions_test/test_right_join.groovy new file mode 100644 index 0000000000..32683de5f9 --- /dev/null +++ b/regression-test/suites/tpcds_sf1_p1/functions_test/test_right_join.groovy @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_right_join") { + sql """set enable_nereids_planner=false""" + sql """set parallel_fragment_exec_instance_num=1""" + sql "use regression_test_tpcds_sf1_p1" + + qt_sql_right_outer_join_with_other_conjuncts """ + WITH customer_total_return AS ( + SELECT + ca_state ctr_state, + sum(cr_return_amt_inc_tax) ctr_total_return + FROM + catalog_returns, + date_dim, + customer_address + WHERE + (cr_returned_date_sk = d_date_sk) + AND (d_year = 2000) + AND (cr_returning_addr_sk = ca_address_sk) + GROUP BY + cr_returning_customer_sk, + ca_state + ), + ctr2 as ( + SELECT + (avg(ctr_total_return)) ctr_total_return, + ctr_state + FROM + customer_total_return + group by + ctr_state + ) + SELECT + count(ctr1.ctr_state) + FROM + ctr2 right + join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state + and ctr1.ctr_total_return > ctr2.ctr_total_return; + """ + + qt_sql_full_join_with_other_conjuncts """ + WITH customer_total_return AS ( + SELECT + ca_state ctr_state, + sum(cr_return_amt_inc_tax) ctr_total_return + FROM + catalog_returns, + date_dim, + customer_address + WHERE + (cr_returned_date_sk = d_date_sk) + AND (d_year = 2000) + AND (cr_returning_addr_sk = ca_address_sk) + GROUP BY + cr_returning_customer_sk, + ca_state + ), + ctr2 as ( + SELECT + (avg(ctr_total_return)) ctr_total_return, + ctr_state + FROM + customer_total_return + group by + ctr_state + ) + SELECT + count(ctr1.ctr_state) + FROM + ctr2 full + join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state + and ctr1.ctr_total_return > ctr2.ctr_total_return; + """ + + qt_sql_right_semi_join_with_other_conjuncts """ + WITH customer_total_return AS ( + SELECT + ca_state ctr_state, + sum(cr_return_amt_inc_tax) ctr_total_return + FROM + catalog_returns, + date_dim, + customer_address + WHERE + (cr_returned_date_sk = d_date_sk) + AND (d_year = 2000) + AND (cr_returning_addr_sk = ca_address_sk) + GROUP BY + cr_returning_customer_sk, + ca_state + ), + ctr2 as ( + SELECT + (avg(ctr_total_return)) ctr_total_return, + ctr_state + FROM + customer_total_return + group by + ctr_state + ) + SELECT + count(ctr1.ctr_state) + FROM + ctr2 right semi + join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state + and ctr1.ctr_total_return > ctr2.ctr_total_return; + """ + + qt_sql_right_anti_join_with_other_conjuncts """ + WITH customer_total_return AS ( + SELECT + ca_state ctr_state, + sum(cr_return_amt_inc_tax) ctr_total_return + FROM + catalog_returns, + date_dim, + customer_address + WHERE + (cr_returned_date_sk = d_date_sk) + AND (d_year = 2000) + AND (cr_returning_addr_sk = ca_address_sk) + GROUP BY + cr_returning_customer_sk, + ca_state + ), + ctr2 as ( + SELECT + (avg(ctr_total_return)) ctr_total_return, + ctr_state + FROM + customer_total_return + group by + ctr_state + ) + SELECT + count(ctr1.ctr_state) + FROM + ctr2 right anti + join customer_total_return ctr1 on ctr1.ctr_state = ctr2.ctr_state + and ctr1.ctr_total_return > ctr2.ctr_total_return; + """ +} \ No newline at end of file