[fix](join) incorrect result of left semi/anti join with empty build side (#28898)

This commit is contained in:
Jerry Hu
2023-12-25 09:07:38 +08:00
committed by GitHub
parent d8cb4da73d
commit b7ae7a07c7
3 changed files with 56 additions and 4 deletions

View File

@ -226,6 +226,9 @@ public:
template <int JoinOpType>
void prepare_build(size_t num_elem, int batch_size, bool has_null_key) {
_has_null_key = has_null_key;
// The first row of the build side does not really come from the build-side table, so num_elem <= 1 means the build side is empty.
_empty_build_side = num_elem <= 1;
max_batch_size = batch_size;
bucket_size = calc_bucket_size(num_elem + 1);
first.resize(bucket_size + 1);
@ -262,6 +265,14 @@ public:
uint32_t* __restrict probe_idxs, bool& probe_visited,
uint32_t* __restrict build_idxs,
doris::vectorized::ColumnFilterHelper* mark_column) {
if constexpr (JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
if (_empty_build_side) {
return _process_null_aware_left_anti_join_for_empty_build_side<
JoinOpType, with_other_conjuncts, is_mark_join>(
probe_idx, probe_rows, probe_idxs, build_idxs, mark_column);
}
}
if constexpr (is_mark_join) {
return _find_batch_mark<JoinOpType, with_other_conjuncts>(
keys, build_idx_map, probe_idx, probe_rows, probe_idxs, build_idxs,
@ -367,6 +378,29 @@ private:
return std::tuple {probe_idx, 0U, matched_cnt};
}
template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join>
auto _process_null_aware_left_anti_join_for_empty_build_side(
int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs,
uint32_t* __restrict build_idxs, doris::vectorized::ColumnFilterHelper* mark_column) {
static_assert(JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN);
auto matched_cnt = 0;
const auto batch_size = max_batch_size;
while (probe_idx < probe_rows && matched_cnt < batch_size) {
probe_idxs[matched_cnt] = probe_idx++;
if constexpr (is_mark_join) {
build_idxs[matched_cnt] = 0;
}
++matched_cnt;
}
if constexpr (is_mark_join && !with_other_conjuncts) {
mark_column->resize_fill(matched_cnt, 1);
}
return std::tuple {probe_idx, 0U, matched_cnt};
}
auto _find_batch_right_semi_anti(const Key* __restrict keys,
const uint32_t* __restrict build_idx_map, int probe_idx,
int probe_rows) {
@ -532,6 +566,7 @@ private:
Cell cell;
doris::vectorized::Arena* pool;
bool _has_null_key = false;
bool _empty_build_side = true;
};
template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,

View File

@ -9,3 +9,10 @@
-- !select --
-- !anti_emtpy_right --
\N
1
3
-- !semi_emtpy_right --

View File

@ -60,11 +60,21 @@ suite("test_null_aware_left_anti_join") {
sql """ set parallel_pipeline_task_num=2; """
qt_select """ select ${tableName2}.k1 from ${tableName2} where k1 not in (select ${tableName1}.k1 from ${tableName1}) order by ${tableName2}.k1; """
sql """
drop table if exists ${tableName2};
// In a left anti join, if the right side is empty, all rows of the left side (nulls included) should be output.
qt_anti_emtpy_right """
select
*
from ${tableName1} t1 where k1 not in (
select k1 from ${tableName2} t2 where t2.k1 > 2
) order by 1;
"""
sql """
drop table if exists ${tableName1};
// In a left semi join, if the right side is empty, no rows should be output.
qt_semi_emtpy_right """
select
*
from ${tableName1} t1 where k1 in (
select k1 from ${tableName2} t2 where t2.k1 > 2
) order by 1;
"""
}