[fix](join) incorrect result of left semi/anti join with empty build side (#28898)
This commit is contained in:
@ -226,6 +226,9 @@ public:
|
||||
template <int JoinOpType>
|
||||
void prepare_build(size_t num_elem, int batch_size, bool has_null_key) {
|
||||
_has_null_key = has_null_key;
|
||||
|
||||
// The first row in the build side does not actually come from the build-side table.
|
||||
_empty_build_side = num_elem <= 1;
|
||||
max_batch_size = batch_size;
|
||||
bucket_size = calc_bucket_size(num_elem + 1);
|
||||
first.resize(bucket_size + 1);
|
||||
@ -262,6 +265,14 @@ public:
|
||||
uint32_t* __restrict probe_idxs, bool& probe_visited,
|
||||
uint32_t* __restrict build_idxs,
|
||||
doris::vectorized::ColumnFilterHelper* mark_column) {
|
||||
if constexpr (JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
|
||||
if (_empty_build_side) {
|
||||
return _process_null_aware_left_anti_join_for_empty_build_side<
|
||||
JoinOpType, with_other_conjuncts, is_mark_join>(
|
||||
probe_idx, probe_rows, probe_idxs, build_idxs, mark_column);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (is_mark_join) {
|
||||
return _find_batch_mark<JoinOpType, with_other_conjuncts>(
|
||||
keys, build_idx_map, probe_idx, probe_rows, probe_idxs, build_idxs,
|
||||
@ -367,6 +378,29 @@ private:
|
||||
return std::tuple {probe_idx, 0U, matched_cnt};
|
||||
}
|
||||
|
||||
// Fast path for NULL_AWARE_LEFT_ANTI_JOIN, taken when `_empty_build_side` is set:
// probe rows are emitted one after another without looking at any keys or at
// the hash table (neither is passed to this function).
//
// Starting at `probe_idx`, consecutive probe row indices are written into
// `probe_idxs` until either `probe_rows` is reached or `max_batch_size` rows
// have been produced. For mark joins the corresponding `build_idxs` slot is
// set to 0.
//
// Returns a tuple {next probe index to process, 0U, number of rows emitted}.
// NOTE(review): the middle element presumably has the same meaning as in the
// other batch-finding helpers of this class -- confirm against the caller.
template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join>
|
||||
auto _process_null_aware_left_anti_join_for_empty_build_side(
|
||||
int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs,
|
||||
uint32_t* __restrict build_idxs, doris::vectorized::ColumnFilterHelper* mark_column) {
|
||||
// This helper is only valid for the null-aware left anti join; any other
// instantiation is rejected at compile time.
static_assert(JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN);
|
||||
// Number of rows written to the output index arrays so far.
auto matched_cnt = 0;
|
||||
const auto batch_size = max_batch_size;
|
||||
|
||||
// Emit probe rows in order until the probe block is exhausted or the batch is full.
while (probe_idx < probe_rows && matched_cnt < batch_size) {
|
||||
probe_idxs[matched_cnt] = probe_idx++;
|
||||
// build_idxs is written only for mark joins; otherwise it is left untouched.
// NOTE(review): index 0 is presumably the dummy first build row -- confirm.
if constexpr (is_mark_join) {
|
||||
build_idxs[matched_cnt] = 0;
|
||||
}
|
||||
++matched_cnt;
|
||||
}
|
||||
|
||||
// Only mark joins without other conjuncts touch the mark column here.
// NOTE(review): resize_fill(matched_cnt, 1) presumably marks every emitted
// row as true -- confirm against ColumnFilterHelper::resize_fill.
if constexpr (is_mark_join && !with_other_conjuncts) {
|
||||
mark_column->resize_fill(matched_cnt, 1);
|
||||
}
|
||||
|
||||
return std::tuple {probe_idx, 0U, matched_cnt};
|
||||
}
|
||||
|
||||
auto _find_batch_right_semi_anti(const Key* __restrict keys,
|
||||
const uint32_t* __restrict build_idx_map, int probe_idx,
|
||||
int probe_rows) {
|
||||
@ -532,6 +566,7 @@ private:
|
||||
Cell cell;
|
||||
doris::vectorized::Arena* pool;
|
||||
bool _has_null_key = false;
|
||||
bool _empty_build_side = true;
|
||||
};
|
||||
|
||||
template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
|
||||
|
||||
@ -9,3 +9,10 @@
|
||||
|
||||
-- !select --
|
||||
|
||||
-- !anti_emtpy_right --
|
||||
\N
|
||||
1
|
||||
3
|
||||
|
||||
-- !semi_emtpy_right --
|
||||
|
||||
|
||||
@ -60,11 +60,21 @@ suite("test_null_aware_left_anti_join") {
|
||||
sql """ set parallel_pipeline_task_num=2; """
|
||||
qt_select """ select ${tableName2}.k1 from ${tableName2} where k1 not in (select ${tableName1}.k1 from ${tableName1}) order by ${tableName2}.k1; """
|
||||
|
||||
sql """
|
||||
drop table if exists ${tableName2};
|
||||
// In a left anti join, if the right side is empty, all rows (nulls included) of the left side should be output.
|
||||
qt_anti_emtpy_right """
|
||||
select
|
||||
*
|
||||
from ${tableName1} t1 where k1 not in (
|
||||
select k1 from ${tableName2} t2 where t2.k1 > 2
|
||||
) order by 1;
|
||||
"""
|
||||
|
||||
sql """
|
||||
drop table if exists ${tableName1};
|
||||
// In a left semi join, if the right side is empty, no rows should be output.
|
||||
qt_semi_emtpy_right """
|
||||
select
|
||||
*
|
||||
from ${tableName1} t1 where k1 in (
|
||||
select k1 from ${tableName2} t2 where t2.k1 > 2
|
||||
) order by 1;
|
||||
"""
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user