[fix](join) incorrect result of mark join (#30543)

incorrect result of mark join
This commit is contained in:
Jerry Hu
2024-01-31 17:59:27 +08:00
committed by yiguolei
parent 711b156a78
commit 77b366fc4b
11 changed files with 352 additions and 156 deletions

View File

@ -67,12 +67,11 @@ struct ProcessHashTableProbe {
// each matching join column need to be processed by other join conjunct. so the struct of mutable block
// and output block may be different
// The output result is determined by the other join conjunct result and same_to_prev struct
Status do_other_join_conjuncts(Block* output_block, bool is_mark_join,
std::vector<uint8_t>& visited, bool has_null_in_build_side);
Status do_other_join_conjuncts(Block* output_block, std::vector<uint8_t>& visited,
bool has_null_in_build_side);
template <bool with_other_conjuncts>
Status do_mark_join_conjuncts(Block* output_block, size_t hash_table_bucket_size,
const std::set<uint32_t>& null_result);
Status do_mark_join_conjuncts(Block* output_block, size_t hash_table_bucket_size);
template <typename HashTableType>
typename HashTableType::State _init_probe_side(HashTableType& hash_table_ctx, size_t probe_rows,
@ -85,6 +84,10 @@ struct ProcessHashTableProbe {
Status process_data_in_hashtable(HashTableType& hash_table_ctx, MutableBlock& mutable_block,
Block* output_block, bool* eos);
/// For null aware join with other conjuncts, if the probe key of one row on left side is null,
/// we should make this row match with all rows in build side.
size_t _process_probe_null_key(uint32_t probe_idx);
Parent* _parent = nullptr;
const int _batch_size;
const std::shared_ptr<Block>& _build_block;
@ -93,7 +96,15 @@ struct ProcessHashTableProbe {
std::vector<uint32_t> _probe_indexs;
bool _probe_visited = false;
bool _picking_null_keys = false;
std::vector<uint32_t> _build_indexs;
std::vector<uint8_t> _null_flags;
/// If the probe key of one row on left side is null,
/// we will make all rows in build side match with this row,
/// `_build_index_for_null_probe_key` is used to record the progress if the build block is too big.
uint32_t _build_index_for_null_probe_key {0};
std::vector<int> _build_blocks_locs;
// only need set the tuple is null in RIGHT_OUTER_JOIN and FULL_OUTER_JOIN
ColumnUInt8::Container* _tuple_is_null_left_flags = nullptr;

View File

@ -131,6 +131,11 @@ typename HashTableType::State ProcessHashTableProbe<JoinOpType, Parent>::_init_p
// may over batch size 1 for some outer join case
_probe_indexs.resize(_batch_size + 1);
_build_indexs.resize(_batch_size + 1);
if constexpr (JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) {
_null_flags.resize(_batch_size + 1);
memset(_null_flags.data(), 0, _batch_size + 1);
}
if (!_parent->_ready_probe) {
_parent->_ready_probe = true;
@ -175,26 +180,41 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
auto& mcol = mutable_block.mutable_columns();
int current_offset = 0;
std::unique_ptr<ColumnFilterHelper> mark_column;
if (is_mark_join) {
mark_column = std::make_unique<ColumnFilterHelper>(*mcol[mcol.size() - 1]);
}
/// `null_result` set which contains the probe indexes of null results.
std::set<uint32_t> null_result;
if constexpr ((JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) &&
with_other_conjuncts) {
SCOPED_TIMER(_search_hashtable_timer);
auto [new_probe_idx, new_build_idx, new_current_offset] =
hash_table_ctx.hash_table->find_null_aware_with_other_conjuncts(
hash_table_ctx.keys, hash_table_ctx.bucket_nums.data(), probe_index,
build_index, probe_rows, _probe_indexs.data(), _build_indexs.data(),
null_result, *(_parent->_build_indexes_null), _build_block->rows());
probe_index = new_probe_idx;
build_index = new_build_idx;
current_offset = new_current_offset;
/// If `_build_index_for_null_probe_key` is not zero, it means we are in progress of handling probe null key.
if (_build_index_for_null_probe_key) {
DCHECK_EQ(build_index, hash_table_ctx.hash_table->get_bucket_size());
current_offset = _process_probe_null_key(probe_index);
if (!_build_index_for_null_probe_key) {
probe_index++;
build_index = 0;
}
} else {
auto [new_probe_idx, new_build_idx, new_current_offset, picking_null_keys] =
hash_table_ctx.hash_table->find_null_aware_with_other_conjuncts(
hash_table_ctx.keys, hash_table_ctx.bucket_nums.data(), probe_index,
build_index, probe_rows, _probe_indexs.data(), _build_indexs.data(),
_null_flags.data(), _picking_null_keys);
probe_index = new_probe_idx;
build_index = new_build_idx;
current_offset = new_current_offset;
_picking_null_keys = picking_null_keys;
if (build_index == hash_table_ctx.hash_table->get_bucket_size()) {
_build_index_for_null_probe_key = 1;
if (current_offset == 0) {
current_offset = _process_probe_null_key(probe_index);
if (!_build_index_for_null_probe_key) {
probe_index++;
build_index = 0;
}
}
}
}
} else {
SCOPED_TIMER(_search_hashtable_timer);
auto [new_probe_idx, new_build_idx,
@ -203,7 +223,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
need_null_map_for_probe &&
ignore_null > (hash_table_ctx.keys, hash_table_ctx.bucket_nums.data(),
probe_index, build_index, probe_rows, _probe_indexs.data(),
_probe_visited, _build_indexs.data(), mark_column.get());
_probe_visited, _build_indexs.data());
probe_index = new_probe_idx;
build_index = new_build_idx;
current_offset = new_current_offset;
@ -235,20 +255,76 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
if constexpr (is_mark_join) {
return do_mark_join_conjuncts<with_other_conjuncts>(
output_block, hash_table_ctx.hash_table->get_bucket_size(), null_result);
output_block, hash_table_ctx.hash_table->get_bucket_size());
} else if constexpr (with_other_conjuncts) {
return do_other_join_conjuncts(output_block, is_mark_join,
hash_table_ctx.hash_table->get_visited(),
return do_other_join_conjuncts(output_block, hash_table_ctx.hash_table->get_visited(),
hash_table_ctx.hash_table->has_null_key());
}
return Status::OK();
}
template <int JoinOpType, typename Parent>
size_t ProcessHashTableProbe<JoinOpType, Parent>::_process_probe_null_key(uint32_t probe_index) {
const auto rows = _build_block->rows();
DCHECK_LT(_build_index_for_null_probe_key, rows);
DCHECK_LT(0, _build_index_for_null_probe_key);
size_t matched_cnt = 0;
for (; _build_index_for_null_probe_key < rows && matched_cnt < _batch_size; ++matched_cnt) {
_probe_indexs[matched_cnt] = probe_index;
_build_indexs[matched_cnt] = _build_index_for_null_probe_key++;
_null_flags[matched_cnt] = 1;
}
if (_build_index_for_null_probe_key == rows) {
_build_index_for_null_probe_key = 0;
_probe_indexs[matched_cnt] = probe_index;
_build_indexs[matched_cnt] = 0;
_null_flags[matched_cnt] = 0;
matched_cnt++;
}
return matched_cnt;
}
/**
* Mark join: there is a column named mark column which stores the result of mark join conjunct.
* For example:
* ```sql
* select * from t1 where t1.k1 not in (select t2.k1 from t2 where t2.k2 = t1.k2 and t2.k3 > t1.k3) or t1.k4 < 10;
* ```
* equal join conjuncts: t2.k2 = t1.k2
* mark join conjunct: t1.k1 = t2.k1
* other join conjuncts: t2.k3 > t1.k3
* other predicates: $c$1 or t1.k4 < 10 # `$c$1` means the result of mark join conjunct(mark column)
*
* Executing flow:
*
* Equal join conjuncts (probe hash table)
* ↓↓
* Mark join conjuncts (result is nullable, stored in mark column)
* ↓↓
* Other join conjuncts (update the mark column)
* ↓↓
* Other predicates (filter rows)
*
* ```sql
* select * from t1 where t1.k1 not in (select t2.k1 from t2 where t2.k3 > t1.k3) or t1.k4 < 10;
* ```
* This sql has no equal join conjuncts:
* equal join conjuncts: NAN
* mark join conjunct: t1.k1 = t2.k1
* other join conjuncts: t2.k3 > t1.k3
* other predicates: $c$1 or t1.k4 < 10 # `$c$1` means the result of mark join conjunct(mark column)
*
* To avoid using nested loop join, we use the mark join conjunct(`t1.k1 = t2.k1`) as the equal join conjunct.
* So this query will be a "null aware left anti join", which means the equal conjunct's result should be nullable.
*/
template <int JoinOpType, typename Parent>
template <bool with_other_conjuncts>
Status ProcessHashTableProbe<JoinOpType, Parent>::do_mark_join_conjuncts(
Block* output_block, size_t hash_table_bucket_size, const std::set<uint32_t>& null_result) {
Block* output_block, size_t hash_table_bucket_size) {
DCHECK(JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
JoinOpType == TJoinOp::LEFT_SEMI_JOIN ||
@ -260,6 +336,10 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_mark_join_conjuncts(
JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
const auto row_count = output_block->rows();
if (!row_count) {
return Status::OK();
}
auto mark_column_mutable =
output_block->get_by_position(_parent->_mark_column_id).column->assume_mutable();
auto& mark_column = assert_cast<ColumnNullable&>(*mark_column_mutable);
@ -281,8 +361,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_mark_join_conjuncts(
filter_data[i] = _build_indexs[i] != 0 && _build_indexs[i] != hash_table_bucket_size;
if constexpr (is_null_aware_join) {
if constexpr (with_other_conjuncts) {
mark_null_map[i] =
null_result.contains(_probe_indexs[i]) && _build_indexs[i] != 0;
mark_null_map[i] = _null_flags[i];
} else {
if (filter_data[i]) {
last_probe_matched = _probe_indexs[i];
@ -361,7 +440,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_mark_join_conjuncts(
if constexpr (is_anti_join) {
// flip the mark column
for (size_t i = 0; i != row_count; ++i) {
mark_filter_data[i] ^= 1;
mark_filter_data[i] ^= 1; // not null/ null
}
}
@ -372,8 +451,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_mark_join_conjuncts(
template <int JoinOpType, typename Parent>
Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
Block* output_block, bool is_mark_join, std::vector<uint8_t>& visited,
bool has_null_in_build_side) {
Block* output_block, std::vector<uint8_t>& visited, bool has_null_in_build_side) {
// dispose the other join conjunct exec
auto row_count = output_block->rows();
if (!row_count) {
@ -440,30 +518,18 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
for (size_t i = 0; i < row_count; ++i) {
bool not_matched_before = _parent->_last_probe_match != _probe_indexs[i];
// _build_indexs[i] == 0 means the end of this probe index
// if a probe row not matched with any build row, we need output a false value into mark column
if constexpr (JoinOpType == TJoinOp::LEFT_SEMI_JOIN) {
if (_build_indexs[i] == 0) {
filter_map[i] = is_mark_join && not_matched_before;
filter_column_ptr[i] = false;
filter_map[i] = false;
} else if (filter_column_ptr[i]) {
filter_map[i] = not_matched_before;
_parent->_last_probe_match = _probe_indexs[i];
} else {
if (filter_column_ptr[i]) {
filter_map[i] = not_matched_before;
_parent->_last_probe_match = _probe_indexs[i];
} else {
filter_map[i] = false;
}
filter_map[i] = false;
}
} else {
if (_build_indexs[i] == 0) {
if (not_matched_before) {
filter_map[i] = true;
} else if (is_mark_join) {
filter_map[i] = true;
filter_column_ptr[i] = false;
} else {
filter_map[i] = false;
}
filter_map[i] = not_matched_before;
} else {
filter_map[i] = false;
if (filter_column_ptr[i]) {
@ -473,21 +539,6 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
}
}
if (is_mark_join) {
auto mark_column =
output_block->get_by_position(orig_columns - 1).column->assume_mutable();
ColumnFilterHelper helper(*mark_column);
for (size_t i = 0; i < row_count; ++i) {
bool mathced = filter_column_ptr[i] &&
(_build_indexs[i] != 0) == (JoinOpType == TJoinOp::LEFT_SEMI_JOIN);
if (has_null_in_build_side && !mathced) {
helper.insert_null();
} else {
helper.insert_value(mathced);
}
}
}
output_block->get_by_position(result_column_id).column = std::move(new_filter_column);
} else if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN ||
JoinOpType == TJoinOp::RIGHT_ANTI_JOIN) {
@ -512,8 +563,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
orig_columns = _right_col_idx;
}
RETURN_IF_ERROR(Block::filter_block(output_block, result_column_id,
is_mark_join ? output_block->columns() : orig_columns));
RETURN_IF_ERROR(Block::filter_block(output_block, result_column_id, orig_columns));
}
return Status::OK();

View File

@ -180,12 +180,6 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
}
#endif
if ((_join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
_join_op == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) &&
_have_other_join_conjunct) {
_build_indexes_null = std::make_shared<std::vector<uint32_t>>();
}
_runtime_filters.resize(_runtime_filter_descs.size());
for (size_t i = 0; i < _runtime_filter_descs.size(); i++) {
RETURN_IF_ERROR(state->runtime_filter_mgr()->register_producer_filter(
@ -761,7 +755,6 @@ Status HashJoinNode::sink(doris::RuntimeState* state, vectorized::Block* in_bloc
// arena will be shared with other instances.
_shared_hash_table_context->arena = _arena;
_shared_hash_table_context->block = _build_block;
_shared_hash_table_context->build_indexes_null = _build_indexes_null;
_shared_hash_table_context->hash_table_variants = _hash_table_variants;
_shared_hash_table_context->short_circuit_for_null_in_probe_side =
_has_null_in_build_side;
@ -794,7 +787,6 @@ Status HashJoinNode::sink(doris::RuntimeState* state, vectorized::Block* in_bloc
*std::static_pointer_cast<HashTableVariants>(
_shared_hash_table_context->hash_table_variants));
_build_block = _shared_hash_table_context->block;
_build_indexes_null = _shared_hash_table_context->build_indexes_null;
if (!_shared_hash_table_context->runtime_filters.empty()) {
auto ret = std::visit(

View File

@ -117,11 +117,6 @@ struct ProcessHashTableBuild {
for (uint32_t i = 1; i < _rows; i++) {
if ((*null_map)[i]) {
*has_null_key = true;
if constexpr (with_other_conjuncts &&
(JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN)) {
_parent->_build_indexes_null->emplace_back(i);
}
}
}
if (short_circuit_for_null && *has_null_key) {
@ -136,8 +131,8 @@ struct ProcessHashTableBuild {
hash_table_ctx.init_serialized_keys(_build_raw_ptrs, _rows,
null_map ? null_map->data() : nullptr, true, true,
hash_table_ctx.hash_table->get_bucket_size());
hash_table_ctx.hash_table->build(hash_table_ctx.keys, hash_table_ctx.bucket_nums.data(),
_rows);
hash_table_ctx.hash_table->template build<JoinOpType, with_other_conjuncts>(
hash_table_ctx.keys, hash_table_ctx.bucket_nums.data(), _rows);
hash_table_ctx.bucket_nums.resize(_batch_size);
hash_table_ctx.bucket_nums.shrink_to_fit();
@ -303,13 +298,6 @@ private:
std::vector<uint16_t> _probe_column_disguise_null;
std::vector<uint16_t> _probe_column_convert_to_null;
/*
* For null aware anti/semi join with other join conjuncts, we do need to care about the rows in
* build side with null keys,
* because the other join conjuncts' result maybe change null to false(null & false == false).
*/
std::shared_ptr<std::vector<uint32_t>> _build_indexes_null;
DataTypes _right_table_data_types;
DataTypes _left_table_data_types;
std::vector<std::string> _right_table_column_names;