[Opt](exec) opt the dispose nullable column logic (#17192)

This commit is contained in:
HappenLee
2023-03-01 23:25:40 +08:00
committed by GitHub
parent 633f2d52a4
commit 1244eed1cd
5 changed files with 54 additions and 36 deletions

View File

@ -177,15 +177,26 @@ void Block::erase(const std::set<size_t>& positions) {
}
}
void Block::erase(size_t position) {
if (data.empty()) {
LOG(FATAL) << "Block is empty";
/// Remove every column whose position is >= `start`, keeping columns [0, start).
///
/// `start` may equal the current column count, in which case no column is removed.
/// The bound is verified with DCHECK only.
void Block::erase_tail(size_t start) {
    DCHECK(start <= data.size()) << fmt::format(
            "Position out of bound in Block::erase_tail(), max position = {}", data.size());
    data.erase(data.begin() + start, data.end());
    // Drop the index_by_name entries whose stored position pointed at a removed column.
    // Surviving entries all have positions < start, so none of them needs renumbering.
    for (auto it = index_by_name.begin(); it != index_by_name.end();) {
        if (it->second >= start) {
            index_by_name.erase(it++);
        } else {
            ++it;
        }
    }
    // Truncate row_same_bit at the same offset, but only when it extends that far.
    if (start < row_same_bit.size()) {
        row_same_bit.erase(row_same_bit.begin() + start, row_same_bit.end());
    }
}
if (position >= data.size()) {
LOG(FATAL) << fmt::format("Position out of bound in Block::erase(), max position = {}",
data.size() - 1);
}
/// Remove the single column at `position`.
/// Both preconditions (block is not empty, position is in range) are checked with
/// DCHECK only; the removal itself is delegated to erase_impl().
void Block::erase(size_t position) {
DCHECK(!data.empty()) << "Block is empty";
// The largest valid position is data.size() - 1, which is what the message reports.
DCHECK(position < data.size()) << fmt::format(
"Position out of bound in Block::erase(), max position = {}", data.size() - 1);
erase_impl(position);
}
@ -700,21 +711,16 @@ void Block::append_block_by_selector(MutableBlock* dst, const IColumn::Selector&
Status Block::filter_block(Block* block, const std::vector<uint32_t>& columns_to_filter,
int filter_column_id, int column_to_keep) {
ColumnPtr filter_column = block->get_by_position(filter_column_id).column;
const auto& filter_column = block->get_by_position(filter_column_id).column;
if (auto* nullable_column = check_and_get_column<ColumnNullable>(*filter_column)) {
ColumnPtr nested_column = nullable_column->get_nested_column_ptr();
const auto& nested_column = nullable_column->get_nested_column_ptr();
MutableColumnPtr mutable_holder =
nested_column->use_count() == 1
? nested_column->assume_mutable()
: nested_column->clone_resized(nested_column->size());
ColumnUInt8* concrete_column = typeid_cast<ColumnUInt8*>(mutable_holder.get());
if (!concrete_column) {
return Status::InvalidArgument(
"Illegal type {} of column for filter. Must be UInt8 or Nullable(UInt8).",
filter_column->get_name());
}
ColumnUInt8* concrete_column = assert_cast<ColumnUInt8*>(mutable_holder.get());
auto* __restrict null_map = nullable_column->get_null_map_data().data();
IColumn::Filter& filter = concrete_column->get_data();
auto* __restrict filter_data = filter.data();

View File

@ -100,6 +100,8 @@ public:
void insert_unique(ColumnWithTypeAndName&& elem);
/// remove the column at the specified position
void erase(size_t position);
/// remove all columns from position `start` through the last column,
/// i.e. the positions in [start, number of columns)
void erase_tail(size_t start);
/// remove the columns at the specified positions
void erase(const std::set<size_t>& positions);
/// remove the column with the specified name
@ -290,9 +292,7 @@ public:
static Status filter_block(Block* block, int filter_column_id, int column_to_keep);
/// Drop every column of `block` at position >= column_to_keep, leaving only the
/// first column_to_keep columns.
static void erase_useless_column(Block* block, int column_to_keep) {
    // One tail erase removes the whole range; erasing column by column from the
    // back beforehand is redundant work over the same positions.
    block->erase_tail(column_to_keep);
}
// serialize block to PBlock

View File

@ -256,7 +256,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
// generated from next batch, so the filter column is removed ahead.
DCHECK_EQ(block->rows(), 0);
} else {
ColumnPtr& filter_column = block->get_by_position(filter_column_id).column;
const auto& filter_column = block->get_by_position(filter_column_id).column;
RETURN_IF_ERROR(_filter_block(block, filter_column, origin_column_num,
_lazy_read_ctx.all_predicate_col_ids));
}
@ -496,7 +496,7 @@ Status RowGroupReader::_build_pos_delete_filter(size_t read_rows) {
Status RowGroupReader::_filter_block(Block* block, const ColumnPtr& filter_column,
int column_to_keep, std::vector<uint32_t> columns_to_filter) {
if (auto* nullable_column = check_and_get_column<ColumnNullable>(*filter_column)) {
const ColumnPtr& nested_column = nullable_column->get_nested_column_ptr();
const auto& nested_column = nullable_column->get_nested_column_ptr();
MutableColumnPtr mutable_holder =
nested_column->use_count() == 1

View File

@ -525,9 +525,9 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(Block* block,
DCHECK((*_vjoin_conjunct_ptr) != nullptr);
int result_column_id = -1;
RETURN_IF_ERROR((*_vjoin_conjunct_ptr)->execute(block, &result_column_id));
ColumnPtr filter_column = block->get_by_position(result_column_id).column;
const auto& filter_column = block->get_by_position(result_column_id).column;
if (auto* nullable_column = check_and_get_column<ColumnNullable>(*filter_column)) {
ColumnPtr nested_column = nullable_column->get_nested_column_ptr();
const auto& nested_column = nullable_column->get_nested_column_ptr();
MutableColumnPtr mutable_holder =
nested_column->use_count() == 1

View File

@ -37,35 +37,40 @@ namespace doris::vectorized {
ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const ColumnNumbers& args,
size_t result, size_t input_rows_count) {
ColumnPtr result_null_map_column;
/// If result is already nullable.
ColumnPtr src_not_nullable = src;
MutableColumnPtr mutable_result_null_map_column;
if (src->only_null())
return src;
else if (auto* nullable = check_and_get_column<ColumnNullable>(*src)) {
if (auto* nullable = check_and_get_column<ColumnNullable>(*src)) {
src_not_nullable = nullable->get_nested_column_ptr();
result_null_map_column = nullable->get_null_map_column_ptr();
}
for (const auto& arg : args) {
const ColumnWithTypeAndName& elem = block.get_by_position(arg);
if (!elem.type->is_nullable()) continue;
if (!elem.type->is_nullable()) {
continue;
}
bool is_const = is_column_const(*elem.column);
/// Const Nullable that are NULL.
if (elem.column->only_null())
if (is_const && assert_cast<const ColumnConst*>(elem.column.get())->only_null()) {
return block.get_by_position(result).type->create_column_const(input_rows_count,
Null());
}
if (is_const) {
continue;
}
if (is_column_const(*elem.column)) continue;
if (auto* nullable = check_and_get_column<ColumnNullable>(*elem.column)) {
if (auto* nullable = assert_cast<const ColumnNullable*>(elem.column.get())) {
const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr();
if (!result_null_map_column) {
result_null_map_column = null_map_column->clone_resized(null_map_column->size());
result_null_map_column = null_map_column->clone_resized(input_rows_count);
} else {
MutableColumnPtr mutable_result_null_map_column =
(*std::move(result_null_map_column)).assume_mutable();
if (!mutable_result_null_map_column) {
mutable_result_null_map_column =
(*std::move(result_null_map_column)).assume_mutable();
}
NullMap& result_null_map =
assert_cast<ColumnUInt8&>(*mutable_result_null_map_column).get_data();
@ -73,12 +78,19 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum
assert_cast<const ColumnUInt8&>(*null_map_column).get_data();
VectorizedUtils::update_null_map(result_null_map, src_null_map);
result_null_map_column = std::move(mutable_result_null_map_column);
}
}
}
if (!result_null_map_column) return make_nullable(src);
if (!result_null_map_column) {
if (is_column_const(*src)) {
return ColumnConst::create(
make_nullable(assert_cast<const ColumnConst&>(*src).get_data_column_ptr(),
false),
input_rows_count);
}
return ColumnNullable::create(src, ColumnUInt8::create(input_rows_count, 0));
}
return ColumnNullable::create(src_not_nullable->convert_to_full_column_if_const(),
result_null_map_column);