From 620d33a76318a07247652a562b752d28c6d8a1fa Mon Sep 17 00:00:00 2001 From: Pxl Date: Thu, 25 Aug 2022 11:42:52 +0800 Subject: [PATCH] [Enhancement](optimize) set result_size_hint to filter_block (#11972) --- be/src/vec/columns/column_vector.cpp | 8 ++-- be/src/vec/columns/columns_common.cpp | 69 ++++++++++++--------------- be/src/vec/columns/columns_common.h | 38 --------------- be/src/vec/core/block.cpp | 23 ++------- be/src/vec/core/block.h | 3 ++ be/test/vec/core/block_test.cpp | 8 ++++ 6 files changed, 52 insertions(+), 97 deletions(-) diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index 51ed9371d1..31ea06ebad 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -297,7 +297,7 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter& filt, ssize_t result_si auto res = this->create(); Container& res_data = res->get_data(); - if (result_size_hint) res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + res_data.reserve(result_size_hint > 0 ? 
result_size_hint : size); const UInt8* filt_pos = filt.data(); const UInt8* filt_end = filt_pos + size; @@ -319,7 +319,7 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter& filt, ssize_t result_si } else { while (mask) { const size_t idx = __builtin_ctzll(mask); - res_data.push_back(data_pos[idx]); + res_data.push_back_without_reserve(data_pos[idx]); mask = mask & (mask - 1); } } @@ -329,7 +329,9 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter& filt, ssize_t result_si } while (filt_pos < filt_end) { - if (*filt_pos) res_data.push_back(*data_pos); + if (*filt_pos) { + res_data.push_back_without_reserve(*data_pos); + } ++filt_pos; ++data_pos; diff --git a/be/src/vec/columns/columns_common.cpp b/be/src/vec/columns/columns_common.cpp index d7c7ed0b14..02183b9876 100644 --- a/be/src/vec/columns/columns_common.cpp +++ b/be/src/vec/columns/columns_common.cpp @@ -29,7 +29,6 @@ #include "vec/columns/column.h" #include "vec/columns/column_vector.h" #include "vec/columns/columns_common.h" -#include "vec/common/typeid_cast.h" namespace doris::vectorized { @@ -48,7 +47,7 @@ size_t count_bytes_in_filter(const IColumn::Filter& filt) { const __m128i zero16 = _mm_setzero_si128(); const Int8* end64 = pos + filt.size() / 64 * 64; - for (; pos < end64; pos += 64) + for (; pos < end64; pos += 64) { count += __builtin_popcountll( static_cast(_mm_movemask_epi8(_mm_cmpgt_epi8( _mm_loadu_si128(reinterpret_cast(pos)), zero16))) | @@ -61,8 +60,9 @@ size_t count_bytes_in_filter(const IColumn::Filter& filt) { (static_cast(_mm_movemask_epi8(_mm_cmpgt_epi8( _mm_loadu_si128(reinterpret_cast(pos + 48)), zero16))) << 48)); + } - /// TODO Add duff device for tail? + /// TODO Add duff device for tail? 
#endif for (; pos < end; ++pos) { @@ -75,13 +75,17 @@ size_t count_bytes_in_filter(const IColumn::Filter& filt) { std::vector count_columns_size_in_selector(IColumn::ColumnIndex num_columns, const IColumn::Selector& selector) { std::vector counts(num_columns); - for (auto idx : selector) ++counts[idx]; + for (auto idx : selector) { + ++counts[idx]; + } return counts; } bool memory_is_byte(const void* data, size_t size, uint8_t byte) { - if (size == 0) return true; + if (size == 0) { + return true; + } auto ptr = reinterpret_cast(data); return *ptr == byte && memcmp(ptr, ptr + 1, size - 1) == 0; } @@ -104,16 +108,16 @@ struct ResultOffsetsBuilder { res_offsets.reserve(result_size_hint > 0 ? result_size_hint : src_size); } - void insertOne(size_t array_size) { + void insert_one(size_t array_size) { current_src_offset += array_size; - res_offsets.push_back(current_src_offset); + res_offsets.push_back_without_reserve(current_src_offset); } template - void insertChunk(const IColumn::Offset* src_offsets_pos, bool first, - IColumn::Offset chunk_offset, size_t chunk_size) { + void insert_chunk(const IColumn::Offset* src_offsets_pos, bool first, + IColumn::Offset chunk_offset, size_t chunk_size) { const auto offsets_size_old = res_offsets.size(); - res_offsets.resize(offsets_size_old + SIMD_BYTES); + res_offsets.resize_assume_reserved(offsets_size_old + SIMD_BYTES); memcpy(&res_offsets[offsets_size_old], src_offsets_pos, SIMD_BYTES * sizeof(IColumn::Offset)); @@ -125,7 +129,9 @@ struct ResultOffsetsBuilder { const auto res_offsets_pos = &res_offsets[offsets_size_old]; /// adjust offsets - for (size_t i = 0; i < SIMD_BYTES; ++i) res_offsets_pos[i] -= diff_offset; + for (size_t i = 0; i < SIMD_BYTES; ++i) { + res_offsets_pos[i] -= diff_offset; + } } } current_src_offset += chunk_size; @@ -135,10 +141,10 @@ struct ResultOffsetsBuilder { struct NoResultOffsetsBuilder { explicit NoResultOffsetsBuilder(IColumn::Offsets*) {} void reserve(ssize_t, size_t) {} - void 
insertOne(size_t) {} + void insert_one(size_t) {} template - void insertChunk(const IColumn::Offset*, bool, IColumn::Offset, size_t) {} + void insert_chunk(const IColumn::Offset*, bool, IColumn::Offset, size_t) {} }; template @@ -151,15 +157,15 @@ void filter_arrays_impl_generic(const PaddedPODArray& src_elems, LOG(FATAL) << "Size of filter doesn't match size of column."; } + constexpr int ASSUME_STRING_LENGTH = 5; ResultOffsetsBuilder result_offsets_builder(res_offsets); - if (result_size_hint) { - result_offsets_builder.reserve(result_size_hint, size); + result_offsets_builder.reserve(result_size_hint, size); - if (result_size_hint < 0) - res_elems.reserve(src_elems.size()); - else if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow. - res_elems.reserve((result_size_hint * src_elems.size() + size - 1) / size); + if (result_size_hint < 0) { + res_elems.reserve(src_elems.size() * ASSUME_STRING_LENGTH); + } else if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) { /// Avoid overflow. + res_elems.reserve(result_size_hint * ASSUME_STRING_LENGTH); } const UInt8* filt_pos = filt.data(); @@ -173,7 +179,7 @@ void filter_arrays_impl_generic(const PaddedPODArray& src_elems, const auto arr_offset = offset_ptr == offsets_begin ? 0 : offset_ptr[-1]; const auto arr_size = *offset_ptr - arr_offset; - result_offsets_builder.insertOne(arr_size); + result_offsets_builder.insert_one(arr_size); const auto elems_size_old = res_elems.size(); res_elems.resize(elems_size_old + arr_size); @@ -193,8 +199,8 @@ void filter_arrays_impl_generic(const PaddedPODArray& src_elems, const auto chunk_offset = first ? 
0 : offsets_pos[-1]; const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset; - result_offsets_builder.template insertChunk(offsets_pos, first, - chunk_offset, chunk_size); + result_offsets_builder.template insert_chunk(offsets_pos, first, + chunk_offset, chunk_size); /// copy elements for SIMD_BYTES arrays at once const auto elems_size_old = res_elems.size(); @@ -213,7 +219,9 @@ void filter_arrays_impl_generic(const PaddedPODArray& src_elems, } while (filt_pos < filt_end) { - if (*filt_pos) copy_array(offsets_pos); + if (*filt_pos) { + copy_array(offsets_pos); + } ++filt_pos; ++offsets_pos; @@ -259,19 +267,4 @@ INSTANTIATE(Float64) #undef INSTANTIATE -namespace detail { -template -const PaddedPODArray* get_indexes_data(const IColumn& indexes) { - auto* column = typeid_cast*>(&indexes); - if (column) return &column->get_data(); - - return nullptr; -} - -template const PaddedPODArray* get_indexes_data(const IColumn& indexes); -template const PaddedPODArray* get_indexes_data(const IColumn& indexes); -template const PaddedPODArray* get_indexes_data(const IColumn& indexes); -template const PaddedPODArray* get_indexes_data(const IColumn& indexes); -} // namespace detail - } // namespace doris::vectorized diff --git a/be/src/vec/columns/columns_common.h b/be/src/vec/columns/columns_common.h index ef9c00c72c..dc9116e343 100644 --- a/be/src/vec/columns/columns_common.h +++ b/be/src/vec/columns/columns_common.h @@ -50,42 +50,4 @@ void filter_arrays_impl_only_data(const PaddedPODArray& src_elems, const IColumn::Offsets& src_offsets, PaddedPODArray& res_elems, const IColumn::Filter& filt, ssize_t result_size_hint); -namespace detail { -template -const PaddedPODArray* get_indexes_data(const IColumn& indexes); -} - -/// Check limit <= indexes->size() and call column.index_impl(const PaddedPodArray & indexes, UInt64 limit). 
-template -ColumnPtr select_index_impl(const Column& column, const IColumn& indexes, size_t limit) { - if (limit == 0) limit = indexes.size(); - - if (indexes.size() < limit) { - LOG(FATAL) << "Size of indexes is less than required."; - } - - if (auto* data_uint8 = detail::get_indexes_data(indexes)) - return column.template index_impl(*data_uint8, limit); - else if (auto* data_uint16 = detail::get_indexes_data(indexes)) - return column.template index_impl(*data_uint16, limit); - else if (auto* data_uint32 = detail::get_indexes_data(indexes)) - return column.template index_impl(*data_uint32, limit); - else if (auto* data_uint64 = detail::get_indexes_data(indexes)) - return column.template index_impl(*data_uint64, limit); - else { - LOG(FATAL) << "Indexes column for IColumn::select must be ColumnUInt, got" - << indexes.get_name(); - return nullptr; - } -} - -#define INSTANTIATE_INDEX_IMPL(Column) \ - template ColumnPtr Column::indexImpl(const PaddedPODArray& indexes, \ - size_t limit) const; \ - template ColumnPtr Column::indexImpl(const PaddedPODArray& indexes, \ - size_t limit) const; \ - template ColumnPtr Column::indexImpl(const PaddedPODArray& indexes, \ - size_t limit) const; \ - template ColumnPtr Column::indexImpl(const PaddedPODArray& indexes, \ - size_t limit) const; } // namespace doris::vectorized diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 8728803a4d..5c30a78a3a 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -23,11 +23,6 @@ #include #include -#include -#include -#include -#include - #include "common/status.h" #include "runtime/descriptors.h" #include "runtime/row_batch.h" @@ -35,26 +30,17 @@ #include "runtime/tuple_row.h" #include "udf/udf.h" #include "util/block_compression.h" +#include "util/simd/bits.h" #include "vec/columns/column.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" 
-#include "vec/columns/columns_common.h" #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" -#include "vec/common/exception.h" #include "vec/common/string_ref.h" #include "vec/common/typeid_cast.h" -#include "vec/data_types/data_type_bitmap.h" -#include "vec/data_types/data_type_date.h" -#include "vec/data_types/data_type_date_time.h" -#include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_factory.hpp" -#include "vec/data_types/data_type_hll.h" -#include "vec/data_types/data_type_nullable.h" -#include "vec/data_types/data_type_number.h" -#include "vec/data_types/data_type_string.h" namespace doris::vectorized { @@ -628,8 +614,9 @@ void Block::update_hash(SipHash& hash) const { } } -void filter_block_internal(Block* block, const IColumn::Filter& filter, uint32_t column_to_keep) { - auto count = count_bytes_in_filter(filter); +void Block::filter_block_internal(Block* block, const IColumn::Filter& filter, + uint32_t column_to_keep) { + size_t count = filter.size() - simd::count_zero_num((int8_t*)filter.data(), filter.size()); if (count == 0) { for (size_t i = 0; i < column_to_keep; ++i) { std::move(*block->get_by_position(i).column).assume_mutable()->clear(); @@ -638,7 +625,7 @@ void filter_block_internal(Block* block, const IColumn::Filter& filter, uint32_t if (count != block->rows()) { for (size_t i = 0; i < column_to_keep; ++i) { block->get_by_position(i).column = - block->get_by_position(i).column->filter(filter, 0); + block->get_by_position(i).column->filter(filter, count); } } } diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index b4fd18bb0c..1d7ffb6a22 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -257,6 +257,9 @@ public: // copy a new block by the offset column Block copy_block(const std::vector& column_offset) const; + static void filter_block_internal(Block* block, const IColumn::Filter& filter, + uint32_t column_to_keep); + static Status filter_block(Block* block, int 
filter_column_id, int column_to_keep); static void erase_useless_column(Block* block, int column_to_keep) { diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index a3a5be3b26..3188808b54 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -501,5 +501,13 @@ TEST(BlockTest, dump_data) { fill_block_with_array_string(block1); // Note: here we should set 'row_num' in dump_data EXPECT_GT(block1.dump_data(10).size(), 1); + + vectorized::IColumn::Filter filter; + int size = block1.rows() / 2; + for (int i = 0; i < block1.rows(); i++) { + filter.push_back(i % 2); + } + vectorized::Block::filter_block_internal(&block1, filter, block1.columns()); + EXPECT_EQ(size, block1.rows()); } } // namespace doris