diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index d65c355174..761140852a 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -775,39 +775,58 @@ Status DefaultValueColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, b void DefaultValueColumnIterator::insert_default_data(vectorized::MutableColumnPtr &dst, size_t n) { vectorized::Int128 int128; - char* data_ptr = (char*)&int128; + char* data_ptr = (char *) &int128; size_t data_len = sizeof(int128); - auto type = _type_info->type(); - if (type == OLAP_FIELD_TYPE_DATE) { - assert(_type_size == sizeof(FieldTypeTraits::CppType)); //uint24_t - std::string str = FieldTypeTraits::to_string(_mem_value); + auto insert_column_data = [&]() { + for (size_t i = 0; i < n; ++i) { + dst->insert_data(data_ptr, data_len); + } + }; - vectorized::VecDateTimeValue value; - value.from_date_str(str.c_str(), str.length()); - value.cast_to_date(); - //TODO: here is int128 = int64 - int128 = binary_cast(value); - } else if (type == OLAP_FIELD_TYPE_DATETIME) { - assert(_type_size == sizeof(FieldTypeTraits::CppType)); //int64_t - std::string str = FieldTypeTraits::to_string(_mem_value); + switch (_type_info->type()) { + case OLAP_FIELD_TYPE_OBJECT: + case OLAP_FIELD_TYPE_HLL:{ + dst->insert_many_defaults(n); + break; + } - vectorized::VecDateTimeValue value; - value.from_date_str(str.c_str(), str.length()); - value.to_datetime(); + case OLAP_FIELD_TYPE_DATE: { + assert(_type_size == sizeof(FieldTypeTraits::CppType)); //uint24_t + std::string str = FieldTypeTraits::to_string(_mem_value); - int128 = binary_cast(value); - } else if (type == OLAP_FIELD_TYPE_DECIMAL) { - assert(_type_size == sizeof(FieldTypeTraits::CppType)); //decimal12_t - decimal12_t* d = (decimal12_t*)_mem_value; - int128 = DecimalV2Value(d->integer, d->fraction).value(); - } else { - data_ptr = (char*)_mem_value; - data_len = _type_size; - 
} + vectorized::VecDateTimeValue value; + value.from_date_str(str.c_str(), str.length()); + value.cast_to_date(); + //TODO: int128 is assigned from an int64 here, which relies on little-endian byte order + int128 = binary_cast(value); + insert_column_data(); + break; + } + case OLAP_FIELD_TYPE_DATETIME: { + assert(_type_size == sizeof(FieldTypeTraits::CppType)); //int64_t + std::string str = FieldTypeTraits::to_string(_mem_value); - for (size_t i = 0; i < n; ++i) { - dst->insert_data(data_ptr, data_len); + vectorized::VecDateTimeValue value; + value.from_date_str(str.c_str(), str.length()); + value.to_datetime(); + + int128 = binary_cast(value); + insert_column_data(); + break; + } + case OLAP_FIELD_TYPE_DECIMAL: { + assert(_type_size == sizeof(FieldTypeTraits::CppType)); //decimal12_t + decimal12_t *d = (decimal12_t *) _mem_value; + int128 = DecimalV2Value(d->integer, d->fraction).value(); + insert_column_data(); + break; + } + default: { + data_ptr = (char *) _mem_value; + data_len = _type_size; + insert_column_data(); + } } } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index ed0429cdec..53bf87a639 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -134,6 +134,7 @@ Status SegmentIterator::_init(bool is_vec) { RETURN_IF_ERROR(_get_row_ranges_by_column_conditions()); if (is_vec) { _vec_init_lazy_materialization(); + _vec_init_char_column_id(); } else { _init_lazy_materialization(); } @@ -707,6 +708,17 @@ void SegmentIterator::_vec_init_lazy_materialization() { } } +void SegmentIterator::_vec_init_char_column_id() { + for (size_t i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_id(i); + auto column_desc = _schema.column(cid); + + if (column_desc->type() == OLAP_FIELD_TYPE_CHAR) { + _char_type_idx.emplace_back(i); + } + } +} + Status SegmentIterator::_read_columns(const std::vector& column_ids, 
vectorized::MutableColumns& column_block, size_t nrows) { for (auto cid : column_ids) { @@ -720,8 +732,6 @@ Status SegmentIterator::_read_columns(const std::vector& column_ids, void SegmentIterator::_init_current_block( vectorized::Block* block, std::vector& current_columns) { - _char_type_idx.clear(); - bool is_block_mem_reuse = block->mem_reuse(); if (is_block_mem_reuse) { block->clear_column_data(_schema.num_column_ids()); @@ -738,11 +748,7 @@ void SegmentIterator::_init_current_block( auto cid = _schema.column_id(i); auto column_desc = _schema.column(cid); - if (column_desc->type() == OLAP_FIELD_TYPE_CHAR) { - _char_type_idx.emplace_back(i); - } - - if (_is_pred_column[cid]) { //todo(wb) maybe we can relase it after output block + if (_is_pred_column[cid]) { //todo(wb) maybe we can release it after output block current_columns[cid]->clear(); } else { // non-predicate column if (is_block_mem_reuse) { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 07ee964369..42a2cafc90 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -79,6 +79,10 @@ private: void _init_lazy_materialization(); void _vec_init_lazy_materialization(); + // TODO: Fix Me + // The storage layer pads CHAR values with 0 up to their length, but the query engine needs to ignore that padding, + // so the segment iterator needs to shrink CHAR columns before outputting them. Only used by the vectorized query engine. 
+ void _vec_init_char_column_id(); uint32_t segment_id() const { return _segment->id(); } uint32_t num_rows() const { return _segment->num_rows(); } diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index ca44be9e8f..dc970df517 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -924,22 +924,24 @@ std::unique_ptr Block::create_same_struct_block(size_t size) const { return temp_block; } -void Block::shrink_char_type_column_suffix_zero(std::vector char_type_idx) { +void Block::shrink_char_type_column_suffix_zero(const std::vector& char_type_idx) { for (auto idx : char_type_idx) { - if (this->get_by_position(idx).column->is_nullable()) { - this->get_by_position(idx).column = ColumnNullable::create( - reinterpret_cast( - reinterpret_cast( - this->get_by_position(idx).column.get()) - ->get_nested_column_ptr() - .get()) - ->get_shinked_column(), - reinterpret_cast(this->get_by_position(idx).column.get()) - ->get_null_map_column_ptr()); - } else { - this->get_by_position(idx).column = - reinterpret_cast(this->get_by_position(idx).column.get()) - ->get_shinked_column(); + if (idx < data.size()) { + if (this->get_by_position(idx).column->is_nullable()) { + this->get_by_position(idx).column = ColumnNullable::create( + reinterpret_cast( + reinterpret_cast( + this->get_by_position(idx).column.get()) + ->get_nested_column_ptr() + .get()) + ->get_shinked_column(), + reinterpret_cast(this->get_by_position(idx).column.get()) + ->get_null_map_column_ptr()); + } else { + this->get_by_position(idx).column = + reinterpret_cast(this->get_by_position(idx).column.get()) + ->get_shinked_column(); + } } } } diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 8fa4a3004a..82d85145ca 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -104,6 +104,16 @@ public: doris::vectorized::IColumn* input_col_ptr, uint16_t* sel_rowid_idx, uint16_t select_size, int block_cid, size_t batch_size) { + // Only a column needed by an additional delete 
filter condition can be positioned past the end of the block. + // We should not materialize columns that the query engine does not need, so just return OK here. + // E.g.: + // `delete from table where a = 10;` + // `select b from table;` + // column `a` is only used inside the segment iterator; the block from the query engine only contains column `b`, + // so `block_cid >= data.size()` is true + if (block_cid >= data.size()) + return Status::OK(); + if (is_block_mem_reuse) { auto* raw_res_ptr = this->get_by_position(block_cid).column.get(); const_cast(raw_res_ptr)->reserve(batch_size); @@ -296,7 +306,7 @@ public: doris::Tuple* deep_copy_tuple(const TupleDescriptor&, MemPool*, int, int, bool padding_char = false); - void shrink_char_type_column_suffix_zero(std::vector char_type_idx); + void shrink_char_type_column_suffix_zero(const std::vector& char_type_idx); private: void erase_impl(size_t position);