[improvement] Add reading by rowids to speed up lazy materialization (#10506)
This commit is contained in:
@ -261,6 +261,38 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, vectorized::MutableColumnPtr
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Reads the values at the given row ids into `dst`.
// `rowids` holds ascending, segment-absolute ordinals; `page_first_ordinal`
// is subtracted to obtain the in-page index. On return, *n holds the number
// of values actually read (it may be smaller than the requested count when a
// rowid falls past the end of this page).
Status BinaryDictPageDecoder::read_by_rowids(const rowid_t* rowids, ordinal_t page_first_ordinal,
                                             size_t* n, vectorized::MutableColumnPtr& dst) {
    // Plain-encoded pages have no dictionary; delegate to the plain decoder.
    if (_encoding_type == PLAIN_ENCODING) {
        return _data_page_decoder->read_by_rowids(rowids, page_first_ordinal, n, dst);
    }
    DCHECK(_parsed);
    DCHECK(_dict_decoder != nullptr) << "dict decoder pointer is nullptr";

    if (PREDICT_FALSE(*n == 0)) {
        // *n is already 0; the previous redundant `*n = 0;` dead store removed.
        return Status::OK();
    }

    // Dictionary codes for the whole page, as decoded by the bit-shuffle decoder.
    const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
    const auto total = *n;
    size_t read_count = 0;
    // NOTE(review): VLA is a compiler extension and is sized by the caller's
    // batch size -- consider a heap/scratch buffer if *n can grow large.
    int32_t data[total];
    for (size_t i = 0; i < total; ++i) {
        ordinal_t ord = rowids[i] - page_first_ordinal;
        if (PREDICT_FALSE(ord >= _bit_shuffle_ptr->_num_elements)) {
            break; // remaining rowids are beyond this page
        }
        data[read_count++] = data_array[ord];
    }

    if (LIKELY(read_count > 0)) {
        dst->insert_many_dict_data(data, 0, _dict_word_info, read_count,
                                   _dict_decoder->_num_elems);
    }
    *n = read_count;
    return Status::OK();
}
|
||||
|
||||
Status BinaryDictPageDecoder::next_batch(size_t* n, ColumnBlockView* dst) {
|
||||
if (_encoding_type == PLAIN_ENCODING) {
|
||||
return _data_page_decoder->next_batch(n, dst);
|
||||
|
||||
@ -108,6 +108,9 @@ public:
|
||||
|
||||
Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override;
|
||||
|
||||
    // Batch-reads the values at the given ascending, segment-absolute row ids
    // into `dst`; *n is updated to the number of values actually read.
    Status read_by_rowids(const rowid_t* rowids, ordinal_t page_first_ordinal, size_t* n,
                          vectorized::MutableColumnPtr& dst) override;
|
||||
|
||||
size_t count() const override { return _data_page_decoder->count(); }
|
||||
|
||||
size_t current_index() const override { return _data_page_decoder->current_index(); }
|
||||
|
||||
@ -271,6 +271,38 @@ public:
|
||||
return Status::OK();
|
||||
};
|
||||
|
||||
Status read_by_rowids(const rowid_t* rowids, ordinal_t page_first_ordinal, size_t* n,
|
||||
vectorized::MutableColumnPtr& dst) override {
|
||||
DCHECK(_parsed);
|
||||
if (PREDICT_FALSE(*n == 0)) {
|
||||
*n = 0;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
auto total = *n;
|
||||
size_t read_count = 0;
|
||||
uint32_t len_array[total];
|
||||
uint32_t start_offset_array[total];
|
||||
for (size_t i = 0; i < total; ++i) {
|
||||
ordinal_t ord = rowids[i] - page_first_ordinal;
|
||||
if (UNLIKELY(ord >= _num_elems)) {
|
||||
break;
|
||||
}
|
||||
|
||||
const uint32_t start_offset = offset(ord);
|
||||
start_offset_array[read_count] = start_offset;
|
||||
len_array[read_count] = offset(ord + 1) - start_offset;
|
||||
read_count++;
|
||||
}
|
||||
|
||||
if (LIKELY(read_count > 0))
|
||||
dst->insert_many_binary_data(_data.mutable_data(), len_array, start_offset_array,
|
||||
read_count);
|
||||
|
||||
*n = read_count;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t count() const override {
|
||||
DCHECK(_parsed);
|
||||
return _num_elems;
|
||||
|
||||
@ -408,6 +408,32 @@ public:
|
||||
return Status::OK();
|
||||
};
|
||||
|
||||
Status read_by_rowids(const rowid_t* rowids, ordinal_t page_first_ordinal, size_t* n,
|
||||
vectorized::MutableColumnPtr& dst) override {
|
||||
DCHECK(_parsed);
|
||||
if (PREDICT_FALSE(*n == 0)) {
|
||||
*n = 0;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
auto total = *n;
|
||||
auto read_count = 0;
|
||||
CppType data[total];
|
||||
for (size_t i = 0; i < total; ++i) {
|
||||
ordinal_t ord = rowids[i] - page_first_ordinal;
|
||||
if (UNLIKELY(ord >= _num_elements)) {
|
||||
break;
|
||||
}
|
||||
|
||||
data[read_count++] = *reinterpret_cast<CppType*>(get_data(ord));
|
||||
}
|
||||
|
||||
if (LIKELY(read_count > 0)) dst->insert_many_fix_len_data((const char*)data, read_count);
|
||||
|
||||
*n = read_count;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
    // Same as next_batch except the cursor is not advanced (next_batch<false>
    // presumably means "do not move forward" -- see the template definition).
    Status peek_next_batch(size_t* n, ColumnBlockView* dst) override {
        return next_batch<false>(n, dst);
    }
|
||||
|
||||
@ -704,6 +704,80 @@ Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& d
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Reads the values at the given ascending row ids into `dst`.
// Works page by page: seek to the first unread rowid, then consume the run of
// rowids that fall inside the current page before seeking again.
Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count,
                                          vectorized::MutableColumnPtr& dst) {
    size_t remaining = count;
    size_t total_read_count = 0;
    size_t nrows_to_read = 0;
    while (remaining > 0) {
        RETURN_IF_ERROR(seek_to_ordinal(rowids[total_read_count]));

        // number of rows to be read from this page
        nrows_to_read = std::min(remaining, _page.remaining());

        if (_page.has_null) {
            // The page interleaves null runs and data runs; walk the null map
            // run by run and only dispatch contiguous non-null rowids to the
            // data decoder.
            size_t already_read = 0;
            while ((nrows_to_read - already_read) > 0) {
                bool is_null = false;
                size_t this_run = std::min(nrows_to_read - already_read, _page.remaining());
                if (UNLIKELY(this_run == 0)) {
                    break;
                }
                this_run = _page.null_decoder.GetNextRun(&is_null, this_run);
                size_t offset = total_read_count + already_read;
                size_t this_read_count = 0;
                rowid_t current_ordinal_in_page = _page.offset_in_page + _page.first_ordinal;
                // Count how many of the requested rowids land inside this run.
                for (size_t i = 0; i < this_run; ++i) {
                    if (rowids[offset + i] - current_ordinal_in_page >= this_run) {
                        break;
                    }
                    this_read_count++;
                }

                auto origin_index = _page.data_decoder->current_index();
                if (this_read_count > 0) {
                    if (is_null) {
                        // Null values never reach the data decoder; append
                        // nulls straight to the destination column.
                        auto* null_col =
                                vectorized::check_and_get_column<vectorized::ColumnNullable>(dst);
                        if (UNLIKELY(null_col == nullptr)) {
                            return Status::InternalError("unexpected column type in column reader");
                        }

                        const_cast<vectorized::ColumnNullable*>(null_col)->insert_null_elements(
                                this_read_count);
                    } else {
                        size_t read_count = this_read_count;

                        // ordinal in nullable columns' data buffer maybe be not continuously(the data doesn't contain null value),
                        // so we need use `page_start_off_in_decoder` to calculate the actual offset in `data_decoder`
                        size_t page_start_off_in_decoder =
                                _page.first_ordinal + _page.offset_in_page - origin_index;
                        RETURN_IF_ERROR(_page.data_decoder->read_by_rowids(
                                &rowids[offset], page_start_off_in_decoder, &read_count, dst));
                        DCHECK_EQ(read_count, this_read_count);
                    }
                }

                // Null runs occupy no entries in the data decoder, so only
                // advance it after a non-null run.
                if (!is_null) _page.data_decoder->seek_to_position_in_page(origin_index + this_run);

                already_read += this_read_count;
                _page.offset_in_page += this_run;
                DCHECK(_page.offset_in_page <= _page.num_rows);
            }

            nrows_to_read = already_read;
            total_read_count += nrows_to_read;
            remaining -= nrows_to_read;
        } else {
            // BUGFIX: the decoder's Status was previously dropped on this
            // path; propagate it like every other call site does.
            RETURN_IF_ERROR(_page.data_decoder->read_by_rowids(
                    &rowids[total_read_count], _page.first_ordinal, &nrows_to_read, dst));
            total_read_count += nrows_to_read;
            remaining -= nrows_to_read;
        }
    }
    return Status::OK();
}
|
||||
|
||||
Status FileColumnIterator::_load_next_page(bool* eos) {
|
||||
_page_iter.next();
|
||||
if (!_page_iter.valid()) {
|
||||
|
||||
@ -247,6 +247,11 @@ public:
|
||||
return Status::NotSupported("not implement");
|
||||
}
|
||||
|
||||
    // Batch-read the values at the given sorted row ids into `dst`.
    // Default implementation is unsupported; concrete iterators override this
    // to speed up lazy materialization.
    virtual Status read_by_rowids(const rowid_t* rowids, const size_t count,
                                  vectorized::MutableColumnPtr& dst) {
        return Status::NotSupported("not implement");
    }
|
||||
|
||||
virtual ordinal_t get_current_ordinal() const = 0;
|
||||
|
||||
virtual Status get_row_ranges_by_zone_map(CondColumn* cond_column, CondColumn* delete_condition,
|
||||
@ -283,6 +288,9 @@ public:
|
||||
|
||||
Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override;
|
||||
|
||||
    // Batch-read the values at the given sorted row ids into `dst`.
    Status read_by_rowids(const rowid_t* rowids, const size_t count,
                          vectorized::MutableColumnPtr& dst) override;
|
||||
|
||||
ordinal_t get_current_ordinal() const override { return _current_ordinal; }
|
||||
|
||||
// get row ranges by zone map
|
||||
|
||||
@ -86,6 +86,11 @@ public:
|
||||
return Status::NotSupported("not implement vec op now");
|
||||
}
|
||||
|
||||
    // Batch-read the values at the given ascending, segment-absolute row ids
    // (`page_first_ordinal` maps them into this page) into `dst`.
    // Default implementation is unsupported; page decoders override it.
    virtual Status read_by_rowids(const rowid_t* rowids, ordinal_t page_first_ordinal, size_t* n,
                                  vectorized::MutableColumnPtr& dst) {
        return Status::NotSupported("not implement vec op now");
    }
|
||||
|
||||
// Same as `next_batch` except for not moving forward the cursor.
|
||||
// When read array's ordinals in `ArrayFileColumnIterator`, we want to read one extra ordinal
|
||||
// but do not want to move forward the cursor.
|
||||
|
||||
@ -254,6 +254,38 @@ public:
|
||||
return Status::OK();
|
||||
};
|
||||
|
||||
Status read_by_rowids(const rowid_t* rowids, ordinal_t page_first_ordinal, size_t* n,
|
||||
vectorized::MutableColumnPtr& dst) override {
|
||||
DCHECK(_parsed);
|
||||
if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) {
|
||||
*n = 0;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
auto total = *n;
|
||||
bool result = false;
|
||||
size_t read_count = 0;
|
||||
CppType value;
|
||||
for (size_t i = 0; i < total; ++i) {
|
||||
ordinal_t ord = rowids[i] - page_first_ordinal;
|
||||
if (UNLIKELY(ord >= _num_elements)) {
|
||||
*n = read_count;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
_rle_decoder.Skip(ord - _cur_index);
|
||||
_cur_index = ord;
|
||||
|
||||
result = _rle_decoder.Get(&value);
|
||||
_cur_index++;
|
||||
DCHECK(result);
|
||||
dst->insert_data((char*)(&value), SIZE_OF_TYPE);
|
||||
read_count++;
|
||||
}
|
||||
*n = read_count;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
    // Total number of values stored in this page.
    size_t count() const override { return _num_elements; }
|
||||
|
||||
    // In-page ordinal of the next value to be read.
    size_t current_index() const override { return _cur_index; }
|
||||
|
||||
@ -699,28 +699,12 @@ void SegmentIterator::_vec_init_lazy_materialization() {
|
||||
|
||||
// Step 2: check non-predicate read costs to determine whether need lazy materialization
|
||||
// fill _non_predicate_columns.
|
||||
// note(wb) For block schema, query layer and storage layer may have some diff
|
||||
// query layer block schema not contains delete column, but storage layer appends delete column to end of block schema
|
||||
// When output block to query layer, delete column can be skipped.
|
||||
// _schema.column_ids() stands for storage layer block schema, so it contains delete columnid
|
||||
// we just regard delete column as common pred column here.
|
||||
// After some optimization, we suppose lazy materialization is better performance.
|
||||
if (_schema.column_ids().size() > pred_column_ids.size()) {
|
||||
for (auto cid : _schema.column_ids()) {
|
||||
if (!_is_pred_column[cid]) {
|
||||
_non_predicate_columns.push_back(cid);
|
||||
FieldType type = _schema.column(cid)->type();
|
||||
|
||||
// todo(wb) maybe we can make read char type faster
|
||||
// todo(wb) support map/array type
|
||||
// todo(wb) consider multiple integer columns cost, such as 1000 columns, maybe lazy materialization faster
|
||||
if (!_lazy_materialization_read &&
|
||||
(_is_need_vec_eval ||
|
||||
_is_need_short_eval) && // only when pred exists, we need to consider lazy materialization
|
||||
(type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT ||
|
||||
type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR ||
|
||||
type == OLAP_FIELD_TYPE_STRING || type == OLAP_FIELD_TYPE_BOOL ||
|
||||
type == OLAP_FIELD_TYPE_DATE || type == OLAP_FIELD_TYPE_DATETIME ||
|
||||
type == OLAP_FIELD_TYPE_DECIMAL)) {
|
||||
if (_is_need_vec_eval || _is_need_short_eval) {
|
||||
_lazy_materialization_read = true;
|
||||
}
|
||||
}
|
||||
@ -971,21 +955,13 @@ void SegmentIterator::_read_columns_by_rowids(std::vector<ColumnId>& read_column
|
||||
uint16_t* sel_rowid_idx, size_t select_size,
|
||||
vectorized::MutableColumns* mutable_columns) {
|
||||
SCOPED_RAW_TIMER(&_opts.stats->lazy_read_ns);
|
||||
size_t start_idx = 0;
|
||||
while (start_idx < select_size) {
|
||||
size_t end_idx = start_idx + 1;
|
||||
while (end_idx < select_size && (rowid_vector[sel_rowid_idx[end_idx - 1]] ==
|
||||
rowid_vector[sel_rowid_idx[end_idx]] - 1)) {
|
||||
end_idx++;
|
||||
}
|
||||
size_t range = end_idx - start_idx;
|
||||
{
|
||||
_opts.stats->block_lazy_read_seek_num += 1;
|
||||
SCOPED_RAW_TIMER(&_opts.stats->block_lazy_read_seek_ns);
|
||||
_seek_columns(read_column_ids, rowid_vector[sel_rowid_idx[start_idx]]);
|
||||
}
|
||||
_read_columns(read_column_ids, *mutable_columns, range);
|
||||
start_idx += range;
|
||||
std::vector<rowid_t> rowids(select_size);
|
||||
for (size_t i = 0; i < select_size; ++i) {
|
||||
rowids[i] = rowid_vector[sel_rowid_idx[i]];
|
||||
}
|
||||
for (auto cid : read_column_ids) {
|
||||
auto& column = (*mutable_columns)[cid];
|
||||
_column_iterators[cid]->read_by_rowids(rowids.data(), select_size, column);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -160,10 +160,23 @@ public:
|
||||
|
||||
void insert_many_binary_data(char* data_array, uint32_t* len_array,
|
||||
uint32_t* start_offset_array, size_t num) override {
|
||||
size_t new_size = 0;
|
||||
for (size_t i = 0; i < num; i++) {
|
||||
new_size += len_array[i] + 1;
|
||||
}
|
||||
|
||||
const size_t old_size = chars.size();
|
||||
chars.resize(old_size + new_size);
|
||||
|
||||
Char* data = chars.data();
|
||||
size_t offset = old_size;
|
||||
for (size_t i = 0; i < num; i++) {
|
||||
uint32_t len = len_array[i];
|
||||
uint32_t start_offset = start_offset_array[i];
|
||||
insert_data(data_array + start_offset, len);
|
||||
if (len) memcpy(data + offset, data_array + start_offset, len);
|
||||
data[offset + len] = 0;
|
||||
offset += len + 1;
|
||||
offsets.push_back(offset);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user