[improvement](stream-load) use vector instead of skiplist when insert dup keys (#18686)
This commit is contained in:
@ -214,8 +214,7 @@ void MemTable::_insert_one_row_from_block(RowInBlock* row_in_block) {
|
||||
_rows++;
|
||||
bool overwritten = false;
|
||||
if (_keys_type == KeysType::DUP_KEYS) {
|
||||
// TODO: dup keys only need sort opertaion. Rethink skiplist is the beat way to sort columns?
|
||||
_vec_skip_list->Insert(row_in_block, &overwritten);
|
||||
// for dup keys, already store row_in_block in vector and will sort it on flush stage.
|
||||
DCHECK(!overwritten) << "Duplicate key model meet overwrite in SkipList";
|
||||
return;
|
||||
}
|
||||
@ -266,11 +265,23 @@ void MemTable::_collect_vskiplist_results() {
|
||||
VecTable::Iterator it(_vec_skip_list.get());
|
||||
vectorized::Block in_block = _input_mutable_block.to_block();
|
||||
if (_keys_type == KeysType::DUP_KEYS) {
|
||||
vectorized::MutableBlock mutable_block =
|
||||
vectorized::MutableBlock::build_mutable_block(&in_block);
|
||||
_vec_row_comparator->set_block(&mutable_block);
|
||||
std::sort(_row_in_blocks.begin(), _row_in_blocks.end(),
|
||||
[this](const RowInBlock* l, const RowInBlock* r) -> bool {
|
||||
auto value = (*(this->_vec_row_comparator))(l, r);
|
||||
if (value == 0) {
|
||||
return l->_row_pos > r->_row_pos;
|
||||
} else {
|
||||
return value < 0;
|
||||
}
|
||||
});
|
||||
std::vector<int> row_pos_vec;
|
||||
DCHECK(in_block.rows() <= std::numeric_limits<int>::max());
|
||||
row_pos_vec.reserve(in_block.rows());
|
||||
for (it.SeekToFirst(); it.Valid(); it.Next()) {
|
||||
row_pos_vec.emplace_back(it.key()->_row_pos);
|
||||
for (int i = 0; i < _row_in_blocks.size(); i++) {
|
||||
row_pos_vec.emplace_back(_row_in_blocks[i]->_row_pos);
|
||||
}
|
||||
_output_mutable_block.add_rows(&in_block, row_pos_vec.data(),
|
||||
row_pos_vec.data() + in_block.rows());
|
||||
|
||||
Reference in New Issue
Block a user