[chore](scan) Disable low cardinality optimization for compaction (#18424)

This commit is contained in:
Jerry Hu
2023-04-07 14:19:11 +08:00
committed by GitHub
parent 5d876414b5
commit d36e9bd523
4 changed files with 12 additions and 9 deletions

View File

@@ -861,6 +861,7 @@ Status FileColumnIterator::init(const ColumnIteratorOptions& opts) {
}
RETURN_IF_ERROR(get_block_compression_codec(_reader->get_compression(), &_compress_codec));
if (config::enable_low_cardinality_optimize &&
opts.io_ctx.reader_type == ReaderType::READER_QUERY &&
_reader->encoding_info()->encoding() == DICT_ENCODING) {
auto dict_encoding_type = _reader->get_dict_encoding_type();
if (dict_encoding_type == ColumnReader::UNKNOWN_DICT_ENCODING) {

View File

@@ -1325,6 +1325,7 @@ bool SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) {
if (field_type == OLAP_FIELD_TYPE_VARCHAR || field_type == OLAP_FIELD_TYPE_CHAR ||
field_type == OLAP_FIELD_TYPE_STRING) {
return config::enable_low_cardinality_optimize &&
_opts.io_ctx.reader_type == ReaderType::READER_QUERY &&
_column_iterators[_schema.unique_id(cid)]->is_all_dict_encoding();
} else if (field_type == OLAP_FIELD_TYPE_DECIMAL) {
return false;
@@ -1625,7 +1626,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
auto cid = _schema.column_id(i);
auto column_desc = _schema.column(cid);
if (_is_pred_column[cid]) {
_current_return_columns[cid] = Schema::get_predicate_column_ptr(*column_desc);
_current_return_columns[cid] =
Schema::get_predicate_column_ptr(*column_desc, _opts.io_ctx.reader_type);
_current_return_columns[cid]->set_rowset_segment_id(
{_segment->rowset_id(), _segment->id()});
_current_return_columns[cid]->reserve(_opts.block_row_max);

View File

@@ -118,7 +118,7 @@ vectorized::IColumn::MutablePtr Schema::get_column_by_field(const Field& field)
}
vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& field,
bool is_nullable) {
const ReaderType reader_type) {
vectorized::IColumn::MutablePtr ptr = nullptr;
switch (field.type()) {
case OLAP_FIELD_TYPE_BOOL:
@@ -160,7 +160,7 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& fi
case OLAP_FIELD_TYPE_CHAR:
case OLAP_FIELD_TYPE_VARCHAR:
case OLAP_FIELD_TYPE_STRING:
if (config::enable_low_cardinality_optimize) {
if (config::enable_low_cardinality_optimize && reader_type == ReaderType::READER_QUERY) {
ptr = doris::vectorized::ColumnDictionary<doris::vectorized::Int32>::create(
field.type());
} else {
@@ -181,29 +181,29 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& fi
break;
case OLAP_FIELD_TYPE_ARRAY:
ptr = doris::vectorized::ColumnArray::create(
get_predicate_column_ptr(*field.get_sub_field(0)),
get_predicate_column_ptr(*field.get_sub_field(0), reader_type),
doris::vectorized::ColumnArray::ColumnOffsets::create());
break;
case OLAP_FIELD_TYPE_STRUCT: {
size_t field_size = field.get_sub_field_count();
doris::vectorized::MutableColumns columns(field_size);
for (size_t i = 0; i < field_size; i++) {
columns[i] = get_predicate_column_ptr(*field.get_sub_field(i));
columns[i] = get_predicate_column_ptr(*field.get_sub_field(i), reader_type);
}
ptr = doris::vectorized::ColumnStruct::create(std::move(columns));
break;
}
case OLAP_FIELD_TYPE_MAP:
ptr = doris::vectorized::ColumnMap::create(
get_predicate_column_ptr(*field.get_sub_field(0)),
get_predicate_column_ptr(*field.get_sub_field(1)),
get_predicate_column_ptr(*field.get_sub_field(0), reader_type),
get_predicate_column_ptr(*field.get_sub_field(1), reader_type),
doris::vectorized::ColumnArray::ColumnOffsets::create());
break;
default:
LOG(FATAL) << "Unexpected type when choosing predicate column, type=" << field.type();
}
if (field.is_nullable() || is_nullable) {
if (field.is_nullable()) {
return doris::vectorized::ColumnNullable::create(std::move(ptr),
doris::vectorized::ColumnUInt8::create());
}

View File

@@ -132,7 +132,7 @@ public:
static vectorized::IColumn::MutablePtr get_column_by_field(const Field& field);
static vectorized::IColumn::MutablePtr get_predicate_column_ptr(const Field& field,
bool is_nullable = false);
const ReaderType reader_type);
const std::vector<Field*>& columns() const { return _cols; }