From c85271d2ae2dc8385af30b6ebc60f49f842f6d1d Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Tue, 20 Jun 2023 12:27:16 +0800 Subject: [PATCH] [Fix](orc-reader) Fix filter size mismatch in orc reader. (#20998) Fix filter size mismatch in orc reader introduced by #20806 --- be/src/vec/exec/format/orc/vorc_reader.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index f1d6fbc7dd..007aff91ca 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -1054,6 +1054,7 @@ Status OrcReader::_decode_string_dict_encoded_column(const std::string& col_name if (cvb->notNull[i]) { if constexpr (is_filter) { if (!filter_data[i]) { + string_values.emplace_back(empty_string.data(), 0); continue; } } @@ -1076,6 +1077,7 @@ Status OrcReader::_decode_string_dict_encoded_column(const std::string& col_name for (int i = 0; i < num_values; ++i) { if constexpr (is_filter) { if (!filter_data[i]) { + string_values.emplace_back(empty_string.data(), 0); continue; } } @@ -1095,6 +1097,7 @@ Status OrcReader::_decode_string_dict_encoded_column(const std::string& col_name if (cvb->notNull[i]) { if constexpr (is_filter) { if (!filter_data[i]) { + string_values.emplace_back(empty_string.data(), 0); continue; } } @@ -1110,6 +1113,7 @@ Status OrcReader::_decode_string_dict_encoded_column(const std::string& col_name for (int i = 0; i < num_values; ++i) { if constexpr (is_filter) { if (!filter_data[i]) { + string_values.emplace_back(empty_string.data(), 0); continue; } } @@ -1123,8 +1127,7 @@ Status OrcReader::_decode_string_dict_encoded_column(const std::string& col_name } } } - data_column->insert_many_strings_overflow(&string_values[0], string_values.size(), - max_value_length); + data_column->insert_many_strings(&string_values[0], string_values.size()); return Status::OK(); } @@ -1719,6 +1722,13 @@ Status OrcReader::on_string_dicts_loaded( std::unordered_map dict_value_to_code; size_t max_value_length = 0; uint64_t dictionaryCount = dict->dictionaryOffset.size() - 1; + if (dictionaryCount == 0) { + it = _dict_filter_cols.erase(it); + for (auto& ctx : ctxs) { + _non_dict_filter_conjuncts.emplace_back(ctx); + } + continue; + } dict_values.reserve(dictionaryCount); for (int i = 0; i < dictionaryCount; ++i) { char* val_ptr; @@ -2037,7 +2047,7 @@ MutableColumnPtr OrcReader::_convert_dict_column_to_string_column( } } } - res->insert_many_strings_overflow(&string_values[0], num_values, max_value_length); + res->insert_many_strings(&string_values[0], num_values); return res; }