[fix](parquet) Fix the problem that when the Parquet reader reads files by column index, multiple threads modify the same object (#50161) (#50496)

bp #50161
2025-05-08 15:51:25 +08:00
parent 6f84c4d7e2
commit 5501e130bf
24 changed files with 225 additions and 59 deletions
--- a/be/src/vec/exec/format/avro/avro_jni_reader.cpp
+++ b/be/src/vec/exec/format/avro/avro_jni_reader.cpp
@ -55,7 +55,7 @@ Status AvroJNIReader::get_columns(std::unordered_map<std::string, TypeDescriptor
 }

 Status AvroJNIReader::init_fetch_table_reader(
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
    _colname_to_value_range = colname_to_value_range;
    std::ostringstream required_fields;
    std::ostringstream columns_types;
--- a/be/src/vec/exec/format/avro/avro_jni_reader.h
+++ b/be/src/vec/exec/format/avro/avro_jni_reader.h
@ -71,7 +71,7 @@ public:
                       std::unordered_set<std::string>* missing_cols) override;

    Status init_fetch_table_reader(
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);

    TFileType::type get_file_type();

@ -85,7 +85,7 @@ public:
 private:
    const TFileScanRangeParams _params;
    const TFileRangeDesc _range;
-    std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range = nullptr;
+    const std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range = nullptr;
 };

 } // namespace doris::vectorized
--- a/be/src/vec/exec/format/jni_reader.cpp
+++ b/be/src/vec/exec/format/jni_reader.cpp
@ -79,7 +79,7 @@ Status MockJniReader::get_columns(std::unordered_map<std::string, TypeDescriptor
 }

 Status MockJniReader::init_reader(
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
    _colname_to_value_range = colname_to_value_range;
    RETURN_IF_ERROR(_jni_connector->init(colname_to_value_range));
    return _jni_connector->open(_state, _profile);
--- a/be/src/vec/exec/format/jni_reader.h
+++ b/be/src/vec/exec/format/jni_reader.h
@ -83,7 +83,7 @@ public:
                       std::unordered_set<std::string>* missing_cols) override;

    Status init_reader(
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);

    Status close() override {
        if (_jni_connector) {
@ -100,7 +100,7 @@ protected:
    }

 private:
-    std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range;
+    const std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range;
 };

 } // namespace doris::vectorized
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@ -279,7 +279,7 @@ Status OrcReader::_create_file_reader() {

 Status OrcReader::init_reader(
        const std::vector<std::string>* column_names,
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
        const VExprContextSPtrs& conjuncts, bool is_acid, const TupleDescriptor* tuple_descriptor,
        const RowDescriptor* row_descriptor,
        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
@ -694,7 +694,7 @@ bool static build_search_argument(std::vector<OrcPredicate>& predicates, int ind
 }

 bool OrcReader::_init_search_argument(
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
    if ((!_enable_filter_by_min_max) || colname_to_value_range->empty()) {
        return false;
    }
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@ -142,7 +142,7 @@ public:
    //If you want to read the file by index instead of column name, set hive_use_column_names to false.
    Status init_reader(
            const std::vector<std::string>* column_names,
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
            const VExprContextSPtrs& conjuncts, bool is_acid,
            const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
@ -291,7 +291,7 @@ private:
    static bool _check_acid_schema(const orc::Type& type);
    static const orc::Type& _remove_acid(const orc::Type& type);
    bool _init_search_argument(
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
    void _init_bloom_filter(
            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
    void _init_system_properties();
@ -598,7 +598,7 @@ private:
    std::vector<DecimalScaleParams> _decimal_scale_params;
    size_t _decimal_scale_params_index;

-    std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range;
+    const std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range = nullptr;
    bool _is_acid = false;
    std::unique_ptr<IColumn::Filter> _filter;
    LazyReadContext _lazy_read_ctx;
--- a/be/src/vec/exec/format/parquet/vparquet_page_index.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_page_index.cpp
@ -57,7 +57,7 @@ Status PageIndex::create_skipped_row_range(tparquet::OffsetIndex& offset_index,
 }

 Status PageIndex::collect_skipped_page_range(tparquet::ColumnIndex* column_index,
-                                             ColumnValueRangeType& col_val_range,
+                                             const ColumnValueRangeType& col_val_range,
                                             const FieldSchema* col_schema,
                                             std::vector<int>& skipped_ranges,
                                             const cctz::time_zone& ctz) {
--- a/be/src/vec/exec/format/parquet/vparquet_page_index.h
+++ b/be/src/vec/exec/format/parquet/vparquet_page_index.h
@ -47,7 +47,7 @@ public:
    Status create_skipped_row_range(tparquet::OffsetIndex& offset_index, int total_rows_of_group,
                                    int page_idx, RowRange* row_range);
    Status collect_skipped_page_range(tparquet::ColumnIndex* column_index,
-                                      ColumnValueRangeType& col_val_range,
+                                      const ColumnValueRangeType& col_val_range,
                                      const FieldSchema* col_schema,
                                      std::vector<int>& skipped_ranges, const cctz::time_zone& ctz);
    bool check_and_get_page_index_ranges(const std::vector<tparquet::ColumnChunk>& columns);
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@ -295,7 +295,7 @@ void ParquetReader::iceberg_sanitize(const std::vector<std::string>& read_column
 Status ParquetReader::init_reader(
        const std::vector<std::string>& all_column_names,
        const std::vector<std::string>& missing_column_names,
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
        const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
        const RowDescriptor* row_descriptor,
        const std::unordered_map<std::string, int>* colname_to_slot_id,
@ -346,7 +346,6 @@ Status ParquetReader::init_reader(
            _missing_cols.emplace_back(name);
        }
    } else {
-        std::unordered_map<std::string, ColumnValueRangeType> new_colname_to_value_range;
        const auto& table_column_idxs = _scan_params.column_idxs;
        std::map<int, int> table_col_id_to_idx;
        for (int i = 0; i < table_column_idxs.size(); i++) {
@ -360,21 +359,15 @@ Status ParquetReader::init_reader(
                auto& table_col = all_column_names[idx];
                auto file_col = schema_desc.get_column(id)->name;
                _read_columns.emplace_back(file_col);
+                _table_col_to_file_col[table_col] = file_col;

-                if (table_col != file_col) {
-                    _table_col_to_file_col[table_col] = file_col;
-                    auto iter = _colname_to_value_range->find(table_col);
-                    if (iter != _colname_to_value_range->end()) {
-                        continue;
-                    }
-                    new_colname_to_value_range[file_col] = iter->second;
-                    _colname_to_value_range->erase(iter->first);
+                auto iter = _colname_to_value_range->find(table_col);
+                if (iter != _colname_to_value_range->end()) {
+                    _colname_to_value_range_index_read.emplace(file_col, iter->second);
                }
            }
        }
-        for (auto it : new_colname_to_value_range) {
-            _colname_to_value_range->emplace(it.first, std::move(it.second));
-        }
+        _colname_to_value_range = &_colname_to_value_range_index_read;
    }
    // build column predicates for column lazy read
    _lazy_read_ctx.conjuncts = conjuncts;
--- a/be/src/vec/exec/format/parquet/vparquet_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.h
@ -111,7 +111,7 @@ public:
    Status init_reader(
            const std::vector<std::string>& all_column_names,
            const std::vector<std::string>& missing_column_names,
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
            const RowDescriptor* row_descriptor,
            const std::unordered_map<std::string, int>* colname_to_slot_id,
@ -251,7 +251,12 @@ private:
    int32_t _total_groups; // num of groups(stripes) of a parquet(orc) file
    // table column name to file column name map. For iceberg schema evolution.
    std::unordered_map<std::string, std::string> _table_col_to_file_col;
-    std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range = nullptr;
+    const std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range = nullptr;
+
+    // During initialization, multiple vfile_scanners' _colname_to_value_range point to the same object,
+    // so the content of that object must not be modified (doing so causes a multi-threading problem).
+    // _colname_to_value_range_index_read is used when _hive_use_column_names = false.
+    std::unordered_map<std::string, ColumnValueRangeType> _colname_to_value_range_index_read;
    std::vector<std::string> _read_columns;
    RowRange _whole_range = RowRange(0, 0);
    const std::vector<int64_t>* _delete_rows = nullptr;
--- a/be/src/vec/exec/format/table/hudi_jni_reader.cpp
+++ b/be/src/vec/exec/format/table/hudi_jni_reader.cpp
@ -95,7 +95,7 @@ Status HudiJniReader::get_columns(std::unordered_map<std::string, TypeDescriptor
 }

 Status HudiJniReader::init_reader(
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
    _colname_to_value_range = colname_to_value_range;
    RETURN_IF_ERROR(_jni_connector->init(colname_to_value_range));
    return _jni_connector->open(_state, _profile);
--- a/be/src/vec/exec/format/table/hudi_jni_reader.h
+++ b/be/src/vec/exec/format/table/hudi_jni_reader.h
@ -58,12 +58,12 @@ public:
                       std::unordered_set<std::string>* missing_cols) override;

    Status init_reader(
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);

 private:
    const TFileScanRangeParams& _scan_params;
    const THudiFileDesc& _hudi_params;
-    std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range;
+    const std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range;
 };

 } // namespace doris::vectorized
--- a/be/src/vec/exec/format/table/iceberg_reader.cpp
+++ b/be/src/vec/exec/format/table/iceberg_reader.cpp
@ -530,7 +530,7 @@ void IcebergTableReader::_gen_position_delete_file_range(Block& block, DeleteFil
 Status IcebergParquetReader::init_reader(
        const std::vector<std::string>& file_col_names,
        const std::unordered_map<int, std::string>& col_id_name_map,
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
        const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
        const RowDescriptor* row_descriptor,
        const std::unordered_map<std::string, int>* colname_to_slot_id,
@ -603,7 +603,7 @@ Status IcebergParquetReader ::_read_position_delete_file(const TFileRangeDesc* d
 Status IcebergOrcReader::init_reader(
        const std::vector<std::string>& file_col_names,
        const std::unordered_map<int, std::string>& col_id_name_map,
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
        const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
        const RowDescriptor* row_descriptor,
        const std::unordered_map<std::string, int>* colname_to_slot_id,
--- a/be/src/vec/exec/format/table/iceberg_reader.h
+++ b/be/src/vec/exec/format/table/iceberg_reader.h
@ -150,7 +150,7 @@ protected:
    std::unordered_map<std::string, std::string> _file_col_to_table_col;
    // table column name to file column name map. For iceberg schema evolution.
    std::unordered_map<std::string, std::string> _table_col_to_file_col;
-    std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range;
+    const std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range;
    // copy from _colname_to_value_range with new column name that is in parquet/orc file, to support schema evolution.
    std::unordered_map<std::string, ColumnValueRangeType> _new_colname_to_value_range;
    // column id to name map. Collect from FE slot descriptor.
@ -205,7 +205,7 @@ public:
    Status init_reader(
            const std::vector<std::string>& file_col_names,
            const std::unordered_map<int, std::string>& col_id_name_map,
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
            const RowDescriptor* row_descriptor,
            const std::unordered_map<std::string, int>* colname_to_slot_id,
@ -251,7 +251,7 @@ public:
    Status init_reader(
            const std::vector<std::string>& file_col_names,
            const std::unordered_map<int, std::string>& col_id_name_map,
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
            const RowDescriptor* row_descriptor,
            const std::unordered_map<std::string, int>* colname_to_slot_id,
--- a/be/src/vec/exec/format/table/max_compute_jni_reader.cpp
+++ b/be/src/vec/exec/format/table/max_compute_jni_reader.cpp
@ -104,7 +104,7 @@ Status MaxComputeJniReader::get_columns(
 }

 Status MaxComputeJniReader::init_reader(
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
    _colname_to_value_range = colname_to_value_range;
    RETURN_IF_ERROR(_jni_connector->init(colname_to_value_range));
    return _jni_connector->open(_state, _profile);
--- a/be/src/vec/exec/format/table/max_compute_jni_reader.h
+++ b/be/src/vec/exec/format/table/max_compute_jni_reader.h
@ -65,13 +65,13 @@ public:
                       std::unordered_set<std::string>* missing_cols) override;

    Status init_reader(
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);

 private:
    const MaxComputeTableDescriptor* _table_desc = nullptr;
    const TMaxComputeFileDesc& _max_compute_params;
    const TFileRangeDesc& _range;
-    std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range = nullptr;
+    const std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range = nullptr;
 };

 } // namespace doris::vectorized
--- a/be/src/vec/exec/format/table/paimon_jni_reader.cpp
+++ b/be/src/vec/exec/format/table/paimon_jni_reader.cpp
@ -96,7 +96,7 @@ Status PaimonJniReader::get_columns(std::unordered_map<std::string, TypeDescript
 }

 Status PaimonJniReader::init_reader(
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
    _colname_to_value_range = colname_to_value_range;
    RETURN_IF_ERROR(_jni_connector->init(colname_to_value_range));
    return _jni_connector->open(_state, _profile);
--- a/be/src/vec/exec/format/table/paimon_jni_reader.h
+++ b/be/src/vec/exec/format/table/paimon_jni_reader.h
@ -64,10 +64,10 @@ public:
                       std::unordered_set<std::string>* missing_cols) override;

    Status init_reader(
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);

 private:
-    std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range;
+    const std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range;
 };

 } // namespace doris::vectorized
--- a/be/src/vec/exec/format/table/transactional_hive_reader.cpp
+++ b/be/src/vec/exec/format/table/transactional_hive_reader.cpp
@ -56,7 +56,7 @@ TransactionalHiveReader::TransactionalHiveReader(std::unique_ptr<GenericReader>

 Status TransactionalHiveReader::init_reader(
        const std::vector<std::string>& column_names,
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
        const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
        const RowDescriptor* row_descriptor,
        const VExprContextSPtrs* not_single_slot_filter_conjuncts,
--- a/be/src/vec/exec/format/table/transactional_hive_reader.h
+++ b/be/src/vec/exec/format/table/transactional_hive_reader.h
@ -98,7 +98,7 @@ public:

    Status init_reader(
            const std::vector<std::string>& column_names,
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range,
            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
            const RowDescriptor* row_descriptor,
            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
--- a/be/src/vec/exec/jni_connector.cpp
+++ b/be/src/vec/exec/jni_connector.cpp
@ -91,7 +91,7 @@ Status JniConnector::open(RuntimeState* state, RuntimeProfile* profile) {
 }

 Status JniConnector::init(
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
    // TODO: This logic need to be changed.
    // See the comment of "predicates" field in JniScanner.java

@ -408,7 +408,7 @@ Status JniConnector::_fill_struct_column(TableMetaAddress& address, MutableColum
 }

 void JniConnector::_generate_predicates(
-        std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
+        const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
    if (colname_to_value_range == nullptr) {
        return;
    }
--- a/be/src/vec/exec/jni_connector.h
+++ b/be/src/vec/exec/jni_connector.h
@ -222,7 +222,8 @@ public:
     * number_filters(4) | length(4) | column_name | op(4) | scale(4) | num_values(4) | value_length(4) | value | ...
     * Then, pass the byte array address in configuration map, like "push_down_predicates=${address}"
     */
-    Status init(std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
+    Status init(
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);

    /**
     * Call java side function JniScanner.getNextBatchMeta. The columns information are stored as long array:
@ -353,7 +354,7 @@ private:
    }

    void _generate_predicates(
-            std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);
+            const std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range);

    template <PrimitiveType primitive_type>
    void _parse_value_range(const ColumnValueRange<primitive_type>& col_val_range,
--- a/be/src/vec/exec/scan/vfile_scanner.h
+++ b/be/src/vec/exec/scan/vfile_scanner.h
@ -107,7 +107,7 @@ protected:

    std::unique_ptr<GenericReader> _cur_reader;
    bool _cur_reader_eof;
-    std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range = nullptr;
+    const std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range = nullptr;
    // File source slot descriptors
    std::vector<SlotDescriptor*> _file_slot_descs;
    // col names from _file_slot_descs
--- a/be/test/vec/exec/format/parquet/parquet_reader_test.cpp
+++ b/be/test/vec/exec/format/parquet/parquet_reader_test.cpp
@ -56,10 +56,9 @@ public:
    ParquetReaderTest() {}
 };

-TEST_F(ParquetReaderTest, normal) {
-    TDescriptorTable t_desc_table;
-    TTableDescriptor t_table_desc;
-
+static void create_table_desc(TDescriptorTable& t_desc_table, TTableDescriptor& t_table_desc,
+                              std::vector<std::string> table_column_names,
+                              std::vector<TPrimitiveType::type> types) {
    t_table_desc.id = 0;
    t_table_desc.tableType = TTableType::OLAP_TABLE;
    t_table_desc.numCols = 0;
@ -68,10 +67,7 @@ TEST_F(ParquetReaderTest, normal) {
    t_desc_table.__isset.tableDescriptors = true;

    // init boolean and numeric slot
-    std::vector<std::string> numeric_types = {"boolean_col", "tinyint_col", "smallint_col",
-                                              "int_col",     "bigint_col",  "float_col",
-                                              "double_col"};
-    for (int i = 0; i < numeric_types.size(); i++) {
+    for (int i = 0; i < table_column_names.size(); i++) {
        TSlotDescriptor tslot_desc;
        {
            tslot_desc.id = i;
@ -81,7 +77,7 @@ TEST_F(ParquetReaderTest, normal) {
                TTypeNode node;
                node.__set_type(TTypeNodeType::SCALAR);
                TScalarType scalar_type;
-                scalar_type.__set_type(TPrimitiveType::type(i + 2));
+                scalar_type.__set_type(types[i]);
                node.__set_scalar_type(scalar_type);
                type.types.push_back(node);
            }
@ -90,7 +86,7 @@ TEST_F(ParquetReaderTest, normal) {
            tslot_desc.byteOffset = 0;
            tslot_desc.nullIndicatorByte = 0;
            tslot_desc.nullIndicatorBit = -1;
-            tslot_desc.colName = numeric_types[i];
+            tslot_desc.colName = table_column_names[i];
            tslot_desc.slotIdx = 0;
            tslot_desc.isMaterialized = true;
            t_desc_table.slotDescriptors.push_back(tslot_desc);
@ -108,6 +104,19 @@ TEST_F(ParquetReaderTest, normal) {
        t_tuple_desc.__isset.tableId = true;
        t_desc_table.tupleDescriptors.push_back(t_tuple_desc);
    }
+};
+
+TEST_F(ParquetReaderTest, normal) {
+    TDescriptorTable t_desc_table;
+    TTableDescriptor t_table_desc;
+    std::vector<std::string> table_column_names = {"boolean_col", "tinyint_col", "smallint_col",
+                                                   "int_col",     "bigint_col",  "float_col",
+                                                   "double_col"};
+    std::vector<TPrimitiveType::type> table_column_types = {
+            TPrimitiveType::BOOLEAN, TPrimitiveType::TINYINT, TPrimitiveType::SMALLINT,
+            TPrimitiveType::INT,     TPrimitiveType::BIGINT,  TPrimitiveType::FLOAT,
+            TPrimitiveType::DOUBLE};
+    create_table_desc(t_desc_table, t_table_desc, table_column_names, table_column_types);
    DescriptorTbl* desc_tbl;
    ObjectPool obj_pool;
    static_cast<void>(DescriptorTbl::create(&obj_pool, t_desc_table, &desc_tbl));
@ -164,5 +173,163 @@ TEST_F(ParquetReaderTest, normal) {
    delete p_reader;
 }

+static ParquetReader* create_parquet_reader(TFileScanRangeParams& scan_params,
+                                            std::vector<std::string> table_column_names,
+                                            std::vector<TPrimitiveType::type> types) {
+    TDescriptorTable t_desc_table;
+    TTableDescriptor t_table_desc;
+
+    create_table_desc(t_desc_table, t_table_desc, table_column_names, types);
+    DescriptorTbl* desc_tbl;
+    ObjectPool obj_pool;
+    static_cast<void>(DescriptorTbl::create(&obj_pool, t_desc_table, &desc_tbl));
+
+    auto slot_descs = desc_tbl->get_tuple_descriptor(0)->slots();
+    auto local_fs = io::global_local_filesystem();
+    io::FileReaderSPtr reader;
+    static_cast<void>(local_fs->open_file(
+            "./be/test/exec/test_data/parquet_scanner/type-decoder.parquet", &reader));
+
+    cctz::time_zone ctz;
+    TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, ctz);
+    std::vector<std::string> column_names;
+    std::vector<std::string> missing_column_names;
+    for (int i = 0; i < slot_descs.size(); i++) {
+        column_names.push_back(slot_descs[i]->col_name());
+    }
+    TFileRangeDesc scan_range;
+    {
+        scan_range.start_offset = 0;
+        scan_range.size = 1000;
+    }
+    auto p_reader =
+            new ParquetReader(nullptr, scan_params, scan_range, 992, &ctz, nullptr, nullptr);
+    p_reader->set_file_reader(reader);
+    return p_reader;
+}
+
+TEST_F(ParquetReaderTest, use_column_name) {
+    bool use_column_name = true;
+
+    std::vector<std::string> table_column_names = {"boolean_col", "tinyint_col", "smallint_col",
+                                                   "int_col",     "bigint_col",  "float_col",
+                                                   "double_col"};
+    std::vector<TPrimitiveType::type> table_column_types = {
+            TPrimitiveType::BOOLEAN, TPrimitiveType::TINYINT, TPrimitiveType::SMALLINT,
+            TPrimitiveType::INT,     TPrimitiveType::BIGINT,  TPrimitiveType::FLOAT,
+            TPrimitiveType::DOUBLE};
+    TFileScanRangeParams scan_params;
+
+    auto p_reader = create_parquet_reader(scan_params, table_column_names, table_column_types);
+    std::unordered_map<std::string, ColumnValueRangeType> colname_to_value_range;
+    colname_to_value_range.emplace("boolean_col", ColumnValueRange<TYPE_BOOLEAN>("boolean_col"));
+    colname_to_value_range.emplace("tinyint_col", ColumnValueRange<TYPE_TINYINT>("tinyint_col"));
+    colname_to_value_range.emplace("smallint_col", ColumnValueRange<TYPE_SMALLINT>("smallint_col"));
+    colname_to_value_range.emplace("int_col", ColumnValueRange<TYPE_INT>("int_col"));
+
+    static_cast<void>(p_reader->open());
+    static_cast<void>(p_reader->init_reader(table_column_names, {}, &colname_to_value_range, {},
+                                            nullptr, nullptr, nullptr, nullptr, nullptr, false,
+                                            use_column_name));
+
+    std::vector<std::string> read_columns_ans = {"tinyint_col", "smallint_col", "int_col",
+                                                 "bigint_col",  "boolean_col",  "float_col",
+                                                 "double_col"};
+    EXPECT_EQ(p_reader->_read_columns, read_columns_ans);
+
+    std::vector<std::string> miss_columns_ans = {};
+    EXPECT_EQ(p_reader->_missing_cols, miss_columns_ans);
+    std::vector<std::string> colname_to_value_range_names_ans = {"tinyint_col", "smallint_col",
+                                                                 "int_col", "boolean_col"};
+    for (auto col : colname_to_value_range_names_ans) {
+        EXPECT_TRUE(p_reader->_colname_to_value_range->contains(col));
+    }
+    EXPECT_EQ(p_reader->_colname_to_value_range->size(), colname_to_value_range_names_ans.size());
+    delete p_reader;
+}
+
+TEST_F(ParquetReaderTest, use_column_name2) {
+    bool use_column_name = true;
+
+    std::vector<std::string> table_column_names = {"boolean_col", "tinyint_col", "smallint_col",
+                                                   "int_col",     "bigint_col",  "float_col",
+                                                   "test1",       "double_col",  "test2"};
+    std::vector<TPrimitiveType::type> table_column_types = {
+            TPrimitiveType::BOOLEAN, TPrimitiveType::TINYINT, TPrimitiveType::SMALLINT,
+            TPrimitiveType::INT,     TPrimitiveType::BIGINT,  TPrimitiveType::FLOAT,
+            TPrimitiveType::FLOAT,   TPrimitiveType::DOUBLE,  TPrimitiveType::DOUBLE};
+    TFileScanRangeParams scan_params;
+
+    auto p_reader = create_parquet_reader(scan_params, table_column_names, table_column_types);
+    std::unordered_map<std::string, ColumnValueRangeType> colname_to_value_range;
+    colname_to_value_range.emplace("boolean_col", ColumnValueRange<TYPE_BOOLEAN>("boolean_col"));
+    colname_to_value_range.emplace("tinyint_col", ColumnValueRange<TYPE_TINYINT>("tinyint_col"));
+    colname_to_value_range.emplace("smallint_col", ColumnValueRange<TYPE_SMALLINT>("smallint_col"));
+    colname_to_value_range.emplace("int_col", ColumnValueRange<TYPE_INT>("int_col"));
+
+    static_cast<void>(p_reader->open());
+    static_cast<void>(p_reader->init_reader(table_column_names, {"boolean_col"},
+                                            &colname_to_value_range, {}, nullptr, nullptr, nullptr,
+                                            nullptr, nullptr, false, use_column_name));
+
+    std::vector<std::string> read_columns_ans = {"tinyint_col", "smallint_col", "int_col",
+                                                 "bigint_col",  "float_col",    "double_col"};
+    EXPECT_EQ(p_reader->_read_columns, read_columns_ans);
+
+    std::vector<std::string> miss_columns_ans = {"boolean_col", "test1", "test2"};
+    EXPECT_EQ(p_reader->_missing_cols, miss_columns_ans);
+    std::vector<std::string> colname_to_value_range_names_ans = {"tinyint_col", "smallint_col",
+                                                                 "int_col", "boolean_col"};
+    for (auto col : colname_to_value_range_names_ans) {
+        EXPECT_TRUE(p_reader->_colname_to_value_range->contains(col));
+    }
+    EXPECT_EQ(p_reader->_colname_to_value_range->size(), colname_to_value_range_names_ans.size());
+    delete p_reader;
+}
+
+TEST_F(ParquetReaderTest, use_column_idx) {
+    bool use_column_name = false;
+
+    std::vector<std::string> table_column_names = {"col0", "col1",   "col3",
+                                                   "col7", "col100", "col102"};
+    std::vector<TPrimitiveType::type> table_column_types = {
+            TPrimitiveType::BOOLEAN, TPrimitiveType::TINYINT, TPrimitiveType::SMALLINT,
+            TPrimitiveType::INT,     TPrimitiveType::BIGINT,  TPrimitiveType::BIGINT};
+    TFileScanRangeParams scan_params;
+    scan_params.column_idxs.emplace_back(0);
+    scan_params.column_idxs.emplace_back(1);
+    scan_params.column_idxs.emplace_back(3);
+    scan_params.column_idxs.emplace_back(7);
+    scan_params.column_idxs.emplace_back(100);
+    scan_params.column_idxs.emplace_back(102);
+
+    auto p_reader = create_parquet_reader(scan_params, table_column_names, table_column_types);
+    std::unordered_map<std::string, ColumnValueRangeType> colname_to_value_range;
+    colname_to_value_range.emplace("col0", ColumnValueRange<TYPE_BOOLEAN>("col0"));
+    colname_to_value_range.emplace("col1", ColumnValueRange<TYPE_TINYINT>("col1"));
+    colname_to_value_range.emplace("col3", ColumnValueRange<TYPE_SMALLINT>("col3"));
+    colname_to_value_range.emplace("col102", ColumnValueRange<TYPE_SMALLINT>("col102"));
+
+    static_cast<void>(p_reader->open());
+    static_cast<void>(p_reader->init_reader(table_column_names, {}, &colname_to_value_range, {},
+                                            nullptr, nullptr, nullptr, nullptr, nullptr, false,
+                                            use_column_name));
+
+    std::vector<std::string> read_columns_ans = {"tinyint_col", "smallint_col", "bigint_col",
+                                                 "string_col"};
+    EXPECT_EQ(p_reader->_read_columns, read_columns_ans);
+
+    std::vector<std::string> miss_columns_ans = {"col100", "col102"};
+    EXPECT_EQ(p_reader->_missing_cols, miss_columns_ans);
+
+    std::vector<std::string> colname_to_value_range_names_ans = {"tinyint_col", "smallint_col",
+                                                                 "bigint_col"};
+    for (auto col : colname_to_value_range_names_ans) {
+        EXPECT_TRUE(p_reader->_colname_to_value_range->contains(col));
+    }
+    EXPECT_EQ(p_reader->_colname_to_value_range->size(), colname_to_value_range_names_ans.size());
+    delete p_reader;
+}
+
 } // namespace vectorized
 } // namespace doris