From f28c75bd808d6d2fc69348c4b7a3aca5de0ed0bb Mon Sep 17 00:00:00 2001 From: Ashin Gau Date: Thu, 6 Apr 2023 10:00:29 +0800 Subject: [PATCH] [fix](file_reader) bad_typeid when reading csv&json files (#18400) When PR(#18340) resolved its conflict with PR(#18301), it changed which file_reader is created, resulting in the error: [E-123] std::bad_typeid exception. --- be/src/vec/exec/format/csv/csv_reader.cpp | 2 +- be/src/vec/exec/format/json/new_json_reader.cpp | 2 +- be/src/vec/exec/format/orc/vorc_reader.h | 4 ++-- be/src/vec/exec/format/parquet/vparquet_reader.cpp | 2 ++ 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp index 7f114167ea..ee15846cde 100644 --- a/be/src/vec/exec/format/csv/csv_reader.cpp +++ b/be/src/vec/exec/format/csv/csv_reader.cpp @@ -145,7 +145,7 @@ Status CsvReader::init_reader(bool is_load) { io::FileCachePolicy cache_policy = FileFactory::get_cache_policy(_state); RETURN_IF_ERROR(FileFactory::create_file_reader(_profile, _system_properties, _file_description, &_file_system, - &_file_reader, cache_policy)); + &csv_file_reader, cache_policy)); } if (typeid_cast(csv_file_reader.get()) != nullptr || typeid_cast(csv_file_reader.get()) != nullptr) { diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index cea79dad75..0f6fc0030e 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -342,7 +342,7 @@ Status NewJsonReader::_open_file_reader() { io::FileCachePolicy cache_policy = FileFactory::get_cache_policy(_state); RETURN_IF_ERROR(FileFactory::create_file_reader(_profile, _system_properties, _file_description, &_file_system, - &_file_reader, cache_policy)); + &json_file_reader, cache_policy)); } if (typeid_cast(json_file_reader.get()) != nullptr || typeid_cast(json_file_reader.get()) != nullptr) { diff --git 
a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 5430922833..6e520dce15 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -247,8 +247,8 @@ private: void _collect_profile_on_close(); private: - RuntimeProfile* _profile; - RuntimeState* _state; + RuntimeProfile* _profile = nullptr; + RuntimeState* _state = nullptr; const TFileScanRangeParams& _scan_params; const TFileRangeDesc& _scan_range; FileSystemProperties _system_properties; diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index 1c34ea8412..6a7fb58362 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -205,6 +205,7 @@ Status ParquetReader::_open_file() { _file_description.path); } _column_statistics.read_bytes += meta_size; + // read twice: parse magic number & parse meta data _column_statistics.read_calls += 2; } return Status::OK(); @@ -631,6 +632,7 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group, RETURN_IF_ERROR( _file_reader->read_at(page_index._offset_index_start, res, &bytes_read, _io_ctx)); _column_statistics.read_bytes += bytes_read; + // read twice: parse column index & parse offset index _column_statistics.read_calls += 2; for (auto& read_col : _read_columns) { auto conjunct_iter = _colname_to_value_range->find(read_col._file_slot_name);