From 2331ce10f18074d2a135ad8079c6df863719d920 Mon Sep 17 00:00:00 2001 From: xinghuayu007 <1450306854@qq.com> Date: Sat, 28 Nov 2020 09:56:29 +0800 Subject: [PATCH] [Bug]Parquet map/list/struct structure recognize (#4968) When a parquet file contains a `Map/List/Struct` structure, Doris cannot recognize the column correctly and throws the exception 'Invalid column: xxxx', which means Doris cannot find the column. The `Map` structure is recognized as two columns: `key` and `value`. The following is the schema of a parquet file as recognized by Doris. This patch tries to solve this problem. --- be/src/exec/parquet_reader.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/be/src/exec/parquet_reader.cpp b/be/src/exec/parquet_reader.cpp index 6ed2020650..0b38c16010 100644 --- a/be/src/exec/parquet_reader.cpp +++ b/be/src/exec/parquet_reader.cpp @@ -68,7 +68,11 @@ Status ParquetReaderWrap::init_parquet_reader(const std::vector auto *schemaDescriptor = _file_metadata->schema(); for (int i = 0; i < _file_metadata->num_columns(); ++i) { // Get the Column Reader for the boolean column - _map_column.emplace(schemaDescriptor->Column(i)->name(), i); + if (schemaDescriptor->Column(i)->max_definition_level() > 1) { + _map_column.emplace(schemaDescriptor->Column(i)->path()->ToDotVector()[0], i); + } else { + _map_column.emplace(schemaDescriptor->Column(i)->name(), i); + } } _timezone = timezone;