[fix](parquet) return error if schema changed in complex types (#31128)

Check the column type of complex types to prevent a core dump in BE. Without this check, ColumnReader hits a segmentation fault in the following case:
Change a complex type in Hive:

hive> create table struct_test(
           id int,
           sf struct<f1: int, f2: map<string, string>>) stored as parquet;

hive> insert into struct_test values
          (1, named_struct('f1', 1, 'f2', str_to_map('1:s2,2:s2'))),
          (2, named_struct('f1', 2, 'f2', str_to_map('k1:s3,k2:s4'))),
          (3, named_struct('f1', 3, 'f2', str_to_map('k1:s5,k2:s6')));

hive> alter table struct_test change sf sf struct<f1:int, f2: string>;
This commit is contained in:
Ashin Gau
2024-02-19 23:37:28 +08:00
committed by yiguolei
parent 8f70c00a26
commit 7ca3be6d51

View File

@ -418,7 +418,7 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
}
RETURN_IF_ERROR(_chunk_reader->decode_values(data_column, type, select_vector, is_dict_filter));
if (ancestor_nulls != 0) {
static_cast<void>(_chunk_reader->skip_values(ancestor_nulls, false));
RETURN_IF_ERROR(_chunk_reader->skip_values(ancestor_nulls, false));
}
if (!align_rows) {
@ -608,6 +608,9 @@ Status ArrayColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr&
}
data_column = doris_column->assume_mutable();
}
if (remove_nullable(type)->get_type_id() != TypeIndex::Array) {
return Status::Corruption("Wrong data type for column '{}'", _field_schema->name);
}
ColumnPtr& element_column = static_cast<ColumnArray&>(*data_column).get_data_ptr();
DataTypePtr& element_type = const_cast<DataTypePtr&>(
@ -654,6 +657,9 @@ Status MapColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr& t
}
data_column = doris_column->assume_mutable();
}
if (remove_nullable(type)->get_type_id() != TypeIndex::Map) {
return Status::Corruption("Wrong data type for column '{}'", _field_schema->name);
}
auto& map = static_cast<ColumnMap&>(*data_column);
DataTypePtr& key_type = const_cast<DataTypePtr&>(
@ -717,6 +723,9 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr
}
data_column = doris_column->assume_mutable();
}
if (remove_nullable(type)->get_type_id() != TypeIndex::Struct) {
return Status::Corruption("Wrong data type for column '{}'", _field_schema->name);
}
auto& doris_struct = static_cast<ColumnStruct&>(*data_column);
if (_child_readers.size() != doris_struct.tuple_size()) {
@ -731,7 +740,7 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr
size_t field_rows = 0;
bool field_eof = false;
if (i == 0) {
static_cast<void>(_child_readers[i]->read_column_data(
RETURN_IF_ERROR(_child_readers[i]->read_column_data(
doris_field, doris_type, select_vector, batch_size, &field_rows, &field_eof,
is_dict_filter));
*read_rows = field_rows;
@ -740,7 +749,7 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr
while (field_rows < *read_rows && !field_eof) {
size_t loop_rows = 0;
select_vector.reset();
static_cast<void>(_child_readers[i]->read_column_data(
RETURN_IF_ERROR(_child_readers[i]->read_column_data(
doris_field, doris_type, select_vector, *read_rows - field_rows, &loop_rows,
&field_eof, is_dict_filter));
field_rows += loop_rows;