[fix](parquet-reader) fix coredump when loading datetime data into Doris from parquet (#15794)
`date_time_v2` checks the scale when a datetimev2 value is constructed:
```
LOG(FATAL) << fmt::format("Scale {} is out of bounds", scale);
```
This [PR](https://github.com/apache/doris/pull/15510) fixed this issue, but the parquet reader does not use the constructor to create `TypeDescriptor`, leaving `scale = -1` when reading datetimev2 data.
This commit is contained in:
@ -69,8 +69,12 @@ HdfsFileSystem::HdfsFileSystem(const THdfsParams& hdfs_params, const std::string
|
||||
}
|
||||
|
||||
HdfsFileSystem::~HdfsFileSystem() {
|
||||
if (_fs_handle && _fs_handle->from_cache) {
|
||||
_fs_handle->dec_ref();
|
||||
if (_fs_handle != nullptr) {
|
||||
if (_fs_handle->from_cache) {
|
||||
_fs_handle->dec_ref();
|
||||
} else {
|
||||
delete _fs_handle;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -74,6 +74,7 @@ struct TypeDescriptor {
|
||||
precision = 27;
|
||||
scale = 9;
|
||||
} else if (type == TYPE_DATETIMEV2) {
|
||||
precision = 18;
|
||||
scale = 6;
|
||||
}
|
||||
}
|
||||
|
||||
@ -167,24 +167,27 @@ TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& ph
|
||||
if (type.type == INVALID_TYPE) {
|
||||
switch (physical_schema.type) {
|
||||
case tparquet::Type::BOOLEAN:
|
||||
type.type = TYPE_BOOLEAN;
|
||||
type = TypeDescriptor(TYPE_BOOLEAN);
|
||||
break;
|
||||
case tparquet::Type::INT32:
|
||||
type.type = TYPE_INT;
|
||||
type = TypeDescriptor(TYPE_INT);
|
||||
break;
|
||||
case tparquet::Type::INT64:
|
||||
type = TypeDescriptor(TYPE_BIGINT);
|
||||
break;
|
||||
case tparquet::Type::INT96:
|
||||
type.type = TYPE_BIGINT;
|
||||
// in most cases, it's a nano timestamp
|
||||
type = TypeDescriptor(TYPE_DATETIMEV2);
|
||||
break;
|
||||
case tparquet::Type::FLOAT:
|
||||
type.type = TYPE_FLOAT;
|
||||
type = TypeDescriptor(TYPE_FLOAT);
|
||||
break;
|
||||
case tparquet::Type::DOUBLE:
|
||||
type.type = TYPE_DOUBLE;
|
||||
type = TypeDescriptor(TYPE_DOUBLE);
|
||||
break;
|
||||
case tparquet::Type::BYTE_ARRAY:
|
||||
case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
|
||||
type.type = TYPE_STRING;
|
||||
type = TypeDescriptor(TYPE_STRING);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@ -196,33 +199,31 @@ TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& ph
|
||||
TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logicalType) {
|
||||
TypeDescriptor type;
|
||||
if (logicalType.__isset.STRING) {
|
||||
type.type = TYPE_STRING;
|
||||
type = TypeDescriptor(TYPE_STRING);
|
||||
} else if (logicalType.__isset.DECIMAL) {
|
||||
type.type = TYPE_DECIMALV2;
|
||||
type.precision = 27;
|
||||
type.scale = 9;
|
||||
type = TypeDescriptor(TYPE_DECIMALV2);
|
||||
} else if (logicalType.__isset.DATE) {
|
||||
type.type = TYPE_DATEV2;
|
||||
type = TypeDescriptor(TYPE_DATEV2);
|
||||
} else if (logicalType.__isset.INTEGER) {
|
||||
if (logicalType.INTEGER.isSigned) {
|
||||
if (logicalType.INTEGER.bitWidth <= 32) {
|
||||
type.type = TYPE_INT;
|
||||
type = TypeDescriptor(TYPE_INT);
|
||||
} else {
|
||||
type.type = TYPE_BIGINT;
|
||||
type = TypeDescriptor(TYPE_BIGINT);
|
||||
}
|
||||
} else {
|
||||
if (logicalType.INTEGER.bitWidth <= 16) {
|
||||
type.type = TYPE_INT;
|
||||
type = TypeDescriptor(TYPE_INT);
|
||||
} else {
|
||||
type.type = TYPE_BIGINT;
|
||||
type = TypeDescriptor(TYPE_BIGINT);
|
||||
}
|
||||
}
|
||||
} else if (logicalType.__isset.TIME) {
|
||||
type.type = TYPE_TIMEV2;
|
||||
type = TypeDescriptor(TYPE_TIMEV2);
|
||||
} else if (logicalType.__isset.TIMESTAMP) {
|
||||
type.type = TYPE_DATETIMEV2;
|
||||
type = TypeDescriptor(TYPE_DATETIMEV2);
|
||||
} else {
|
||||
type.type = INVALID_TYPE;
|
||||
type = TypeDescriptor(INVALID_TYPE);
|
||||
}
|
||||
return type;
|
||||
}
|
||||
@ -231,39 +232,41 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::ConvertedType::t
|
||||
TypeDescriptor type;
|
||||
switch (convertedType) {
|
||||
case tparquet::ConvertedType::type::UTF8:
|
||||
type.type = TYPE_STRING;
|
||||
type = TypeDescriptor(TYPE_STRING);
|
||||
break;
|
||||
case tparquet::ConvertedType::type::DECIMAL:
|
||||
type.type = TYPE_DECIMALV2;
|
||||
type.precision = 27;
|
||||
type.scale = 9;
|
||||
type = TypeDescriptor(TYPE_DECIMALV2);
|
||||
break;
|
||||
case tparquet::ConvertedType::type::DATE:
|
||||
type.type = TYPE_DATEV2;
|
||||
type = TypeDescriptor(TYPE_DATEV2);
|
||||
break;
|
||||
case tparquet::ConvertedType::type::TIME_MILLIS:
|
||||
case tparquet::ConvertedType::type::TIME_MICROS:
|
||||
type.type = TYPE_TIMEV2;
|
||||
type = TypeDescriptor(TYPE_TIMEV2);
|
||||
break;
|
||||
case tparquet::ConvertedType::type::TIMESTAMP_MILLIS:
|
||||
case tparquet::ConvertedType::type::TIMESTAMP_MICROS:
|
||||
type.type = TYPE_DATETIMEV2;
|
||||
type = TypeDescriptor(TYPE_DATETIMEV2);
|
||||
break;
|
||||
case tparquet::ConvertedType::type::INT_8:
|
||||
type = TypeDescriptor(TYPE_TINYINT);
|
||||
break;
|
||||
case tparquet::ConvertedType::type::UINT_8:
|
||||
case tparquet::ConvertedType::type::UINT_16:
|
||||
case tparquet::ConvertedType::type::INT_8:
|
||||
case tparquet::ConvertedType::type::INT_16:
|
||||
type = TypeDescriptor(TYPE_SMALLINT);
|
||||
break;
|
||||
case tparquet::ConvertedType::type::UINT_16:
|
||||
case tparquet::ConvertedType::type::INT_32:
|
||||
type.type = TYPE_INT;
|
||||
type = TypeDescriptor(TYPE_INT);
|
||||
break;
|
||||
case tparquet::ConvertedType::type::UINT_32:
|
||||
case tparquet::ConvertedType::type::UINT_64:
|
||||
case tparquet::ConvertedType::type::INT_64:
|
||||
type.type = TYPE_BIGINT;
|
||||
type = TypeDescriptor(TYPE_BIGINT);
|
||||
break;
|
||||
default:
|
||||
LOG(WARNING) << "Not supported parquet ConvertedType: " << convertedType;
|
||||
type = INVALID_TYPE;
|
||||
type = TypeDescriptor(INVALID_TYPE);
|
||||
break;
|
||||
}
|
||||
return type;
|
||||
|
||||
Reference in New Issue
Block a user