Patch summary (free text before the first "diff --git" header; patch tools skip it).

  * be/src/io/fs/hdfs_file_system.cpp
      The HdfsFileSystem destructor still calls dec_ref() on _fs_handle when
      from_cache is set; when it is not set the handle is now deleted instead
      of being left alone.
  * be/src/runtime/types.h
      The TYPE_DATETIMEV2 branch now sets precision = 18 alongside scale = 6.
  * be/src/vec/exec/format/parquet/schema_desc.cpp
      Every "type.type = X" assignment becomes "type = TypeDescriptor(X)", and
      the hand-written precision = 27 / scale = 9 lines for TYPE_DECIMALV2 are
      dropped in favour of the constructor.
      Physical INT64 and INT96 no longer share a branch: INT64 stays
      TYPE_BIGINT, INT96 becomes TYPE_DATETIMEV2.
      ConvertedType integers are split up: INT_8 -> TYPE_TINYINT,
      UINT_8 / INT_16 -> TYPE_SMALLINT, UINT_16 / INT_32 -> TYPE_INT.

  NOTE(review): UINT_64 still maps to TYPE_BIGINT; confirm how values above
  the signed 64-bit range are expected to behave.
  NOTE(review): the LogicalType INTEGER path still maps signed widths <= 32
  and unsigned widths <= 16 to TYPE_INT, which differs from the narrower
  ConvertedType mapping introduced here; confirm this is intentional.

diff --git a/be/src/io/fs/hdfs_file_system.cpp b/be/src/io/fs/hdfs_file_system.cpp
index 65330b8015..0cbfc32f2a 100644
--- a/be/src/io/fs/hdfs_file_system.cpp
+++ b/be/src/io/fs/hdfs_file_system.cpp
@@ -69,8 +69,12 @@ HdfsFileSystem::HdfsFileSystem(const THdfsParams& hdfs_params, const std::string
 }
 
 HdfsFileSystem::~HdfsFileSystem() {
-    if (_fs_handle && _fs_handle->from_cache) {
-        _fs_handle->dec_ref();
+    if (_fs_handle != nullptr) {
+        if (_fs_handle->from_cache) {
+            _fs_handle->dec_ref();
+        } else {
+            delete _fs_handle;
+        }
     }
 }
 
diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h
index 06629a655e..aca6336f6b 100644
--- a/be/src/runtime/types.h
+++ b/be/src/runtime/types.h
@@ -74,6 +74,7 @@ struct TypeDescriptor {
             precision = 27;
             scale = 9;
         } else if (type == TYPE_DATETIMEV2) {
+            precision = 18;
             scale = 6;
         }
     }
diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp
index 2af4d40ea2..b8b9b07184 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.cpp
+++ b/be/src/vec/exec/format/parquet/schema_desc.cpp
@@ -167,24 +167,27 @@ TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& ph
     if (type.type == INVALID_TYPE) {
         switch (physical_schema.type) {
         case tparquet::Type::BOOLEAN:
-            type.type = TYPE_BOOLEAN;
+            type = TypeDescriptor(TYPE_BOOLEAN);
             break;
         case tparquet::Type::INT32:
-            type.type = TYPE_INT;
+            type = TypeDescriptor(TYPE_INT);
             break;
         case tparquet::Type::INT64:
+            type = TypeDescriptor(TYPE_BIGINT);
+            break;
         case tparquet::Type::INT96:
-            type.type = TYPE_BIGINT;
+            // in most cases, it's a nano timestamp
+            type = TypeDescriptor(TYPE_DATETIMEV2);
             break;
         case tparquet::Type::FLOAT:
-            type.type = TYPE_FLOAT;
+            type = TypeDescriptor(TYPE_FLOAT);
             break;
         case tparquet::Type::DOUBLE:
-            type.type = TYPE_DOUBLE;
+            type = TypeDescriptor(TYPE_DOUBLE);
             break;
         case tparquet::Type::BYTE_ARRAY:
         case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
-            type.type = TYPE_STRING;
+            type = TypeDescriptor(TYPE_STRING);
             break;
         default:
             break;
@@ -196,33 +199,31 @@ TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& ph
 TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logicalType) {
     TypeDescriptor type;
     if (logicalType.__isset.STRING) {
-        type.type = TYPE_STRING;
+        type = TypeDescriptor(TYPE_STRING);
     } else if (logicalType.__isset.DECIMAL) {
-        type.type = TYPE_DECIMALV2;
-        type.precision = 27;
-        type.scale = 9;
+        type = TypeDescriptor(TYPE_DECIMALV2);
     } else if (logicalType.__isset.DATE) {
-        type.type = TYPE_DATEV2;
+        type = TypeDescriptor(TYPE_DATEV2);
     } else if (logicalType.__isset.INTEGER) {
         if (logicalType.INTEGER.isSigned) {
             if (logicalType.INTEGER.bitWidth <= 32) {
-                type.type = TYPE_INT;
+                type = TypeDescriptor(TYPE_INT);
             } else {
-                type.type = TYPE_BIGINT;
+                type = TypeDescriptor(TYPE_BIGINT);
             }
         } else {
             if (logicalType.INTEGER.bitWidth <= 16) {
-                type.type = TYPE_INT;
+                type = TypeDescriptor(TYPE_INT);
             } else {
-                type.type = TYPE_BIGINT;
+                type = TypeDescriptor(TYPE_BIGINT);
             }
         }
     } else if (logicalType.__isset.TIME) {
-        type.type = TYPE_TIMEV2;
+        type = TypeDescriptor(TYPE_TIMEV2);
     } else if (logicalType.__isset.TIMESTAMP) {
-        type.type = TYPE_DATETIMEV2;
+        type = TypeDescriptor(TYPE_DATETIMEV2);
     } else {
-        type.type = INVALID_TYPE;
+        type = TypeDescriptor(INVALID_TYPE);
     }
     return type;
 }
@@ -231,39 +232,41 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::ConvertedType::t
     TypeDescriptor type;
     switch (convertedType) {
     case tparquet::ConvertedType::type::UTF8:
-        type.type = TYPE_STRING;
+        type = TypeDescriptor(TYPE_STRING);
         break;
     case tparquet::ConvertedType::type::DECIMAL:
-        type.type = TYPE_DECIMALV2;
-        type.precision = 27;
-        type.scale = 9;
+        type = TypeDescriptor(TYPE_DECIMALV2);
         break;
     case tparquet::ConvertedType::type::DATE:
-        type.type = TYPE_DATEV2;
+        type = TypeDescriptor(TYPE_DATEV2);
         break;
     case tparquet::ConvertedType::type::TIME_MILLIS:
     case tparquet::ConvertedType::type::TIME_MICROS:
-        type.type = TYPE_TIMEV2;
+        type = TypeDescriptor(TYPE_TIMEV2);
         break;
     case tparquet::ConvertedType::type::TIMESTAMP_MILLIS:
     case tparquet::ConvertedType::type::TIMESTAMP_MICROS:
-        type.type = TYPE_DATETIMEV2;
+        type = TypeDescriptor(TYPE_DATETIMEV2);
+        break;
+    case tparquet::ConvertedType::type::INT_8:
+        type = TypeDescriptor(TYPE_TINYINT);
         break;
     case tparquet::ConvertedType::type::UINT_8:
-    case tparquet::ConvertedType::type::UINT_16:
-    case tparquet::ConvertedType::type::INT_8:
     case tparquet::ConvertedType::type::INT_16:
+        type = TypeDescriptor(TYPE_SMALLINT);
+        break;
+    case tparquet::ConvertedType::type::UINT_16:
     case tparquet::ConvertedType::type::INT_32:
-        type.type = TYPE_INT;
+        type = TypeDescriptor(TYPE_INT);
         break;
     case tparquet::ConvertedType::type::UINT_32:
     case tparquet::ConvertedType::type::UINT_64:
     case tparquet::ConvertedType::type::INT_64:
-        type.type = TYPE_BIGINT;
+        type = TypeDescriptor(TYPE_BIGINT);
         break;
     default:
         LOG(WARNING) << "Not supported parquet ConvertedType: " << convertedType;
-        type = INVALID_TYPE;
+        type = TypeDescriptor(INVALID_TYPE);
         break;
     }
     return type;