[fix](new-scan)Fix new scanner load job bugs (#12903)

Fix bugs:
1. FE needs to send the file format (e.g. parquet, orc, ...) to BE while processing load jobs with the new scanner.
2. Try to derive the parquet column type from SchemaElement.type first, before falling back to the logical type and then the converted type.
This commit is contained in:
Jibing-Li
2022-09-24 17:21:19 +08:00
committed by GitHub
parent 3bb920ba54
commit f1a64ea09f
3 changed files with 33 additions and 4 deletions

View File

@ -152,11 +152,37 @@ void FieldDescriptor::parse_physical_field(const tparquet::SchemaElement& physic
physical_field->physical_type = physical_schema.type;
_physical_fields.push_back(physical_field);
physical_field->physical_column_index = _physical_fields.size() - 1;
if (physical_schema.__isset.logicalType) {
physical_field->type = convert_to_doris_type(physical_schema.logicalType);
} else if (physical_schema.__isset.converted_type) {
physical_field->type = convert_to_doris_type(physical_schema.converted_type);
physical_field->type = get_doris_type(physical_schema);
}
// Derives the Doris type for a parquet schema element.
//
// The physical type is checked first:
//   BOOLEAN      -> TYPE_BOOLEAN
//   INT32        -> TYPE_INT
//   INT64, INT96 -> TYPE_BIGINT
//   FLOAT        -> TYPE_FLOAT
//   DOUBLE       -> TYPE_DOUBLE
// For these physical types logicalType / converted_type are not consulted.
//
// For any other physical type the result comes from logicalType when it is
// set, otherwise from converted_type when it is set. If neither is set, a
// default-constructed TypeDescriptor is returned unchanged.
TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& physical_schema) {
    TypeDescriptor result;
    const auto physical = physical_schema.type;

    if (physical == tparquet::Type::BOOLEAN) {
        result.type = TYPE_BOOLEAN;
    } else if (physical == tparquet::Type::INT32) {
        result.type = TYPE_INT;
    } else if (physical == tparquet::Type::INT64 || physical == tparquet::Type::INT96) {
        // Both 64-bit and 96-bit integers are surfaced as BIGINT here.
        result.type = TYPE_BIGINT;
    } else if (physical == tparquet::Type::FLOAT) {
        result.type = TYPE_FLOAT;
    } else if (physical == tparquet::Type::DOUBLE) {
        result.type = TYPE_DOUBLE;
    } else if (physical_schema.__isset.logicalType) {
        // No direct physical mapping: prefer the logical type annotation.
        result = convert_to_doris_type(physical_schema.logicalType);
    } else if (physical_schema.__isset.converted_type) {
        // Fall back to the converted type annotation.
        result = convert_to_doris_type(physical_schema.converted_type);
    }

    return result;
}
TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logicalType) {

View File

@ -82,6 +82,8 @@ private:
TypeDescriptor convert_to_doris_type(tparquet::ConvertedType::type convertedType);
// Returns the Doris type for a parquet schema element: mapped from the
// physical type for BOOLEAN/INT32/INT64/INT96/FLOAT/DOUBLE, otherwise taken
// from the logical type or, failing that, the converted type when set.
TypeDescriptor get_doris_type(const tparquet::SchemaElement& physical_schema);
public:
FieldDescriptor() = default;
~FieldDescriptor() = default;

View File

@ -93,6 +93,7 @@ public class LoadScanProvider implements FileScanProviderIf {
ctx.timezone = analyzer.getTimezone();
TFileScanRangeParams params = new TFileScanRangeParams();
params.format_type = formatType(fileGroupInfo.getFileGroup().getFileFormat(), "");
params.setStrictMode(fileGroupInfo.isStrictMode());
params.setProperties(fileGroupInfo.getBrokerDesc().getProperties());
if (fileGroupInfo.getBrokerDesc().getFileType() == TFileType.FILE_HDFS) {