[fix](parquet-reader) reset value idx in bool rle decoder and support iceberg datetime(3) (#18245)
1. Fix the value index in the bool RLE decoder. 2. Iceberg tables now support datetimev2(3). Previously, Hive timestamps were converted to datetimev2(0) by default.
This commit is contained in:
@ -24,7 +24,7 @@ void BoolRLEDecoder::set_data(Slice* slice) {
|
||||
_data = slice;
|
||||
_num_bytes = slice->size;
|
||||
_offset = 0;
|
||||
|
||||
_current_value_idx = 0;
|
||||
if (_num_bytes < 4) {
|
||||
LOG(FATAL) << "Received invalid length : " + std::to_string(_num_bytes) +
|
||||
" (corrupt data page?)";
|
||||
@ -51,12 +51,11 @@ Status BoolRLEDecoder::decode_values(MutableColumnPtr& doris_column, DataTypePtr
|
||||
auto& column_data = static_cast<ColumnVector<UInt8>&>(*doris_column).get_data();
|
||||
size_t data_index = column_data.size();
|
||||
column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered());
|
||||
size_t max_values = column_data.size();
|
||||
size_t max_values = select_vector.num_values() - select_vector.num_nulls();
|
||||
_values.resize(max_values);
|
||||
if (!_decoder.get_values(_values.data(), max_values)) {
|
||||
return Status::IOError("Can't read enough booleans in rle decoder");
|
||||
}
|
||||
// _num_bytes -= max_values;
|
||||
ColumnSelectVector::DataReadType read_type;
|
||||
while (size_t run_length = select_vector.get_next_run(&read_type)) {
|
||||
switch (read_type) {
|
||||
@ -83,6 +82,7 @@ Status BoolRLEDecoder::decode_values(MutableColumnPtr& doris_column, DataTypePtr
|
||||
}
|
||||
}
|
||||
}
|
||||
_current_value_idx = 0;
|
||||
return Status::OK();
|
||||
}
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -698,6 +698,13 @@ public class HiveMetaStoreClientHelper {
|
||||
* Convert hive type to doris type.
|
||||
*/
|
||||
public static Type hiveTypeToDorisType(String hiveType) {
|
||||
return hiveTypeToDorisType(hiveType, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert hive type to doris type with timescale.
|
||||
*/
|
||||
public static Type hiveTypeToDorisType(String hiveType, int timeScale) {
|
||||
String lowerCaseType = hiveType.toLowerCase();
|
||||
switch (lowerCaseType) {
|
||||
case "boolean":
|
||||
@ -713,7 +720,7 @@ public class HiveMetaStoreClientHelper {
|
||||
case "date":
|
||||
return ScalarType.createDateV2Type();
|
||||
case "timestamp":
|
||||
return ScalarType.createDatetimeV2Type(0);
|
||||
return ScalarType.createDatetimeV2Type(timeScale);
|
||||
case "float":
|
||||
return Type.FLOAT;
|
||||
case "double":
|
||||
|
||||
@ -318,7 +318,8 @@ public class HMSExternalTable extends ExternalTable {
|
||||
List<Column> tmpSchema = Lists.newArrayListWithCapacity(hmsSchema.size());
|
||||
for (FieldSchema field : hmsSchema) {
|
||||
tmpSchema.add(new Column(field.getName(),
|
||||
HiveMetaStoreClientHelper.hiveTypeToDorisType(field.getType()), true, null,
|
||||
HiveMetaStoreClientHelper.hiveTypeToDorisType(field.getType(),
|
||||
IcebergExternalTable.ICEBERG_DATETIME_SCALE_MS), true, null,
|
||||
true, null, field.getComment(), true, null,
|
||||
schema.caseInsensitiveFindField(field.getName()).fieldId(), null));
|
||||
}
|
||||
|
||||
@ -36,6 +36,8 @@ import java.util.List;
|
||||
|
||||
public class IcebergExternalTable extends ExternalTable {
|
||||
|
||||
public static final int ICEBERG_DATETIME_SCALE_MS = 3;
|
||||
|
||||
public IcebergExternalTable(long id, String name, String dbName, IcebergExternalCatalog catalog) {
|
||||
super(id, name, catalog, dbName, TableType.ICEBERG_EXTERNAL_TABLE);
|
||||
}
|
||||
@ -88,7 +90,7 @@ public class IcebergExternalTable extends ExternalTable {
|
||||
case DATE:
|
||||
return ScalarType.createDateV2Type();
|
||||
case TIMESTAMP:
|
||||
return ScalarType.createDatetimeV2Type(0);
|
||||
return ScalarType.createDatetimeV2Type(ICEBERG_DATETIME_SCALE_MS);
|
||||
case TIME:
|
||||
return Type.UNSUPPORTED;
|
||||
default:
|
||||
|
||||
@ -35,26 +35,26 @@
|
||||
1876.4831949153224
|
||||
|
||||
-- !q06 --
|
||||
2023-03-07 20:34:59
|
||||
2023-03-07 20:34:59
|
||||
2023-03-07 20:34:59
|
||||
2023-03-07 20:34:59
|
||||
2023-03-07 20:34:59
|
||||
2023-03-07 20:34:59
|
||||
2023-03-07 20:34:59
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:35
|
||||
2023-03-07 20:34:59.601
|
||||
2023-03-07 20:34:59.693
|
||||
2023-03-07 20:34:59.708
|
||||
2023-03-07 20:34:59.782
|
||||
2023-03-07 20:34:59.836
|
||||
2023-03-07 20:34:59.934
|
||||
2023-03-07 20:34:59.950
|
||||
2023-03-07 20:35:00.042
|
||||
2023-03-07 20:35:00.053
|
||||
2023-03-07 20:35:00.114
|
||||
2023-03-07 20:35:00.134
|
||||
2023-03-07 20:35:00.201
|
||||
2023-03-07 20:35:00.272
|
||||
2023-03-07 20:35:00.316
|
||||
2023-03-07 20:35:00.337
|
||||
2023-03-07 20:35:00.409
|
||||
2023-03-07 20:35:00.420
|
||||
2023-03-07 20:35:00.428
|
||||
2023-03-07 20:35:00.500
|
||||
2023-03-07 20:35:00.535
|
||||
|
||||
-- !q07 --
|
||||
6f77a7baae184d
|
||||
@ -82,7 +82,7 @@ f14889
|
||||
66.8626 true
|
||||
66.9046 true
|
||||
67.0202 true
|
||||
67.7351 false
|
||||
67.7351 true
|
||||
|
||||
-- !q11 --
|
||||
54078 8184
|
||||
@ -124,8 +124,15 @@ b5e6bf2b5
|
||||
5000
|
||||
|
||||
-- !q16 --
|
||||
2023-03-07 20:35:59
|
||||
2023-03-07 20:35:59
|
||||
2023-03-07 20:35:59
|
||||
2023-03-07 20:35:59
|
||||
2023-03-07 20:35:59
|
||||
2023-03-07 20:35:59.064
|
||||
2023-03-07 20:35:59.087
|
||||
2023-03-07 20:35:59.110
|
||||
2023-03-07 20:35:59.129
|
||||
2023-03-07 20:35:59.224
|
||||
|
||||
-- !q17 --
|
||||
14040216 \N 2147483647 2023-03-07 20:38:02.140 81.607142423775869 b1d54a8ac60a4c8aa 66.6566 a54742979109 9a8247ed7c74 false
|
||||
7847742 17740 2147483647 2023-03-07 20:36:02.376 1740.7904511543441 ff588a918be 66.8626 41c532d698024 18d9fa638cd449d893 true
|
||||
9045125 27361 2147483647 2023-03-07 20:35:51.997 1245.2170379359104 b31a143e67 66.9046 52ab9d8a748f4c9 5d70ec319e true
|
||||
10410585 \N 1938534851 2023-03-07 20:35:17.731 955.1760424982325 643e7c71b83d444e9261 67.0202 6a15d14103dc4 55b15adbec34 true
|
||||
10055090 \N 2147483647 2023-03-07 20:38:59.078 1387.1527042831178 47 67.7351 c4c5 960637955914682b6 true
|
||||
|
||||
@ -47,6 +47,7 @@ suite("test_external_catalog_glue_table", "p2") {
|
||||
qt_q14 """ select glue_string from iceberg_glue_types where glue_string>'040abff1da4748e4b' order by glue_int limit 5 """
|
||||
qt_q15 """ select count(1) from iceberg_glue_types """
|
||||
qt_q16 """ select glue_timstamp from iceberg_glue_types where glue_timstamp > '2023-03-07 20:35:59' order by glue_timstamp limit 5 """
|
||||
qt_q17 """ select * from iceberg_glue_types order by glue_decimal limit 5 """
|
||||
}
|
||||
sql """ use `iceberg_catalog`; """
|
||||
q01()
|
||||
|
||||
Reference in New Issue
Block a user