branch-2.1: [fix](mc) Fix the issue where the MaxCompute catalog could read only part of the timestamp data #49600 (#49706)
Cherry-picked from #49600
Co-authored-by: daidai <changyuwei@selectdb.com>
This commit is contained in:
committed by
GitHub
parent
a9939c09c1
commit
e898dbbba0
@ -29,9 +29,9 @@ import org.apache.arrow.vector.Float4Vector;
|
||||
import org.apache.arrow.vector.Float8Vector;
|
||||
import org.apache.arrow.vector.IntVector;
|
||||
import org.apache.arrow.vector.SmallIntVector;
|
||||
import org.apache.arrow.vector.TimeStampMicroTZVector;
|
||||
import org.apache.arrow.vector.TimeStampMicroVector;
|
||||
import org.apache.arrow.vector.TimeStampMilliTZVector;
|
||||
import org.apache.arrow.vector.TimeStampNanoTZVector;
|
||||
import org.apache.arrow.vector.TimeStampNanoVector;
|
||||
import org.apache.arrow.vector.TinyIntVector;
|
||||
import org.apache.arrow.vector.ValueVector;
|
||||
import org.apache.arrow.vector.VarBinaryVector;
|
||||
@ -39,7 +39,7 @@ import org.apache.arrow.vector.VarCharVector;
|
||||
import org.apache.arrow.vector.complex.ListVector;
|
||||
import org.apache.arrow.vector.complex.MapVector;
|
||||
import org.apache.arrow.vector.complex.StructVector;
|
||||
import org.apache.arrow.vector.holders.NullableTimeStampNanoHolder;
|
||||
import org.apache.arrow.vector.holders.NullableTimeStampMicroHolder;
|
||||
import org.apache.arrow.vector.types.pojo.ArrowType;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@ -246,48 +246,12 @@ public class MaxComputeColumnValue implements ColumnValue {
|
||||
if (timestampType.getUnit() == org.apache.arrow.vector.types.TimeUnit.MILLISECOND) { //DATETIME
|
||||
result = convertToLocalDateTime((TimeStampMilliTZVector) column, idx);
|
||||
} else if (timestampType.getTimezone() == null) { // TIMESTAMP_NTZ
|
||||
NullableTimeStampNanoHolder valueHoder = new NullableTimeStampNanoHolder();
|
||||
((TimeStampNanoVector) column).get(idx, valueHoder);
|
||||
long timestampNanos = valueHoder.value;
|
||||
|
||||
result = LocalDateTime.ofEpochSecond(timestampNanos / 1_000_000_000,
|
||||
(int) (timestampNanos % 1_000_000_000), java.time.ZoneOffset.UTC);
|
||||
NullableTimeStampMicroHolder valueHoder = new NullableTimeStampMicroHolder();
|
||||
((TimeStampMicroVector) column).get(idx, valueHoder);
|
||||
result = microsToInstant(valueHoder.value).atZone(java.time.ZoneOffset.UTC).toLocalDateTime();
|
||||
} else { // TIMESTAMP
|
||||
result = convertToLocalDateTime((TimeStampNanoTZVector) column, idx);
|
||||
result = convertToLocalDateTime((TimeStampMicroTZVector) column, idx);
|
||||
}
|
||||
|
||||
/*
|
||||
timestampType.getUnit()
|
||||
result = switch (timestampType.getUnit()) {
|
||||
case MICROSECOND -> convertToLocalDateTime((TimeStampMicroTZVector) column, idx);
|
||||
case SECOND -> convertToLocalDateTime((TimeStampSecTZVector) column, idx);
|
||||
case MILLISECOND -> convertToLocalDateTime((TimeStampMilliTZVector) column, idx);
|
||||
case NANOSECOND -> convertToLocalDateTime((TimeStampNanoTZVector) column, idx);
|
||||
};
|
||||
|
||||
Because :
|
||||
MaxCompute type => Doris Type
|
||||
DATETIME => ScalarType.createDatetimeV2Type(3)
|
||||
TIMESTAMP_NTZ => ScalarType.createDatetimeV2Type(6);
|
||||
|
||||
and
|
||||
TableBatchReadSession
|
||||
.withArrowOptions (
|
||||
ArrowOptions.newBuilder()
|
||||
.withDatetimeUnit(TimestampUnit.MILLI)
|
||||
.withTimestampUnit(TimestampUnit.NANO)
|
||||
.build()
|
||||
)
|
||||
,
|
||||
TIMESTAMP_NTZ is NTZ => column is TimeStampNanoVector
|
||||
|
||||
So:
|
||||
case SECOND -> convertToLocalDateTime((TimeStampSecTZVector) column, idx);
|
||||
case MICROSECOND -> convertToLocalDateTime((TimeStampMicroTZVector) column, idx);
|
||||
case NANOSECOND -> convertToLocalDateTime((TimeStampNanoTZVector) column, idx);
|
||||
may never be used.
|
||||
*/
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -342,9 +306,14 @@ public class MaxComputeColumnValue implements ColumnValue {
|
||||
return LocalDateTime.ofInstant(Instant.ofEpochMilli(timestampMillis), timeZone);
|
||||
}
|
||||
|
||||
public LocalDateTime convertToLocalDateTime(TimeStampNanoTZVector nanoTZVector, int index) {
|
||||
long timestampNano = nanoTZVector.get(index);
|
||||
return Instant.ofEpochSecond(timestampNano / 1_000_000_000, timestampNano % 1_000_000_000)
|
||||
.atZone(timeZone).toLocalDateTime();
|
||||
/**
 * Reads one entry of a timezone-aware microsecond timestamp vector and converts it to a
 * {@link LocalDateTime}.
 *
 * <p>The raw value is passed to {@code microsToInstant}, i.e. it is treated as microseconds
 * since the Unix epoch, and the resulting instant is rendered in {@code timeZone}
 * (declared outside this method).
 *
 * <p>NOTE(review): the parameter is still called {@code nanoTZVector} although its type is
 * {@code TimeStampMicroTZVector}; the name looks like a leftover from the nanosecond
 * overload - consider renaming.
 *
 * @param nanoTZVector the microsecond-precision vector to read from
 * @param index position of the value inside the vector
 * @return the value at {@code index} as a local date-time in {@code timeZone}
 */
public LocalDateTime convertToLocalDateTime(TimeStampMicroTZVector nanoTZVector, int index) {
|
||||
// Raw epoch offset stored in the vector, in microseconds.
long timestampMicro = nanoTZVector.get(index);
|
||||
return microsToInstant(timestampMicro).atZone(timeZone).toLocalDateTime();
|
||||
}
|
||||
|
||||
/**
 * Converts a count of microseconds relative to the Unix epoch into an {@link Instant}.
 *
 * <p>Floor division is used so that negative inputs (instants before 1970-01-01T00:00:00Z)
 * map to the earlier whole second with a non-negative sub-second remainder.
 *
 * @param timestampMicro microseconds since the epoch; may be negative
 * @return the instant denoted by {@code timestampMicro}
 */
private static Instant microsToInstant(long timestampMicro) {
|
||||
// Math.floorDiv rounds toward negative infinity, unlike '/' which truncates toward zero.
long epochSecond = Math.floorDiv(timestampMicro, 1_000_000);
|
||||
// Remainder relative to the floored second, so it always lies in [0, 999_999].
long microAdjustment = timestampMicro - epochSecond * 1_000_000;
|
||||
// Instant.ofEpochSecond expects the adjustment in nanoseconds: micros * 1000.
return Instant.ofEpochSecond(epochSecond, microAdjustment * 1000);
|
||||
}
|
||||
}
|
||||
|
||||
@ -202,7 +202,7 @@ public class MaxComputeScanNode extends FileQueryScanNode {
|
||||
.withArrowOptions(
|
||||
ArrowOptions.newBuilder()
|
||||
.withDatetimeUnit(TimestampUnit.MILLI)
|
||||
.withTimestampUnit(TimestampUnit.NANO)
|
||||
.withTimestampUnit(TimestampUnit.MICRO)
|
||||
.build()
|
||||
).buildBatchReadSession();
|
||||
}
|
||||
|
||||
@ -1,4 +1,43 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !0_1 --
|
||||
0001-01-01T00:00
|
||||
1523-03-10T08:15:30
|
||||
1969-02-02T00:00
|
||||
1969-12-31T00:00:01
|
||||
2023-02-02T00:00
|
||||
3256-07-22T14:45:10
|
||||
4789-09-05T20:30:45
|
||||
6210-12-17T03:55:20
|
||||
7854-05-29T12:10:05
|
||||
9234-11-11T18:40:50
|
||||
9999-12-31T23:59:59
|
||||
|
||||
-- !0_2 --
|
||||
0001-01-01T00:00 0001-01-01T00:00
|
||||
1523-03-10T08:15:30.987654 1523-03-10T08:15:30.987654
|
||||
1969-02-02T00:00:00.543210 1969-02-02T00:00:00.543210
|
||||
1969-12-31T00:00:01.678901 1969-12-31T00:00:01.678901
|
||||
2023-02-02T00:00:00.123456 2023-02-02T00:00:00.123456
|
||||
3256-07-22T14:45:10.234567 3256-07-22T14:45:10.234567
|
||||
4789-09-05T20:30:45.876543 4789-09-05T20:30:45.876543
|
||||
6210-12-17T03:55:20.345678 6210-12-17T03:55:20.345678
|
||||
7854-05-29T12:10:05.456789 7854-05-29T12:10:05.456789
|
||||
9234-11-11T18:40:50.567890 9234-11-11T18:40:50.567890
|
||||
9999-12-31T23:59:59.999999 9999-12-31T23:59:59.999999
|
||||
|
||||
-- !0_3 --
|
||||
0001-01-01T00:00:00.654321 0001-01-01T00:00:00.654321
|
||||
1523-03-10T08:15:30.987654 1523-03-10T08:15:30.987654
|
||||
1969-02-02T00:00:00.543210 1969-02-02T00:00:00.543210
|
||||
1969-12-31T00:00:01.678901 1969-12-31T00:00:01.678901
|
||||
2023-02-02T00:00:00.123456 2023-02-02T00:00:00.123456
|
||||
3256-07-22T14:45:10.234567 3256-07-22T14:45:10.234567
|
||||
4789-09-05T20:30:45.876543 4789-09-05T20:30:45.876543
|
||||
6210-12-17T03:55:20.345678 6210-12-17T03:55:20.345678
|
||||
7854-05-29T12:10:05.456789 7854-05-29T12:10:05.456789
|
||||
9234-11-11T18:40:50.567890 9234-11-11T18:40:50.567890
|
||||
9999-12-31T23:59:59.999999 9999-12-31T23:59:59.999999
|
||||
|
||||
-- !1_1 --
|
||||
2023-02-02T00:00
|
||||
|
||||
|
||||
@ -31,6 +31,56 @@ INSERT INTO TABLE timestamp_tb1 VALUES(timestamp "2023-02-02 00:00:00.123456789"
|
||||
drop table if EXISTS timestamp_tb2;
|
||||
CREATE TABLE timestamp_tb2 (col1 TIMESTAMP,col2 TIMESTAMP_NTZ);
|
||||
INSERT INTO TABLE timestamp_tb2 VALUES(timestamp "2023-02-02 00:00:00.123456", timestamp_ntz "2023-02-02 00:00:00.123456" );
|
||||
|
||||
|
||||
drop table if EXISTS datetime_tb2;
|
||||
CREATE TABLE datetime_tb2 (col1 datetime);
|
||||
INSERT INTO TABLE datetime_tb2 VALUES
|
||||
(datetime '0001-01-01 00:00:00'),
|
||||
(datetime '1523-03-10 08:15:30'),
|
||||
(datetime '1969-02-02 00:00:00'),
|
||||
(datetime '1969-12-31 00:00:01'),
|
||||
(datetime "2023-02-02 00:00:00"),
|
||||
(datetime '3256-07-22 14:45:10'),
|
||||
(datetime '4789-09-05 20:30:45'),
|
||||
(datetime '6210-12-17 03:55:20'),
|
||||
(datetime '7854-05-29 12:10:05'),
|
||||
(datetime '9234-11-11 18:40:50'),
|
||||
(datetime '9999-12-31 23:59:59');
|
||||
|
||||
|
||||
|
||||
|
||||
drop table if EXISTS timestamp_tb3;
|
||||
CREATE TABLE timestamp_tb3 (col1 TIMESTAMP,col2 TIMESTAMP_NTZ);
|
||||
INSERT INTO TABLE timestamp_tb3 VALUES
|
||||
(timestamp '0001-01-01 00:00:00.000000', timestamp_ntz '0001-01-01 00:00:00.000000'),
|
||||
(timestamp '1523-03-10 08:15:30.987654', timestamp_ntz '1523-03-10 08:15:30.987654'),
|
||||
(timestamp '1969-02-02 00:00:00.543210', timestamp_ntz '1969-02-02 00:00:00.543210'),
|
||||
(timestamp '1969-12-31 00:00:01.678901', timestamp_ntz '1969-12-31 00:00:01.678901'),
|
||||
(timestamp '2023-02-02 00:00:00.123456', timestamp_ntz '2023-02-02 00:00:00.123456'),
|
||||
(timestamp '3256-07-22 14:45:10.234567', timestamp_ntz '3256-07-22 14:45:10.234567'),
|
||||
(timestamp '4789-09-05 20:30:45.876543', timestamp_ntz '4789-09-05 20:30:45.876543'),
|
||||
(timestamp '6210-12-17 03:55:20.345678', timestamp_ntz '6210-12-17 03:55:20.345678'),
|
||||
(timestamp '7854-05-29 12:10:05.456789', timestamp_ntz '7854-05-29 12:10:05.456789'),
|
||||
(timestamp '9234-11-11 18:40:50.567890', timestamp_ntz '9234-11-11 18:40:50.567890'),
|
||||
(timestamp '9999-12-31 23:59:59.999999', timestamp_ntz '9999-12-31 23:59:59.999999');
|
||||
|
||||
|
||||
drop table if EXISTS timestamp_tb4;
|
||||
CREATE TABLE timestamp_tb4 (col1 TIMESTAMP,col2 TIMESTAMP_NTZ);
|
||||
INSERT INTO TABLE timestamp_tb4 VALUES
|
||||
(timestamp '0001-01-01 00:00:00.654321789', timestamp_ntz '0001-01-01 00:00:00.654321789'),
|
||||
(timestamp '1523-03-10 08:15:30.987654123', timestamp_ntz '1523-03-10 08:15:30.987654123'),
|
||||
(timestamp '1969-02-02 00:00:00.543210567', timestamp_ntz '1969-02-02 00:00:00.543210567'),
|
||||
(timestamp '1969-12-31 00:00:01.678901234', timestamp_ntz '1969-12-31 00:00:01.678901234'),
|
||||
(timestamp '2023-02-02 00:00:00.123456890', timestamp_ntz '2023-02-02 00:00:00.123456890'),
|
||||
(timestamp '3256-07-22 14:45:10.234567345', timestamp_ntz '3256-07-22 14:45:10.234567345'),
|
||||
(timestamp '4789-09-05 20:30:45.876543678', timestamp_ntz '4789-09-05 20:30:45.876543678'),
|
||||
(timestamp '6210-12-17 03:55:20.345678901', timestamp_ntz '6210-12-17 03:55:20.345678901'),
|
||||
(timestamp '7854-05-29 12:10:05.456789432', timestamp_ntz '7854-05-29 12:10:05.456789432'),
|
||||
(timestamp '9234-11-11 18:40:50.567890765', timestamp_ntz '9234-11-11 18:40:50.567890765'),
|
||||
(timestamp '9999-12-31 23:59:59.999999876', timestamp_ntz '9999-12-31 23:59:59.999999876');
|
||||
*/
|
||||
|
||||
suite("test_max_compute_timestamp", "p2,external,maxcompute,external_remote,external_remote_maxcompute") {
|
||||
@ -57,6 +107,10 @@ suite("test_max_compute_timestamp", "p2,external,maxcompute,external_remote,exte
|
||||
sql """ switch ${mc_catalog_name} """
|
||||
sql """ use ${mc_db}"""
|
||||
|
||||
qt_0_1 """ select * from datetime_tb2 order by col1"""
|
||||
qt_0_2 """ select * from timestamp_tb3 order by col1 """
|
||||
qt_0_3 """ select * from timestamp_tb4 order by col1 """
|
||||
|
||||
sql """ set time_zone = "Asia/Shanghai" """
|
||||
qt_1_1 """ select * from datetime_tb1;"""
|
||||
qt_1_2 """ select * from datetime_tb1 where col1 > "2023-02-02 00:00:00.000";"""
|
||||
|
||||
Reference in New Issue
Block a user