[fix](multi-catalog) verify the precision of datetime types for each data source (#19544)

Fix three bugs related to timestampv2 precision:
1. The Hive catalog doesn't set the precision of timestampv2 and can't get the precision from the Hive metastore, so use the largest precision for timestampv2;
2. The JDBC catalog uses datetimev1 to parse timestamps and then converts them to timestampv2, so the precision is lost.
3. TVF doesn't use the precision from the metadata of the file format.
This commit is contained in:
Ashin Gau
2023-05-17 20:50:15 +08:00
committed by GitHub
parent 73be97f8d8
commit 30c4f25cb3
20 changed files with 2309 additions and 2334 deletions

View File

@ -700,7 +700,8 @@ public class HiveMetaStoreClientHelper {
* Convert hive type to doris type.
*/
public static Type hiveTypeToDorisType(String hiveType) {
return hiveTypeToDorisType(hiveType, 0);
// use the largest scale as default time scale.
return hiveTypeToDorisType(hiveType, 6);
}
/**

View File

@ -36,7 +36,9 @@ import java.util.List;
public class IcebergExternalTable extends ExternalTable {
public static final int ICEBERG_DATETIME_SCALE_MS = 3;
// https://iceberg.apache.org/spec/#schemas-and-data-types
// All time and timestamp values are stored with microsecond precision
public static final int ICEBERG_DATETIME_SCALE_MS = 6;
public IcebergExternalTable(long id, String name, String dbName, IcebergExternalCatalog catalog) {
super(id, name, catalog, dbName, TableType.ICEBERG_EXTERNAL_TABLE);

View File

@ -51,6 +51,8 @@ public class JdbcClient {
private static final int HTTP_TIMEOUT_MS = 10000;
public static final int JDBC_DATETIME_SCALE = 6;
private String dbType;
private String jdbcUser;
@ -530,7 +532,9 @@ public class JdbcClient {
case "TIMESTAMP":
case "DATETIME":
case "DATETIMEV2": // for jdbc catalog connecting Doris database
return ScalarType.createDatetimeV2Type(0);
// mysql can support microsecond
// todo(gaoxin): Get real precision of DATETIMEV2
return ScalarType.createDatetimeV2Type(JDBC_DATETIME_SCALE);
case "FLOAT":
return Type.FLOAT;
case "DOUBLE":
@ -598,7 +602,8 @@ public class JdbcClient {
return charType;
case "timestamp":
case "timestamptz":
return ScalarType.createDatetimeV2Type(0);
// postgres can support microsecond
return ScalarType.createDatetimeV2Type(JDBC_DATETIME_SCALE);
case "date":
return ScalarType.createDateV2Type();
case "bool":
@ -649,7 +654,13 @@ public class JdbcClient {
|| ckType.startsWith("FixedString")) {
return ScalarType.createStringType();
} else if (ckType.startsWith("DateTime")) {
return ScalarType.createDatetimeV2Type(6);
// DateTime with second precision, DateTime64 with [0~9] precision
if (ckType.equals("DateTime")) {
return ScalarType.createDatetimeV2Type(0);
} else {
// will lose precision
return ScalarType.createDatetimeV2Type(JDBC_DATETIME_SCALE);
}
} else if (ckType.startsWith("Array")) {
String cktype = ckType.substring(6, ckType.length() - 1);
fieldSchema.setDataTypeName(cktype);
@ -697,7 +708,8 @@ public class JdbcClient {
if (oracleType.equals("TIMESTAMPTZ") || oracleType.equals("TIMESTAMPLTZ")) {
return Type.UNSUPPORTED;
}
return ScalarType.createDatetimeV2Type(0);
// oracle can support nanosecond, will lose precision
return ScalarType.createDatetimeV2Type(JDBC_DATETIME_SCALE);
}
switch (oracleType) {
/**
@ -746,6 +758,7 @@ public class JdbcClient {
case "FLOAT":
return Type.DOUBLE;
case "DATE":
// can save date and time with second precision
return ScalarType.createDatetimeV2Type(0);
case "VARCHAR2":
case "NVARCHAR2":
@ -796,9 +809,14 @@ public class JdbcClient {
case "date":
return ScalarType.createDateV2Type();
case "datetime":
// datetime with millisecond precision
return ScalarType.createDatetimeV2Type(3);
case "datetime2":
case "smalldatetime":
// datetime2 with 100 nanoseconds precision, will lose precision
return ScalarType.createDatetimeV2Type(6);
case "smalldatetime":
// smalldatetime with second precision
return ScalarType.createDatetimeV2Type(0);
case "char":
case "varchar":
case "nchar":
@ -838,8 +856,11 @@ public class JdbcClient {
case "DOUBLE":
return Type.DOUBLE;
case "TIMESTAMP":
case "SECONDDATE":
// TIMESTAMP with 100 nanoseconds precision, will lose precision
return ScalarType.createDatetimeV2Type(6);
case "SECONDDATE":
// SECONDDATE with second precision
return ScalarType.createDatetimeV2Type(0);
case "DATE":
return ScalarType.createDateV2Type();
case "BOOLEAN":
@ -882,7 +903,8 @@ public class JdbcClient {
charType.setLength(fieldSchema.columnSize);
return charType;
} else if (trinoType.startsWith("timestamp")) {
return ScalarType.createDatetimeV2Type(6);
// timestamp with picoseconds precision, will lose precision
return ScalarType.createDatetimeV2Type(JDBC_DATETIME_SCALE);
} else if (trinoType.startsWith("array")) {
String trinoArrType = trinoType.substring(6, trinoType.length() - 1);
fieldSchema.setDataTypeName(trinoArrType);

View File

@ -117,8 +117,8 @@ public class MetadataGenerator {
LocalDateTime committedAt = LocalDateTime.ofInstant(Instant.ofEpochMilli(
snapshot.timestampMillis()), TimeUtils.getTimeZone().toZoneId());
long encodedDatetime = convertToDateTimeV2(committedAt.getYear(), committedAt.getMonthValue(),
committedAt.getDayOfMonth(), committedAt.getHour(),
committedAt.getMinute(), committedAt.getSecond());
committedAt.getDayOfMonth(), committedAt.getHour(), committedAt.getMinute(),
committedAt.getSecond(), committedAt.getNano() / 1000);
trow.addToColumnValue(new TCell().setLongVal(encodedDatetime));
trow.addToColumnValue(new TCell().setLongVal(snapshot.snapshotId()));
@ -303,8 +303,9 @@ public class MetadataGenerator {
return hiveCatalog.loadTable(TableIdentifier.of(db, tbl));
}
private static long convertToDateTimeV2(int year, int month, int day, int hour, int minute, int second) {
return (long) second << 20 | (long) minute << 26 | (long) hour << 32
private static long convertToDateTimeV2(
int year, int month, int day, int hour, int minute, int second, int microsecond) {
return (long) microsecond | (long) second << 20 | (long) minute << 26 | (long) hour << 32
| (long) day << 37 | (long) month << 42 | (long) year << 46;
}
}