[fix](multi-catalog) verify the precision of datetime types for each data source (#19544)
Fix three bugs of timestampv2 precision: 1. The Hive catalog doesn't set the precision of timestampv2 and can't get the precision from the Hive metastore, so set the largest precision for timestampv2; 2. The JDBC catalog uses datetimev1 to parse timestamps and converts to timestampv2, so the precision is lost; 3. TVF doesn't use the precision from the metadata of the file format.
This commit is contained in:
@ -700,7 +700,8 @@ public class HiveMetaStoreClientHelper {
|
||||
* Convert hive type to doris type.
|
||||
*/
|
||||
public static Type hiveTypeToDorisType(String hiveType) {
|
||||
return hiveTypeToDorisType(hiveType, 0);
|
||||
// use the largest scale as default time scale.
|
||||
return hiveTypeToDorisType(hiveType, 6);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -36,7 +36,9 @@ import java.util.List;
|
||||
|
||||
public class IcebergExternalTable extends ExternalTable {
|
||||
|
||||
public static final int ICEBERG_DATETIME_SCALE_MS = 3;
|
||||
// https://iceberg.apache.org/spec/#schemas-and-data-types
|
||||
// All time and timestamp values are stored with microsecond precision
|
||||
public static final int ICEBERG_DATETIME_SCALE_MS = 6;
|
||||
|
||||
public IcebergExternalTable(long id, String name, String dbName, IcebergExternalCatalog catalog) {
|
||||
super(id, name, catalog, dbName, TableType.ICEBERG_EXTERNAL_TABLE);
|
||||
|
||||
@ -51,6 +51,8 @@ public class JdbcClient {
|
||||
|
||||
private static final int HTTP_TIMEOUT_MS = 10000;
|
||||
|
||||
public static final int JDBC_DATETIME_SCALE = 6;
|
||||
|
||||
private String dbType;
|
||||
private String jdbcUser;
|
||||
|
||||
@ -530,7 +532,9 @@ public class JdbcClient {
|
||||
case "TIMESTAMP":
|
||||
case "DATETIME":
|
||||
case "DATETIMEV2": // for jdbc catalog connecting Doris database
|
||||
return ScalarType.createDatetimeV2Type(0);
|
||||
// mysql can support microsecond
|
||||
// todo(gaoxin): Get real precision of DATETIMEV2
|
||||
return ScalarType.createDatetimeV2Type(JDBC_DATETIME_SCALE);
|
||||
case "FLOAT":
|
||||
return Type.FLOAT;
|
||||
case "DOUBLE":
|
||||
@ -598,7 +602,8 @@ public class JdbcClient {
|
||||
return charType;
|
||||
case "timestamp":
|
||||
case "timestamptz":
|
||||
return ScalarType.createDatetimeV2Type(0);
|
||||
// postgres can support microsecond
|
||||
return ScalarType.createDatetimeV2Type(JDBC_DATETIME_SCALE);
|
||||
case "date":
|
||||
return ScalarType.createDateV2Type();
|
||||
case "bool":
|
||||
@ -649,7 +654,13 @@ public class JdbcClient {
|
||||
|| ckType.startsWith("FixedString")) {
|
||||
return ScalarType.createStringType();
|
||||
} else if (ckType.startsWith("DateTime")) {
|
||||
return ScalarType.createDatetimeV2Type(6);
|
||||
// DateTime with second precision, DateTime64 with [0~9] precision
|
||||
if (ckType.equals("DateTime")) {
|
||||
return ScalarType.createDatetimeV2Type(0);
|
||||
} else {
|
||||
// will lose precision
|
||||
return ScalarType.createDatetimeV2Type(JDBC_DATETIME_SCALE);
|
||||
}
|
||||
} else if (ckType.startsWith("Array")) {
|
||||
String cktype = ckType.substring(6, ckType.length() - 1);
|
||||
fieldSchema.setDataTypeName(cktype);
|
||||
@ -697,7 +708,8 @@ public class JdbcClient {
|
||||
if (oracleType.equals("TIMESTAMPTZ") || oracleType.equals("TIMESTAMPLTZ")) {
|
||||
return Type.UNSUPPORTED;
|
||||
}
|
||||
return ScalarType.createDatetimeV2Type(0);
|
||||
// oracle can support nanosecond, will lose precision
|
||||
return ScalarType.createDatetimeV2Type(JDBC_DATETIME_SCALE);
|
||||
}
|
||||
switch (oracleType) {
|
||||
/**
|
||||
@ -746,6 +758,7 @@ public class JdbcClient {
|
||||
case "FLOAT":
|
||||
return Type.DOUBLE;
|
||||
case "DATE":
|
||||
// can save date and time with second precision
|
||||
return ScalarType.createDatetimeV2Type(0);
|
||||
case "VARCHAR2":
|
||||
case "NVARCHAR2":
|
||||
@ -796,9 +809,14 @@ public class JdbcClient {
|
||||
case "date":
|
||||
return ScalarType.createDateV2Type();
|
||||
case "datetime":
|
||||
// datetime with millisecond precision
|
||||
return ScalarType.createDatetimeV2Type(3);
|
||||
case "datetime2":
|
||||
case "smalldatetime":
|
||||
// datetime2 with 100 nanoseconds precision, will lose precision
|
||||
return ScalarType.createDatetimeV2Type(6);
|
||||
case "smalldatetime":
|
||||
// smalldatetime with second precision
|
||||
return ScalarType.createDatetimeV2Type(0);
|
||||
case "char":
|
||||
case "varchar":
|
||||
case "nchar":
|
||||
@ -838,8 +856,11 @@ public class JdbcClient {
|
||||
case "DOUBLE":
|
||||
return Type.DOUBLE;
|
||||
case "TIMESTAMP":
|
||||
case "SECONDDATE":
|
||||
// TIMESTAMP with 100 nanoseconds precision, will lose precision
|
||||
return ScalarType.createDatetimeV2Type(6);
|
||||
case "SECONDDATE":
|
||||
// SECONDDATE with second precision
|
||||
return ScalarType.createDatetimeV2Type(0);
|
||||
case "DATE":
|
||||
return ScalarType.createDateV2Type();
|
||||
case "BOOLEAN":
|
||||
@ -882,7 +903,8 @@ public class JdbcClient {
|
||||
charType.setLength(fieldSchema.columnSize);
|
||||
return charType;
|
||||
} else if (trinoType.startsWith("timestamp")) {
|
||||
return ScalarType.createDatetimeV2Type(6);
|
||||
// timestamp with picoseconds precision, will lose precision
|
||||
return ScalarType.createDatetimeV2Type(JDBC_DATETIME_SCALE);
|
||||
} else if (trinoType.startsWith("array")) {
|
||||
String trinoArrType = trinoType.substring(6, trinoType.length() - 1);
|
||||
fieldSchema.setDataTypeName(trinoArrType);
|
||||
|
||||
@ -117,8 +117,8 @@ public class MetadataGenerator {
|
||||
LocalDateTime committedAt = LocalDateTime.ofInstant(Instant.ofEpochMilli(
|
||||
snapshot.timestampMillis()), TimeUtils.getTimeZone().toZoneId());
|
||||
long encodedDatetime = convertToDateTimeV2(committedAt.getYear(), committedAt.getMonthValue(),
|
||||
committedAt.getDayOfMonth(), committedAt.getHour(),
|
||||
committedAt.getMinute(), committedAt.getSecond());
|
||||
committedAt.getDayOfMonth(), committedAt.getHour(), committedAt.getMinute(),
|
||||
committedAt.getSecond(), committedAt.getNano() / 1000);
|
||||
|
||||
trow.addToColumnValue(new TCell().setLongVal(encodedDatetime));
|
||||
trow.addToColumnValue(new TCell().setLongVal(snapshot.snapshotId()));
|
||||
@ -303,8 +303,9 @@ public class MetadataGenerator {
|
||||
return hiveCatalog.loadTable(TableIdentifier.of(db, tbl));
|
||||
}
|
||||
|
||||
private static long convertToDateTimeV2(int year, int month, int day, int hour, int minute, int second) {
|
||||
return (long) second << 20 | (long) minute << 26 | (long) hour << 32
|
||||
private static long convertToDateTimeV2(
|
||||
int year, int month, int day, int hour, int minute, int second, int microsecond) {
|
||||
return (long) microsecond | (long) second << 20 | (long) minute << 26 | (long) hour << 32
|
||||
| (long) day << 37 | (long) month << 42 | (long) year << 46;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user