[bugfix](paimon)Fixed the reading of timestamp with time zone type data for 2.1 (#37716) (#38592)

bp: #37716
This commit is contained in:
wuwenchi
2024-08-01 10:23:06 +08:00
committed by GitHub
parent 9f1e41c623
commit 41fa7bc9fd
7 changed files with 400 additions and 51 deletions

View File

@ -823,20 +823,9 @@ Status VFileScanner::_get_next_reader() {
break;
}
case TFileFormatType::FORMAT_PARQUET: {
static const cctz::time_zone utc0 = cctz::utc_time_zone();
cctz::time_zone* tz;
if (range.__isset.table_format_params &&
range.table_format_params.table_format_type == "paimon") {
// The timestmap generated by paimon does not carry metadata information (e.g., isAdjustToUTC, etc.),
// and the stored data is UTC0 by default, so it is directly set to the UTC time zone.
// In version 0.7, paimon fixed this issue and can remove the judgment here
tz = const_cast<cctz::time_zone*>(&utc0);
} else {
tz = const_cast<cctz::time_zone*>(&_state->timezone_obj());
}
std::unique_ptr<ParquetReader> parquet_reader = ParquetReader::create_unique(
_profile, *_params, range, _state->query_options().batch_size, tz,
_io_ctx.get(), _state,
_profile, *_params, range, _state->query_options().batch_size,
const_cast<cctz::time_zone*>(&_state->timezone_obj()), _io_ctx.get(), _state,
_shoudl_enable_file_meta_cache() ? ExecEnv::GetInstance()->file_meta_cache()
: nullptr,
_state->query_options().enable_parquet_lazy_mat);

View File

@ -21,3 +21,7 @@ docker exec iceberg-rest bash -c 'cp /tmp/iceberg_rest_mode\=memory /mnt/data/in
# save iceberg from s3
docker exec mc bash -c 'mc cp -r minio/warehouse /mnt/data/input/minio'
# package zip
cp -r data iceberg_data
zip -rq iceberg_data.zip iceberg_data

View File

@ -24,6 +24,12 @@ import org.apache.paimon.data.DataGetters;
import org.apache.paimon.data.InternalArray;
import org.apache.paimon.data.InternalMap;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.data.Timestamp;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.LocalZonedTimestampType;
import org.apache.paimon.types.MapType;
import org.apache.paimon.types.RowType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -31,6 +37,7 @@ import java.math.BigDecimal;
import java.math.BigInteger;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.List;
public class PaimonColumnValue implements ColumnValue {
@ -38,19 +45,22 @@ public class PaimonColumnValue implements ColumnValue {
private int idx;
private DataGetters record;
private ColumnType dorisType;
private DataType dataType;
public PaimonColumnValue() {
}
public PaimonColumnValue(DataGetters record, int idx, ColumnType columnType) {
public PaimonColumnValue(DataGetters record, int idx, ColumnType columnType, DataType dataType) {
this.idx = idx;
this.record = record;
this.dorisType = columnType;
this.dataType = dataType;
}
public void setIdx(int idx, ColumnType dorisType) {
public void setIdx(int idx, ColumnType dorisType, DataType dataType) {
this.idx = idx;
this.dorisType = dorisType;
this.dataType = dataType;
}
public void setOffsetRow(InternalRow record) {
@ -124,7 +134,12 @@ public class PaimonColumnValue implements ColumnValue {
@Override
public LocalDateTime getDateTime() {
return record.getTimestamp(idx, dorisType.getPrecision()).toLocalDateTime();
Timestamp ts = record.getTimestamp(idx, dorisType.getPrecision());
if (dataType instanceof LocalZonedTimestampType) {
return LocalDateTime.ofInstant(ts.toInstant(), ZoneId.systemDefault());
} else {
return ts.toLocalDateTime();
}
}
@Override
@ -142,7 +157,7 @@ public class PaimonColumnValue implements ColumnValue {
InternalArray recordArray = record.getArray(idx);
for (int i = 0; i < recordArray.size(); i++) {
PaimonColumnValue arrayColumnValue = new PaimonColumnValue((DataGetters) recordArray, i,
dorisType.getChildTypes().get(0));
dorisType.getChildTypes().get(0), ((ArrayType) dataType).getElementType());
values.add(arrayColumnValue);
}
}
@ -153,13 +168,13 @@ public class PaimonColumnValue implements ColumnValue {
InternalArray key = map.keyArray();
for (int i = 0; i < key.size(); i++) {
PaimonColumnValue keyColumnValue = new PaimonColumnValue((DataGetters) key, i,
dorisType.getChildTypes().get(0));
dorisType.getChildTypes().get(0), ((MapType) dataType).getKeyType());
keys.add(keyColumnValue);
}
InternalArray value = map.valueArray();
for (int i = 0; i < value.size(); i++) {
PaimonColumnValue valueColumnValue = new PaimonColumnValue((DataGetters) value, i,
dorisType.getChildTypes().get(1));
dorisType.getChildTypes().get(1), ((MapType) dataType).getValueType());
values.add(valueColumnValue);
}
}
@ -169,7 +184,8 @@ public class PaimonColumnValue implements ColumnValue {
// todo: support pruned struct fields
InternalRow row = record.getRow(idx, structFieldIndex.size());
for (int i : structFieldIndex) {
values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i)));
values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i),
((RowType) dataType).getFields().get(i).type()));
}
}
}

View File

@ -53,6 +53,7 @@ public class PaimonJniScanner extends JniScanner {
private RecordReader<InternalRow> reader;
private final PaimonColumnValue columnValue = new PaimonColumnValue();
private List<String> paimonAllFieldNames;
private List<DataType> paimonDataTypeList;
private long ctlId;
private long dbId;
@ -99,7 +100,7 @@ public class PaimonJniScanner extends JniScanner {
initTable();
initReader();
resetDatetimeV2Precision();
} catch (Exception e) {
} catch (Throwable e) {
LOG.warn("Failed to open paimon_scanner: " + e.getMessage(), e);
throw e;
}
@ -114,9 +115,12 @@ public class PaimonJniScanner extends JniScanner {
+ " Please refresh table and try again",
fields.length, paimonAllFieldNames.size()));
}
readBuilder.withProjection(getProjected());
int[] projected = getProjected();
readBuilder.withProjection(projected);
readBuilder.withFilter(getPredicates());
reader = readBuilder.newRead().createReader(getSplit());
paimonDataTypeList =
Arrays.stream(projected).mapToObj(i -> table.rowType().getTypeAt(i)).collect(Collectors.toList());
}
private int[] getProjected() {
@ -175,7 +179,7 @@ public class PaimonJniScanner extends JniScanner {
while ((record = recordIterator.next()) != null) {
columnValue.setOffsetRow(record);
for (int i = 0; i < fields.length; i++) {
columnValue.setIdx(i, types[i]);
columnValue.setIdx(i, types[i], paimonDataTypeList.get(i));
appendData(i, columnValue);
}
rows++;
@ -189,8 +193,8 @@ public class PaimonJniScanner extends JniScanner {
} catch (Exception e) {
close();
LOG.warn("Failed to get the next batch of paimon. "
+ "split: {}, requiredFieldNames: {}, paimonAllFieldNames: {}",
getSplit(), params.get("required_fields"), paimonAllFieldNames, e);
+ "split: {}, requiredFieldNames: {}, paimonAllFieldNames: {}, dataType: {}",
getSplit(), params.get("required_fields"), paimonAllFieldNames, paimonDataTypeList, e);
throw new IOException(e);
}
return rows;

View File

@ -88,6 +88,7 @@ public class PaimonExternalTable extends ExternalTable {
}
private Type paimonPrimitiveTypeToDorisType(org.apache.paimon.types.DataType dataType) {
int tsScale = 3; // default
switch (dataType.getTypeRoot()) {
case BOOLEAN:
return Type.BOOLEAN;
@ -114,20 +115,26 @@ public class PaimonExternalTable extends ExternalTable {
case DATE:
return ScalarType.createDateV2Type();
case TIMESTAMP_WITHOUT_TIME_ZONE:
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
int scale = 3; // default
if (dataType instanceof org.apache.paimon.types.TimestampType) {
scale = ((org.apache.paimon.types.TimestampType) dataType).getPrecision();
if (scale > 6) {
scale = 6;
tsScale = ((org.apache.paimon.types.TimestampType) dataType).getPrecision();
if (tsScale > 6) {
tsScale = 6;
}
} else if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) {
scale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision();
if (scale > 6) {
scale = 6;
tsScale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision();
if (tsScale > 6) {
tsScale = 6;
}
}
return ScalarType.createDatetimeV2Type(scale);
return ScalarType.createDatetimeV2Type(tsScale);
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) {
tsScale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision();
if (tsScale > 6) {
tsScale = 6;
}
}
return ScalarType.createDatetimeV2Type(tsScale);
case ARRAY:
ArrayType arrayType = (ArrayType) dataType;
Type innerType = paimonPrimitiveTypeToDorisType(arrayType.getElementType());

View File

@ -0,0 +1,139 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !c1 --
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456
-- !c2 --
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456
-- !ltz_ntz_simple2 --
1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
-- !ltz_ntz_simple3 --
{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"}
-- !ltz_ntz_simple4 --
2024-01-02T10:12:34.123456
-- !ltz_ntz_simple5 --
2024-01-04T10:12:34.123456
-- !ltz_ntz_simple6 --
{"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"}
-- !ltz_ntz_simple7 --
2024-01-02T10:12:34.123456
-- !ltz_ntz_simple8 --
2024-01-04T10:12:34.123456
-- !ltz_ntz_simple9 --
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
-- !ltz_ntz_simple10 --
2024-01-02T10:12:34.123456
-- !ltz_ntz_simple11 --
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
-- !ltz_ntz_simple12 --
2024-01-02T10:12:34.123456
-- !ltz_ntz_simple13 --
{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
-- !ltz_ntz_simple14 --
2024-01-01T10:12:34.123456
-- !ltz_ntz_simple15 --
2024-01-02T10:12:34.123456
-- !c1 --
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T02:04:05.100 2024-01-02T02:04:05.120 2024-01-02T02:04:05.123 2024-01-02T02:04:05.123400 2024-01-02T02:04:05.123450 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456
-- !c2 --
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456
-- !ltz_ntz_simple2 --
1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 02:12:34.123456":"2024-01-04 02:12:34.123456", "2024-01-01 02:12:34.123456":"2024-01-02 02:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 02:12:34.123456", "2024-01-02 02:12:34.123456", "2024-01-03 02:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 02:12:34.123456"}
-- !ltz_ntz_simple3 --
{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"}
-- !ltz_ntz_simple4 --
2024-01-02T10:12:34.123456
-- !ltz_ntz_simple5 --
2024-01-04T10:12:34.123456
-- !ltz_ntz_simple6 --
{"2024-01-03 02:12:34.123456":"2024-01-04 02:12:34.123456", "2024-01-01 02:12:34.123456":"2024-01-02 02:12:34.123456"}
-- !ltz_ntz_simple7 --
\N
-- !ltz_ntz_simple8 --
\N
-- !ltz_ntz_simple9 --
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
-- !ltz_ntz_simple10 --
2024-01-02T10:12:34.123456
-- !ltz_ntz_simple11 --
["2024-01-01 02:12:34.123456", "2024-01-02 02:12:34.123456", "2024-01-03 02:12:34.123456"]
-- !ltz_ntz_simple12 --
2024-01-02T02:12:34.123456
-- !ltz_ntz_simple13 --
{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 02:12:34.123456"}
-- !ltz_ntz_simple14 --
2024-01-01T10:12:34.123456
-- !ltz_ntz_simple15 --
2024-01-02T02:12:34.123456
-- !ltz_ntz_simple2 --
1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
-- !ltz_ntz_simple3 --
{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"}
-- !ltz_ntz_simple4 --
2024-01-02T10:12:34.123456
-- !ltz_ntz_simple5 --
2024-01-04T10:12:34.123456
-- !ltz_ntz_simple6 --
{"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"}
-- !ltz_ntz_simple7 --
2024-01-02T10:12:34.123456
-- !ltz_ntz_simple8 --
2024-01-04T10:12:34.123456
-- !ltz_ntz_simple9 --
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
-- !ltz_ntz_simple10 --
2024-01-02T10:12:34.123456
-- !ltz_ntz_simple11 --
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
-- !ltz_ntz_simple12 --
2024-01-02T10:12:34.123456
-- !ltz_ntz_simple13 --
{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
-- !ltz_ntz_simple14 --
2024-01-01T10:12:34.123456
-- !ltz_ntz_simple15 --
2024-01-02T10:12:34.123456

View File

@ -17,14 +17,9 @@
suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_docker_doris") {
def ts_orc = """select * from ts_orc"""
def ts_parquet = """select * from ts_parquet"""
String enabled = context.config.otherConfigs.get("enablePaimonTest")
// The timestamp type of paimon has no logical or converted type,
// and is conflict with column type change from bigint to timestamp.
// Deprecated currently.
if (enabled == null || !enabled.equalsIgnoreCase("enable_deprecated_case")) {
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("disabled paimon test")
return
}
@ -46,20 +41,85 @@ suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_dock
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)
sql """use test_paimon_db;"""
sql """use flink_paimon;"""
logger.info("use test_paimon_db")
def test_ltz_ntz = { table ->
qt_ltz_ntz2 """ select * from ${table} """
qt_ltz_ntz3 """ select cmap1 from ${table} """
qt_ltz_ntz4 """ select cmap1['2024-01-01 10:12:34.123456'] from ${table} """
qt_ltz_ntz5 """ select cmap1['2024-01-03 10:12:34.123456'] from ${table} """
qt_ltz_ntz6 """ select cmap2 from ${table} """
qt_ltz_ntz7 """ select cmap2['2024-01-01 10:12:34.123456'] from ${table} """
qt_ltz_ntz8 """ select cmap2['2024-01-03 10:12:34.123456'] from ${table} """
qt_ltz_ntz9 """ select cmap3 from ${table} """
qt_ltz_ntz10 """ select cmap4 from ${table} """
qt_ltz_ntz11 """ select cmap4['1'] from ${table} """
qt_ltz_ntz12 """ select cmap4['2'] from ${table} """
qt_ltz_ntz13 """ select cmap4['3'] from ${table} """
qt_ltz_ntz14 """ select cmap4['4'] from ${table} """
qt_ltz_ntz15 """ select carray1 from ${table} """
qt_ltz_ntz16 """ select carray1[2] from ${table} """
qt_ltz_ntz17 """ select carray2 from ${table} """
qt_ltz_ntz18 """ select carray2[2] from ${table} """
qt_ltz_ntz19 """ select crow1 from ${table} """
qt_ltz_ntz20 """ select crow2 from ${table} """
qt_ltz_ntz21 """ select crow3 from ${table} """
}
def test_ltz_ntz_simple = { table ->
qt_ltz_ntz_simple2 """ select * from ${table} """
qt_ltz_ntz_simple3 """ select cmap1 from ${table} """
qt_ltz_ntz_simple4 """ select cmap1['2024-01-01 10:12:34.123456'] from ${table} """
qt_ltz_ntz_simple5 """ select cmap1['2024-01-03 10:12:34.123456'] from ${table} """
qt_ltz_ntz_simple6 """ select cmap2 from ${table} """
qt_ltz_ntz_simple7 """ select cmap2['2024-01-01 10:12:34.123456'] from ${table} """
qt_ltz_ntz_simple8 """ select cmap2['2024-01-03 10:12:34.123456'] from ${table} """
qt_ltz_ntz_simple9 """ select carray1 from ${table} """
qt_ltz_ntz_simple10 """ select carray1[2] from ${table} """
qt_ltz_ntz_simple11 """ select carray2 from ${table} """
qt_ltz_ntz_simple12 """ select carray2[2] from ${table} """
qt_ltz_ntz_simple13 """ select crow from ${table} """
qt_ltz_ntz_simple14 """ select STRUCT_ELEMENT(crow, 'crow1') from ${table} """
qt_ltz_ntz_simple15 """ select STRUCT_ELEMENT(crow, 'crow2') from ${table} """
}
def test_scale = {
def ts_scale_orc = """select * from ts_scale_orc"""
def ts_scale_parquet = """select * from ts_scale_parquet"""
qt_c1 ts_scale_orc
qt_c2 ts_scale_parquet
}
sql """set force_jni_scanner=true"""
qt_c1 ts_orc
qt_c2 ts_parquet
test_scale()
// test_ltz_ntz("test_timestamp_ntz_ltz_orc")
// test_ltz_ntz("test_timestamp_ntz_ltz_parquet")
test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_orc")
// test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_parquet")
sql """set force_jni_scanner=false"""
qt_c3 ts_orc
qt_c4 ts_parquet
test_scale()
// test_ltz_ntz("test_timestamp_ntz_ltz_orc")
// test_ltz_ntz("test_timestamp_ntz_ltz_parquet")
test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_orc")
test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_parquet")
} finally {
sql """set force_jni_scanner=false"""
}
// TODO:
// 1. Fix: native read + parquet + timestamp(7/8/9) (ts7,ts8,ts9), it will be 8 hour more
// 2. paimon bugs: native read + orc + timestamp_ltz.
// In the Shanghai time zone, the read data will be 8 hours less,
// because the data written by Flink to the orc file is UTC, but the time zone saved in the orc file is Shanghai.
// Currently, Paimon will not fix this problem, but recommends using the parquet format.
// 3. paimon bugs: jni read + parquet + row types + timestamp.
// Data of the timestamp type should be converted to the timestamp type, but paimon converted it to the long type.
// Will be fixed in paimon0.9
}
@ -69,7 +129,22 @@ suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_dock
SET 'table.local-time-zone' = 'Asia/Shanghai';
create table ts_orc (
CREATE CATALOG paimon_minio WITH (
'type' = 'paimon',
'warehouse' = 's3://warehouse/wh',
's3.endpoint' = 'http://172.21.0.101:19001',
's3.access-key' = 'admin',
's3.secret-key' = 'password',
's3.path.style.access' = 'true'
);
use catalog paimon_minio;
CREATE DATABASE IF NOT EXISTS flink_paimon;
use flink_paimon;
create table ts_scale_orc (
id int,
ts1 timestamp(1),
ts2 timestamp(2),
@ -91,7 +166,7 @@ ts18 timestamp_ltz(8),
ts19 timestamp_ltz(9))
WITH ('file.format' = 'orc','write-only'='true');
create table ts_parquet (
create table ts_scale_parquet (
id int,
ts1 timestamp(1),
ts2 timestamp(2),
@ -113,7 +188,7 @@ ts18 timestamp_ltz(8),
ts19 timestamp_ltz(9))
WITH ('file.format' = 'parquet','write-only'='true');
insert into ts_orc values (
insert into ts_scale_orc values (
1,
timestamp '2024-01-02 10:04:05.123456789',
timestamp '2024-01-02 10:04:05.123456789',
@ -134,7 +209,7 @@ insert into ts_orc values (
timestamp '2024-01-02 10:04:05.123456789',
timestamp '2024-01-02 10:04:05.123456789');
insert into ts_parquet values (
insert into ts_scale_parquet values (
1,
timestamp '2024-01-02 10:04:05.123456789',
timestamp '2024-01-02 10:04:05.123456789',
@ -155,4 +230,119 @@ insert into ts_parquet values (
timestamp '2024-01-02 10:04:05.123456789',
timestamp '2024-01-02 10:04:05.123456789');
create table test_timestamp_ntz_ltz_orc (
id int,
cmap1 MAP<timestamp, timestamp>,
cmap2 MAP<TIMESTAMP_LTZ, TIMESTAMP_LTZ>,
cmap3 MAP<int, ROW<cmap_row1 timestamp>>,
cmap4 MAP<int, ROW<cmap_rowid int, cmap_row1 timestamp, cmap_row2 TIMESTAMP_LTZ>>,
carray1 ARRAY<timestamp>,
carray2 ARRAY<TIMESTAMP_LTZ>,
crow1 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ>,
crow2 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ, crow_row ROW<crow_row1 timestamp, crow_row2 TIMESTAMP_LTZ>>,
crow3 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ, crow_array1 ARRAY<timestamp>, crow_array2 ARRAY<TIMESTAMP_LTZ>>
) with (
'write-only' = 'true',
'file.format' = 'orc'
);
create table test_timestamp_ntz_ltz_parquet (
id int,
cmap1 MAP<timestamp, timestamp>,
cmap2 MAP<TIMESTAMP_LTZ, TIMESTAMP_LTZ>,
cmap3 MAP<int, ROW<cmap_row1 timestamp>>,
cmap4 MAP<int, ROW<cmap_rowid int, cmap_row1 timestamp, cmap_row2 TIMESTAMP_LTZ>>,
carray1 ARRAY<timestamp>,
carray2 ARRAY<TIMESTAMP_LTZ>,
crow1 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ>,
crow2 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ, crow_row ROW<crow_row1 timestamp, crow_row2 TIMESTAMP_LTZ>>,
crow3 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ, crow_array1 ARRAY<timestamp>, crow_array2 ARRAY<TIMESTAMP_LTZ>>
) with (
'write-only' = 'true',
'file.format' = 'parquet'
);
insert into test_timestamp_ntz_ltz_orc values (
1,
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
MAP[1, ROW(timestamp '2024-01-01 10:12:34.123456'),
2, ROW(timestamp '2024-01-02 10:12:34.123456'),
3, ROW(timestamp '2024-01-03 10:12:34.123456')
],
MAP[1, ROW(1, timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'),
2, ROW(2, timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'),
3, ROW(3, timestamp '2024-01-05 10:12:34.123456', timestamp '2024-01-06 10:12:34.123456'),
4, ROW(4, timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456')
],
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'),
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ROW(timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456')),
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ARRAY[timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456', timestamp '2024-01-05 10:12:34.123456'], ARRAY[timestamp '2024-01-06 10:12:34.123456', timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456'])
);
-- Currently paimon does not support nested parquet formats
insert into test_timestamp_ntz_ltz_parquet values (
1,
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
MAP[1, ROW(timestamp '2024-01-01 10:12:34.123456'),
2, ROW(timestamp '2024-01-02 10:12:34.123456'),
3, ROW(timestamp '2024-01-03 10:12:34.123456')
],
MAP[1, ROW(1, timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'),
2, ROW(2, timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'),
3, ROW(3, timestamp '2024-01-05 10:12:34.123456', timestamp '2024-01-06 10:12:34.123456'),
4, ROW(4, timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456')
],
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'),
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ROW(timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456')),
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ARRAY[timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456', timestamp '2024-01-05 10:12:34.123456'], ARRAY[timestamp '2024-01-06 10:12:34.123456', timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456'])
);
create table test_timestamp_ntz_ltz_simple_orc (
id int,
cmap1 MAP<timestamp, timestamp>,
cmap2 MAP<TIMESTAMP_LTZ, TIMESTAMP_LTZ>,
carray1 ARRAY<timestamp>,
carray2 ARRAY<TIMESTAMP_LTZ>,
crow ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ>
) with (
'write-only' = 'true',
'file.format' = 'orc'
);
create table test_timestamp_ntz_ltz_simple_parquet (
id int,
cmap1 MAP<timestamp, timestamp>,
cmap2 MAP<TIMESTAMP_LTZ, TIMESTAMP_LTZ>,
carray1 ARRAY<timestamp>,
carray2 ARRAY<TIMESTAMP_LTZ>,
crow ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ>
) with (
'write-only' = 'true',
'file.format' = 'parquet'
);
insert into test_timestamp_ntz_ltz_simple_orc values (
1,
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456')
);
insert into test_timestamp_ntz_ltz_simple_parquet values (
1,
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456')
);
*/