bp: #37716
This commit is contained in:
@ -823,20 +823,9 @@ Status VFileScanner::_get_next_reader() {
|
||||
break;
|
||||
}
|
||||
case TFileFormatType::FORMAT_PARQUET: {
|
||||
static const cctz::time_zone utc0 = cctz::utc_time_zone();
|
||||
cctz::time_zone* tz;
|
||||
if (range.__isset.table_format_params &&
|
||||
range.table_format_params.table_format_type == "paimon") {
|
||||
// The timestmap generated by paimon does not carry metadata information (e.g., isAdjustToUTC, etc.),
|
||||
// and the stored data is UTC0 by default, so it is directly set to the UTC time zone.
|
||||
// In version 0.7, paimon fixed this issue and can remove the judgment here
|
||||
tz = const_cast<cctz::time_zone*>(&utc0);
|
||||
} else {
|
||||
tz = const_cast<cctz::time_zone*>(&_state->timezone_obj());
|
||||
}
|
||||
std::unique_ptr<ParquetReader> parquet_reader = ParquetReader::create_unique(
|
||||
_profile, *_params, range, _state->query_options().batch_size, tz,
|
||||
_io_ctx.get(), _state,
|
||||
_profile, *_params, range, _state->query_options().batch_size,
|
||||
const_cast<cctz::time_zone*>(&_state->timezone_obj()), _io_ctx.get(), _state,
|
||||
_shoudl_enable_file_meta_cache() ? ExecEnv::GetInstance()->file_meta_cache()
|
||||
: nullptr,
|
||||
_state->query_options().enable_parquet_lazy_mat);
|
||||
|
||||
@ -21,3 +21,7 @@ docker exec iceberg-rest bash -c 'cp /tmp/iceberg_rest_mode\=memory /mnt/data/in
|
||||
|
||||
# save iceberg from s3
|
||||
docker exec mc bash -c 'mc cp -r minio/warehouse /mnt/data/input/minio'
|
||||
|
||||
# package zip
|
||||
cp -r data iceberg_data
|
||||
zip -rq iceberg_data.zip iceberg_data
|
||||
@ -24,6 +24,12 @@ import org.apache.paimon.data.DataGetters;
|
||||
import org.apache.paimon.data.InternalArray;
|
||||
import org.apache.paimon.data.InternalMap;
|
||||
import org.apache.paimon.data.InternalRow;
|
||||
import org.apache.paimon.data.Timestamp;
|
||||
import org.apache.paimon.types.ArrayType;
|
||||
import org.apache.paimon.types.DataType;
|
||||
import org.apache.paimon.types.LocalZonedTimestampType;
|
||||
import org.apache.paimon.types.MapType;
|
||||
import org.apache.paimon.types.RowType;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -31,6 +37,7 @@ import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneId;
|
||||
import java.util.List;
|
||||
|
||||
public class PaimonColumnValue implements ColumnValue {
|
||||
@ -38,19 +45,22 @@ public class PaimonColumnValue implements ColumnValue {
|
||||
private int idx;
|
||||
private DataGetters record;
|
||||
private ColumnType dorisType;
|
||||
private DataType dataType;
|
||||
|
||||
public PaimonColumnValue() {
|
||||
}
|
||||
|
||||
public PaimonColumnValue(DataGetters record, int idx, ColumnType columnType) {
|
||||
public PaimonColumnValue(DataGetters record, int idx, ColumnType columnType, DataType dataType) {
|
||||
this.idx = idx;
|
||||
this.record = record;
|
||||
this.dorisType = columnType;
|
||||
this.dataType = dataType;
|
||||
}
|
||||
|
||||
public void setIdx(int idx, ColumnType dorisType) {
|
||||
public void setIdx(int idx, ColumnType dorisType, DataType dataType) {
|
||||
this.idx = idx;
|
||||
this.dorisType = dorisType;
|
||||
this.dataType = dataType;
|
||||
}
|
||||
|
||||
public void setOffsetRow(InternalRow record) {
|
||||
@ -124,7 +134,12 @@ public class PaimonColumnValue implements ColumnValue {
|
||||
|
||||
@Override
|
||||
public LocalDateTime getDateTime() {
|
||||
return record.getTimestamp(idx, dorisType.getPrecision()).toLocalDateTime();
|
||||
Timestamp ts = record.getTimestamp(idx, dorisType.getPrecision());
|
||||
if (dataType instanceof LocalZonedTimestampType) {
|
||||
return LocalDateTime.ofInstant(ts.toInstant(), ZoneId.systemDefault());
|
||||
} else {
|
||||
return ts.toLocalDateTime();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -142,7 +157,7 @@ public class PaimonColumnValue implements ColumnValue {
|
||||
InternalArray recordArray = record.getArray(idx);
|
||||
for (int i = 0; i < recordArray.size(); i++) {
|
||||
PaimonColumnValue arrayColumnValue = new PaimonColumnValue((DataGetters) recordArray, i,
|
||||
dorisType.getChildTypes().get(0));
|
||||
dorisType.getChildTypes().get(0), ((ArrayType) dataType).getElementType());
|
||||
values.add(arrayColumnValue);
|
||||
}
|
||||
}
|
||||
@ -153,13 +168,13 @@ public class PaimonColumnValue implements ColumnValue {
|
||||
InternalArray key = map.keyArray();
|
||||
for (int i = 0; i < key.size(); i++) {
|
||||
PaimonColumnValue keyColumnValue = new PaimonColumnValue((DataGetters) key, i,
|
||||
dorisType.getChildTypes().get(0));
|
||||
dorisType.getChildTypes().get(0), ((MapType) dataType).getKeyType());
|
||||
keys.add(keyColumnValue);
|
||||
}
|
||||
InternalArray value = map.valueArray();
|
||||
for (int i = 0; i < value.size(); i++) {
|
||||
PaimonColumnValue valueColumnValue = new PaimonColumnValue((DataGetters) value, i,
|
||||
dorisType.getChildTypes().get(1));
|
||||
dorisType.getChildTypes().get(1), ((MapType) dataType).getValueType());
|
||||
values.add(valueColumnValue);
|
||||
}
|
||||
}
|
||||
@ -169,7 +184,8 @@ public class PaimonColumnValue implements ColumnValue {
|
||||
// todo: support pruned struct fields
|
||||
InternalRow row = record.getRow(idx, structFieldIndex.size());
|
||||
for (int i : structFieldIndex) {
|
||||
values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i)));
|
||||
values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i),
|
||||
((RowType) dataType).getFields().get(i).type()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -53,6 +53,7 @@ public class PaimonJniScanner extends JniScanner {
|
||||
private RecordReader<InternalRow> reader;
|
||||
private final PaimonColumnValue columnValue = new PaimonColumnValue();
|
||||
private List<String> paimonAllFieldNames;
|
||||
private List<DataType> paimonDataTypeList;
|
||||
|
||||
private long ctlId;
|
||||
private long dbId;
|
||||
@ -99,7 +100,7 @@ public class PaimonJniScanner extends JniScanner {
|
||||
initTable();
|
||||
initReader();
|
||||
resetDatetimeV2Precision();
|
||||
} catch (Exception e) {
|
||||
} catch (Throwable e) {
|
||||
LOG.warn("Failed to open paimon_scanner: " + e.getMessage(), e);
|
||||
throw e;
|
||||
}
|
||||
@ -114,9 +115,12 @@ public class PaimonJniScanner extends JniScanner {
|
||||
+ " Please refresh table and try again",
|
||||
fields.length, paimonAllFieldNames.size()));
|
||||
}
|
||||
readBuilder.withProjection(getProjected());
|
||||
int[] projected = getProjected();
|
||||
readBuilder.withProjection(projected);
|
||||
readBuilder.withFilter(getPredicates());
|
||||
reader = readBuilder.newRead().createReader(getSplit());
|
||||
paimonDataTypeList =
|
||||
Arrays.stream(projected).mapToObj(i -> table.rowType().getTypeAt(i)).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private int[] getProjected() {
|
||||
@ -175,7 +179,7 @@ public class PaimonJniScanner extends JniScanner {
|
||||
while ((record = recordIterator.next()) != null) {
|
||||
columnValue.setOffsetRow(record);
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
columnValue.setIdx(i, types[i]);
|
||||
columnValue.setIdx(i, types[i], paimonDataTypeList.get(i));
|
||||
appendData(i, columnValue);
|
||||
}
|
||||
rows++;
|
||||
@ -189,8 +193,8 @@ public class PaimonJniScanner extends JniScanner {
|
||||
} catch (Exception e) {
|
||||
close();
|
||||
LOG.warn("Failed to get the next batch of paimon. "
|
||||
+ "split: {}, requiredFieldNames: {}, paimonAllFieldNames: {}",
|
||||
getSplit(), params.get("required_fields"), paimonAllFieldNames, e);
|
||||
+ "split: {}, requiredFieldNames: {}, paimonAllFieldNames: {}, dataType: {}",
|
||||
getSplit(), params.get("required_fields"), paimonAllFieldNames, paimonDataTypeList, e);
|
||||
throw new IOException(e);
|
||||
}
|
||||
return rows;
|
||||
|
||||
@ -88,6 +88,7 @@ public class PaimonExternalTable extends ExternalTable {
|
||||
}
|
||||
|
||||
private Type paimonPrimitiveTypeToDorisType(org.apache.paimon.types.DataType dataType) {
|
||||
int tsScale = 3; // default
|
||||
switch (dataType.getTypeRoot()) {
|
||||
case BOOLEAN:
|
||||
return Type.BOOLEAN;
|
||||
@ -114,20 +115,26 @@ public class PaimonExternalTable extends ExternalTable {
|
||||
case DATE:
|
||||
return ScalarType.createDateV2Type();
|
||||
case TIMESTAMP_WITHOUT_TIME_ZONE:
|
||||
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
|
||||
int scale = 3; // default
|
||||
if (dataType instanceof org.apache.paimon.types.TimestampType) {
|
||||
scale = ((org.apache.paimon.types.TimestampType) dataType).getPrecision();
|
||||
if (scale > 6) {
|
||||
scale = 6;
|
||||
tsScale = ((org.apache.paimon.types.TimestampType) dataType).getPrecision();
|
||||
if (tsScale > 6) {
|
||||
tsScale = 6;
|
||||
}
|
||||
} else if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) {
|
||||
scale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision();
|
||||
if (scale > 6) {
|
||||
scale = 6;
|
||||
tsScale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision();
|
||||
if (tsScale > 6) {
|
||||
tsScale = 6;
|
||||
}
|
||||
}
|
||||
return ScalarType.createDatetimeV2Type(scale);
|
||||
return ScalarType.createDatetimeV2Type(tsScale);
|
||||
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
|
||||
if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) {
|
||||
tsScale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision();
|
||||
if (tsScale > 6) {
|
||||
tsScale = 6;
|
||||
}
|
||||
}
|
||||
return ScalarType.createDatetimeV2Type(tsScale);
|
||||
case ARRAY:
|
||||
ArrayType arrayType = (ArrayType) dataType;
|
||||
Type innerType = paimonPrimitiveTypeToDorisType(arrayType.getElementType());
|
||||
|
||||
@ -0,0 +1,139 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !c1 --
|
||||
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456
|
||||
|
||||
-- !c2 --
|
||||
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456
|
||||
|
||||
-- !ltz_ntz_simple2 --
|
||||
1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple3 --
|
||||
{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple4 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple5 --
|
||||
2024-01-04T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple6 --
|
||||
{"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple7 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple8 --
|
||||
2024-01-04T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple9 --
|
||||
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
|
||||
|
||||
-- !ltz_ntz_simple10 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple11 --
|
||||
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
|
||||
|
||||
-- !ltz_ntz_simple12 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple13 --
|
||||
{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple14 --
|
||||
2024-01-01T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple15 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !c1 --
|
||||
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T02:04:05.100 2024-01-02T02:04:05.120 2024-01-02T02:04:05.123 2024-01-02T02:04:05.123400 2024-01-02T02:04:05.123450 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456
|
||||
|
||||
-- !c2 --
|
||||
1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456
|
||||
|
||||
-- !ltz_ntz_simple2 --
|
||||
1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 02:12:34.123456":"2024-01-04 02:12:34.123456", "2024-01-01 02:12:34.123456":"2024-01-02 02:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 02:12:34.123456", "2024-01-02 02:12:34.123456", "2024-01-03 02:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 02:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple3 --
|
||||
{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple4 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple5 --
|
||||
2024-01-04T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple6 --
|
||||
{"2024-01-03 02:12:34.123456":"2024-01-04 02:12:34.123456", "2024-01-01 02:12:34.123456":"2024-01-02 02:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple7 --
|
||||
\N
|
||||
|
||||
-- !ltz_ntz_simple8 --
|
||||
\N
|
||||
|
||||
-- !ltz_ntz_simple9 --
|
||||
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
|
||||
|
||||
-- !ltz_ntz_simple10 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple11 --
|
||||
["2024-01-01 02:12:34.123456", "2024-01-02 02:12:34.123456", "2024-01-03 02:12:34.123456"]
|
||||
|
||||
-- !ltz_ntz_simple12 --
|
||||
2024-01-02T02:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple13 --
|
||||
{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 02:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple14 --
|
||||
2024-01-01T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple15 --
|
||||
2024-01-02T02:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple2 --
|
||||
1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple3 --
|
||||
{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple4 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple5 --
|
||||
2024-01-04T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple6 --
|
||||
{"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple7 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple8 --
|
||||
2024-01-04T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple9 --
|
||||
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
|
||||
|
||||
-- !ltz_ntz_simple10 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple11 --
|
||||
["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"]
|
||||
|
||||
-- !ltz_ntz_simple12 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple13 --
|
||||
{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"}
|
||||
|
||||
-- !ltz_ntz_simple14 --
|
||||
2024-01-01T10:12:34.123456
|
||||
|
||||
-- !ltz_ntz_simple15 --
|
||||
2024-01-02T10:12:34.123456
|
||||
|
||||
@ -17,14 +17,9 @@
|
||||
|
||||
suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_docker_doris") {
|
||||
|
||||
def ts_orc = """select * from ts_orc"""
|
||||
def ts_parquet = """select * from ts_parquet"""
|
||||
|
||||
String enabled = context.config.otherConfigs.get("enablePaimonTest")
|
||||
// The timestamp type of paimon has no logical or converted type,
|
||||
// and is conflict with column type change from bigint to timestamp.
|
||||
// Deprecated currently.
|
||||
if (enabled == null || !enabled.equalsIgnoreCase("enable_deprecated_case")) {
|
||||
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
|
||||
logger.info("disabled paimon test")
|
||||
return
|
||||
}
|
||||
|
||||
@ -46,20 +41,85 @@ suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_dock
|
||||
logger.info("catalog " + catalog_name + " created")
|
||||
sql """switch ${catalog_name};"""
|
||||
logger.info("switched to catalog " + catalog_name)
|
||||
sql """use test_paimon_db;"""
|
||||
sql """use flink_paimon;"""
|
||||
logger.info("use test_paimon_db")
|
||||
|
||||
|
||||
def test_ltz_ntz = { table ->
|
||||
qt_ltz_ntz2 """ select * from ${table} """
|
||||
qt_ltz_ntz3 """ select cmap1 from ${table} """
|
||||
qt_ltz_ntz4 """ select cmap1['2024-01-01 10:12:34.123456'] from ${table} """
|
||||
qt_ltz_ntz5 """ select cmap1['2024-01-03 10:12:34.123456'] from ${table} """
|
||||
qt_ltz_ntz6 """ select cmap2 from ${table} """
|
||||
qt_ltz_ntz7 """ select cmap2['2024-01-01 10:12:34.123456'] from ${table} """
|
||||
qt_ltz_ntz8 """ select cmap2['2024-01-03 10:12:34.123456'] from ${table} """
|
||||
qt_ltz_ntz9 """ select cmap3 from ${table} """
|
||||
qt_ltz_ntz10 """ select cmap4 from ${table} """
|
||||
qt_ltz_ntz11 """ select cmap4['1'] from ${table} """
|
||||
qt_ltz_ntz12 """ select cmap4['2'] from ${table} """
|
||||
qt_ltz_ntz13 """ select cmap4['3'] from ${table} """
|
||||
qt_ltz_ntz14 """ select cmap4['4'] from ${table} """
|
||||
qt_ltz_ntz15 """ select carray1 from ${table} """
|
||||
qt_ltz_ntz16 """ select carray1[2] from ${table} """
|
||||
qt_ltz_ntz17 """ select carray2 from ${table} """
|
||||
qt_ltz_ntz18 """ select carray2[2] from ${table} """
|
||||
qt_ltz_ntz19 """ select crow1 from ${table} """
|
||||
qt_ltz_ntz20 """ select crow2 from ${table} """
|
||||
qt_ltz_ntz21 """ select crow3 from ${table} """
|
||||
}
|
||||
|
||||
def test_ltz_ntz_simple = { table ->
|
||||
qt_ltz_ntz_simple2 """ select * from ${table} """
|
||||
qt_ltz_ntz_simple3 """ select cmap1 from ${table} """
|
||||
qt_ltz_ntz_simple4 """ select cmap1['2024-01-01 10:12:34.123456'] from ${table} """
|
||||
qt_ltz_ntz_simple5 """ select cmap1['2024-01-03 10:12:34.123456'] from ${table} """
|
||||
qt_ltz_ntz_simple6 """ select cmap2 from ${table} """
|
||||
qt_ltz_ntz_simple7 """ select cmap2['2024-01-01 10:12:34.123456'] from ${table} """
|
||||
qt_ltz_ntz_simple8 """ select cmap2['2024-01-03 10:12:34.123456'] from ${table} """
|
||||
qt_ltz_ntz_simple9 """ select carray1 from ${table} """
|
||||
qt_ltz_ntz_simple10 """ select carray1[2] from ${table} """
|
||||
qt_ltz_ntz_simple11 """ select carray2 from ${table} """
|
||||
qt_ltz_ntz_simple12 """ select carray2[2] from ${table} """
|
||||
qt_ltz_ntz_simple13 """ select crow from ${table} """
|
||||
qt_ltz_ntz_simple14 """ select STRUCT_ELEMENT(crow, 'crow1') from ${table} """
|
||||
qt_ltz_ntz_simple15 """ select STRUCT_ELEMENT(crow, 'crow2') from ${table} """
|
||||
}
|
||||
|
||||
def test_scale = {
|
||||
def ts_scale_orc = """select * from ts_scale_orc"""
|
||||
def ts_scale_parquet = """select * from ts_scale_parquet"""
|
||||
qt_c1 ts_scale_orc
|
||||
qt_c2 ts_scale_parquet
|
||||
|
||||
}
|
||||
|
||||
sql """set force_jni_scanner=true"""
|
||||
qt_c1 ts_orc
|
||||
qt_c2 ts_parquet
|
||||
test_scale()
|
||||
// test_ltz_ntz("test_timestamp_ntz_ltz_orc")
|
||||
// test_ltz_ntz("test_timestamp_ntz_ltz_parquet")
|
||||
test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_orc")
|
||||
// test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_parquet")
|
||||
|
||||
sql """set force_jni_scanner=false"""
|
||||
qt_c3 ts_orc
|
||||
qt_c4 ts_parquet
|
||||
|
||||
test_scale()
|
||||
// test_ltz_ntz("test_timestamp_ntz_ltz_orc")
|
||||
// test_ltz_ntz("test_timestamp_ntz_ltz_parquet")
|
||||
test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_orc")
|
||||
test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_parquet")
|
||||
} finally {
|
||||
sql """set force_jni_scanner=false"""
|
||||
}
|
||||
|
||||
// TODO:
|
||||
// 1. Fix: native read + parquet + timestamp(7/8/9) (ts7,ts8,ts9), it will be 8 hour more
|
||||
// 2. paimon bugs: native read + orc + timestamp_ltz.
|
||||
// In the Shanghai time zone, the read data will be 8 hours less,
|
||||
// because the data written by Flink to the orc file is UTC, but the time zone saved in the orc file is Shanghai.
|
||||
// Currently, Paimon will not fix this problem, but recommends using the parquet format.
|
||||
// 3. paimon bugs: jni read + parquet + row types + timestamp.
|
||||
// Data of the timestamp type should be converted to the timestamp type, but paimon converted it to the long type.
|
||||
// Will be fixed in paimon0.9
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -69,7 +129,22 @@ suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_dock
|
||||
|
||||
SET 'table.local-time-zone' = 'Asia/Shanghai';
|
||||
|
||||
create table ts_orc (
|
||||
CREATE CATALOG paimon_minio WITH (
|
||||
'type' = 'paimon',
|
||||
'warehouse' = 's3://warehouse/wh',
|
||||
's3.endpoint' = 'http://172.21.0.101:19001',
|
||||
's3.access-key' = 'admin',
|
||||
's3.secret-key' = 'password',
|
||||
's3.path.style.access' = 'true'
|
||||
);
|
||||
|
||||
use catalog paimon_minio;
|
||||
|
||||
CREATE DATABASE IF NOT EXISTS flink_paimon;
|
||||
|
||||
use flink_paimon;
|
||||
|
||||
create table ts_scale_orc (
|
||||
id int,
|
||||
ts1 timestamp(1),
|
||||
ts2 timestamp(2),
|
||||
@ -91,7 +166,7 @@ ts18 timestamp_ltz(8),
|
||||
ts19 timestamp_ltz(9))
|
||||
WITH ('file.format' = 'orc','write-only'='true');
|
||||
|
||||
create table ts_parquet (
|
||||
create table ts_scale_parquet (
|
||||
id int,
|
||||
ts1 timestamp(1),
|
||||
ts2 timestamp(2),
|
||||
@ -113,7 +188,7 @@ ts18 timestamp_ltz(8),
|
||||
ts19 timestamp_ltz(9))
|
||||
WITH ('file.format' = 'parquet','write-only'='true');
|
||||
|
||||
insert into ts_orc values (
|
||||
insert into ts_scale_orc values (
|
||||
1,
|
||||
timestamp '2024-01-02 10:04:05.123456789',
|
||||
timestamp '2024-01-02 10:04:05.123456789',
|
||||
@ -134,7 +209,7 @@ insert into ts_orc values (
|
||||
timestamp '2024-01-02 10:04:05.123456789',
|
||||
timestamp '2024-01-02 10:04:05.123456789');
|
||||
|
||||
insert into ts_parquet values (
|
||||
insert into ts_scale_parquet values (
|
||||
1,
|
||||
timestamp '2024-01-02 10:04:05.123456789',
|
||||
timestamp '2024-01-02 10:04:05.123456789',
|
||||
@ -155,4 +230,119 @@ insert into ts_parquet values (
|
||||
timestamp '2024-01-02 10:04:05.123456789',
|
||||
timestamp '2024-01-02 10:04:05.123456789');
|
||||
|
||||
create table test_timestamp_ntz_ltz_orc (
|
||||
id int,
|
||||
cmap1 MAP<timestamp, timestamp>,
|
||||
cmap2 MAP<TIMESTAMP_LTZ, TIMESTAMP_LTZ>,
|
||||
cmap3 MAP<int, ROW<cmap_row1 timestamp>>,
|
||||
cmap4 MAP<int, ROW<cmap_rowid int, cmap_row1 timestamp, cmap_row2 TIMESTAMP_LTZ>>,
|
||||
carray1 ARRAY<timestamp>,
|
||||
carray2 ARRAY<TIMESTAMP_LTZ>,
|
||||
crow1 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ>,
|
||||
crow2 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ, crow_row ROW<crow_row1 timestamp, crow_row2 TIMESTAMP_LTZ>>,
|
||||
crow3 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ, crow_array1 ARRAY<timestamp>, crow_array2 ARRAY<TIMESTAMP_LTZ>>
|
||||
) with (
|
||||
'write-only' = 'true',
|
||||
'file.format' = 'orc'
|
||||
);
|
||||
|
||||
create table test_timestamp_ntz_ltz_parquet (
|
||||
id int,
|
||||
cmap1 MAP<timestamp, timestamp>,
|
||||
cmap2 MAP<TIMESTAMP_LTZ, TIMESTAMP_LTZ>,
|
||||
cmap3 MAP<int, ROW<cmap_row1 timestamp>>,
|
||||
cmap4 MAP<int, ROW<cmap_rowid int, cmap_row1 timestamp, cmap_row2 TIMESTAMP_LTZ>>,
|
||||
carray1 ARRAY<timestamp>,
|
||||
carray2 ARRAY<TIMESTAMP_LTZ>,
|
||||
crow1 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ>,
|
||||
crow2 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ, crow_row ROW<crow_row1 timestamp, crow_row2 TIMESTAMP_LTZ>>,
|
||||
crow3 ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ, crow_array1 ARRAY<timestamp>, crow_array2 ARRAY<TIMESTAMP_LTZ>>
|
||||
) with (
|
||||
'write-only' = 'true',
|
||||
'file.format' = 'parquet'
|
||||
);
|
||||
|
||||
insert into test_timestamp_ntz_ltz_orc values (
|
||||
1,
|
||||
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
|
||||
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
|
||||
MAP[1, ROW(timestamp '2024-01-01 10:12:34.123456'),
|
||||
2, ROW(timestamp '2024-01-02 10:12:34.123456'),
|
||||
3, ROW(timestamp '2024-01-03 10:12:34.123456')
|
||||
],
|
||||
MAP[1, ROW(1, timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'),
|
||||
2, ROW(2, timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'),
|
||||
3, ROW(3, timestamp '2024-01-05 10:12:34.123456', timestamp '2024-01-06 10:12:34.123456'),
|
||||
4, ROW(4, timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456')
|
||||
],
|
||||
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
|
||||
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
|
||||
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'),
|
||||
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ROW(timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456')),
|
||||
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ARRAY[timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456', timestamp '2024-01-05 10:12:34.123456'], ARRAY[timestamp '2024-01-06 10:12:34.123456', timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456'])
|
||||
);
|
||||
|
||||
-- Currently paimon does not support nested parquet formats
|
||||
insert into test_timestamp_ntz_ltz_parquet values (
|
||||
1,
|
||||
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
|
||||
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
|
||||
MAP[1, ROW(timestamp '2024-01-01 10:12:34.123456'),
|
||||
2, ROW(timestamp '2024-01-02 10:12:34.123456'),
|
||||
3, ROW(timestamp '2024-01-03 10:12:34.123456')
|
||||
],
|
||||
MAP[1, ROW(1, timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'),
|
||||
2, ROW(2, timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'),
|
||||
3, ROW(3, timestamp '2024-01-05 10:12:34.123456', timestamp '2024-01-06 10:12:34.123456'),
|
||||
4, ROW(4, timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456')
|
||||
],
|
||||
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
|
||||
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
|
||||
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'),
|
||||
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ROW(timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456')),
|
||||
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ARRAY[timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456', timestamp '2024-01-05 10:12:34.123456'], ARRAY[timestamp '2024-01-06 10:12:34.123456', timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456'])
|
||||
);
|
||||
|
||||
create table test_timestamp_ntz_ltz_simple_orc (
|
||||
id int,
|
||||
cmap1 MAP<timestamp, timestamp>,
|
||||
cmap2 MAP<TIMESTAMP_LTZ, TIMESTAMP_LTZ>,
|
||||
carray1 ARRAY<timestamp>,
|
||||
carray2 ARRAY<TIMESTAMP_LTZ>,
|
||||
crow ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ>
|
||||
) with (
|
||||
'write-only' = 'true',
|
||||
'file.format' = 'orc'
|
||||
);
|
||||
|
||||
create table test_timestamp_ntz_ltz_simple_parquet (
|
||||
id int,
|
||||
cmap1 MAP<timestamp, timestamp>,
|
||||
cmap2 MAP<TIMESTAMP_LTZ, TIMESTAMP_LTZ>,
|
||||
carray1 ARRAY<timestamp>,
|
||||
carray2 ARRAY<TIMESTAMP_LTZ>,
|
||||
crow ROW<crow1 timestamp, crow2 TIMESTAMP_LTZ>
|
||||
) with (
|
||||
'write-only' = 'true',
|
||||
'file.format' = 'parquet'
|
||||
);
|
||||
|
||||
insert into test_timestamp_ntz_ltz_simple_orc values (
|
||||
1,
|
||||
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
|
||||
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
|
||||
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
|
||||
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
|
||||
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456')
|
||||
);
|
||||
|
||||
insert into test_timestamp_ntz_ltz_simple_parquet values (
|
||||
1,
|
||||
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
|
||||
MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'],
|
||||
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
|
||||
ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'],
|
||||
ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456')
|
||||
);
|
||||
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user