From ce18f1148a5e06d56ca31809225fcaa1888bd04f Mon Sep 17 00:00:00 2001
From: zhangdong <493738387@qq.com>
Date: Tue, 17 Oct 2023 22:07:13 +0800
Subject: [PATCH] [improvement](catalog)compatible with paimon 0.5 (#24985)
Make the Paimon scanner compatible with Paimon 0.5.
Add a p0 regression test for Paimon; set enablePaimonTest=true to run it.
---
.../docker-compose/hive/scripts/README | 5 +
.../hive/scripts/hive-metastore.sh | 17 ++
fe/be-java-extensions/paimon-scanner/pom.xml | 47 +---
.../apache/doris/paimon/PaimonJniScanner.java | 2 +-
.../apache/doris/paimon/PaimonTypeUtils.java | 229 ++++++++++++++++++
fe/pom.xml | 4 +-
regression-test/conf/regression-conf.groovy | 3 +-
.../paimon/test_paimon_catalog.out | 68 ++++++
.../paimon/test_paimon_catalog.groovy | 61 +++++
9 files changed, 394 insertions(+), 42 deletions(-)
create mode 100644 fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java
create mode 100644 regression-test/data/external_table_p0/paimon/test_paimon_catalog.out
diff --git a/docker/thirdparties/docker-compose/hive/scripts/README b/docker/thirdparties/docker-compose/hive/scripts/README
index a50efc78f0..4eaf389348 100644
--- a/docker/thirdparties/docker-compose/hive/scripts/README
+++ b/docker/thirdparties/docker-compose/hive/scripts/README
@@ -2,3 +2,8 @@
https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/tpch1_parquet/tpch1.db.tar.gz
2. Unzip and name it to "tpch1.db"
+
+3. Download paimon file from:
+ https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/paimon/paimon1.tar.gz
+
+4. Unzip and name it to "paimon1"
diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
index 3ac47e4c11..2d19c7aa1c 100755
--- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
+++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
@@ -40,6 +40,23 @@ hadoop fs -mkdir -p /user/doris/
echo "hadoop fs -put /mnt/scripts/tpch1.db /user/doris/"
hadoop fs -put /mnt/scripts/tpch1.db /user/doris/
+
+# Fetch the paimon1 test warehouse if it is not already present. For local testing you may comment out this paimon section.
+if [[ ! -d "/mnt/scripts/paimon1" ]]; then
+ echo "/mnt/scripts/paimon1 does not exist"
+ cd /mnt/scripts/
+ curl -f -O https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/paimon/paimon1.tar.gz
+ tar -zxf paimon1.tar.gz
+ rm -rf paimon1.tar.gz
+ cd -
+else
+ echo "/mnt/scripts/paimon1 exist, continue !"
+fi
+
+## put paimon1
+echo "hadoop fs -put /mnt/scripts/paimon1 /user/doris/"
+hadoop fs -put /mnt/scripts/paimon1 /user/doris/
+
## put other preinstalled data
echo "hadoop fs -put /mnt/scripts/preinstalled_data /user/doris/"
hadoop fs -put /mnt/scripts/preinstalled_data /user/doris/
diff --git a/fe/be-java-extensions/paimon-scanner/pom.xml b/fe/be-java-extensions/paimon-scanner/pom.xml
index 76da4288d0..0b51369130 100644
--- a/fe/be-java-extensions/paimon-scanner/pom.xml
+++ b/fe/be-java-extensions/paimon-scanner/pom.xml
@@ -49,55 +49,26 @@ under the License.
org.apache.paimon
- paimon-bundle
+ paimon-core
${paimon.version}
+
org.apache.paimon
- paimon-hive-connector-2.3
+ paimon-common
${paimon.version}
+
org.apache.paimon
- paimon-s3
+ paimon-format
${paimon.version}
+
- org.apache.paimon
- paimon-oss-impl
- ${paimon.version}
-
-
- org.apache.thrift
- libthrift
- 0.9.3
-
-
- com.facebook.presto.hive
- hive-apache
- ${presto.hive.version}
-
-
- org.slf4j
- slf4j-log4j12
-
-
-
-
- org.apache.hadoop
- hadoop-client
-
-
- org.apache.hadoop
- hadoop-common
-
-
- org.apache.hadoop
- hadoop-hdfs
-
-
- commons-io
- commons-io
+ org.apache.doris
+ hive-catalog-shade
+ ${doris.hive.catalog.shade.version}
diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
index 4967562789..4e3cda8222 100644
--- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
+++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
@@ -118,7 +118,7 @@ public class PaimonJniScanner extends JniScanner {
fields[i], paimonAllFieldNames));
}
DataType dataType = table.rowType().getTypeAt(index);
- columnTypes[i] = ColumnType.parseType(fields[i], dataType.toString());
+ columnTypes[i] = PaimonTypeUtils.fromPaimonType(fields[i], dataType);
}
super.types = columnTypes;
}
diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java
new file mode 100644
index 0000000000..b2a9450a6d
--- /dev/null
+++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java
@@ -0,0 +1,229 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.paimon;
+
+import org.apache.doris.common.jni.vec.ColumnType;
+import org.apache.doris.common.jni.vec.ColumnType.Type;
+
+import org.apache.paimon.types.ArrayType;
+import org.apache.paimon.types.BigIntType;
+import org.apache.paimon.types.BinaryType;
+import org.apache.paimon.types.BooleanType;
+import org.apache.paimon.types.CharType;
+import org.apache.paimon.types.DataType;
+import org.apache.paimon.types.DataTypeDefaultVisitor;
+import org.apache.paimon.types.DateType;
+import org.apache.paimon.types.DecimalType;
+import org.apache.paimon.types.DoubleType;
+import org.apache.paimon.types.FloatType;
+import org.apache.paimon.types.IntType;
+import org.apache.paimon.types.LocalZonedTimestampType;
+import org.apache.paimon.types.MapType;
+import org.apache.paimon.types.MultisetType;
+import org.apache.paimon.types.RowType;
+import org.apache.paimon.types.SmallIntType;
+import org.apache.paimon.types.TimeType;
+import org.apache.paimon.types.TimestampType;
+import org.apache.paimon.types.TinyIntType;
+import org.apache.paimon.types.VarBinaryType;
+import org.apache.paimon.types.VarCharType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Convert paimon type to doris type.
+ */
+public class PaimonTypeUtils {
+ private static final Logger LOG = LoggerFactory.getLogger(PaimonTypeUtils.class);
+
+ private PaimonTypeUtils() {
+ }
+
+ /** Builds the Doris ColumnType for {@code columnName} from the visitor's mapping of {@code type}. */
+ public static ColumnType fromPaimonType(String columnName, DataType type) {
+ PaimonColumnType mapped = type.accept(PaimonToDorisTypeVisitor.INSTANCE);
+ return new ColumnType(columnName, mapped.getType(), mapped.getLength(), mapped.getPrecision(),
+ mapped.getScale());
+ }
+
+ private static class PaimonToDorisTypeVisitor extends DataTypeDefaultVisitor<PaimonColumnType> {
+ // Paimon -> Doris type mapping; array/multiset/map/row go through defaultMethod and become UNSUPPORTED.
+ private static final PaimonToDorisTypeVisitor INSTANCE = new PaimonToDorisTypeVisitor();
+
+ @Override
+ public PaimonColumnType visit(CharType charType) {
+ return new PaimonColumnType(Type.CHAR, charType.getLength());
+ }
+
+ @Override
+ public PaimonColumnType visit(VarCharType varCharType) {
+ return new PaimonColumnType(Type.VARCHAR, varCharType.getLength());
+ }
+
+ @Override
+ public PaimonColumnType visit(BooleanType booleanType) {
+ return new PaimonColumnType(Type.BOOLEAN);
+ }
+
+ @Override
+ public PaimonColumnType visit(BinaryType binaryType) {
+ return new PaimonColumnType(Type.BINARY);
+ }
+
+ @Override
+ public PaimonColumnType visit(VarBinaryType varBinaryType) {
+ return new PaimonColumnType(Type.BINARY);
+ }
+
+ @Override
+ public PaimonColumnType visit(DecimalType decimalType) {
+ return new PaimonColumnType(Type.DECIMAL128, decimalType.getPrecision(), decimalType.getScale());
+ }
+
+ @Override
+ public PaimonColumnType visit(TinyIntType tinyIntType) {
+ return new PaimonColumnType(Type.TINYINT);
+ }
+
+ @Override
+ public PaimonColumnType visit(SmallIntType smallIntType) {
+ return new PaimonColumnType(Type.SMALLINT);
+ }
+
+ @Override
+ public PaimonColumnType visit(IntType intType) {
+ return new PaimonColumnType(Type.INT);
+ }
+
+ @Override
+ public PaimonColumnType visit(BigIntType bigIntType) {
+ return new PaimonColumnType(Type.BIGINT);
+ }
+
+ @Override
+ public PaimonColumnType visit(FloatType floatType) {
+ return new PaimonColumnType(Type.FLOAT);
+ }
+
+ @Override
+ public PaimonColumnType visit(DoubleType doubleType) {
+ return new PaimonColumnType(Type.DOUBLE);
+ }
+
+ @Override
+ public PaimonColumnType visit(DateType dateType) {
+ return new PaimonColumnType(Type.DATEV2);
+ }
+
+ @Override
+ public PaimonColumnType visit(TimeType timeType) {
+ PaimonColumnType paimonColumnType = new PaimonColumnType(Type.DATETIMEV2);
+ paimonColumnType.setPrecision(timeType.getPrecision());
+ return paimonColumnType;
+ }
+
+ @Override
+ public PaimonColumnType visit(TimestampType timestampType) {
+ PaimonColumnType paimonColumnType = new PaimonColumnType(Type.DATETIMEV2);
+ paimonColumnType.setPrecision(timestampType.getPrecision());
+ return paimonColumnType;
+ }
+
+ @Override
+ public PaimonColumnType visit(LocalZonedTimestampType localZonedTimestampType) {
+ PaimonColumnType paimonColumnType = new PaimonColumnType(Type.DATETIMEV2);
+ paimonColumnType.setPrecision(localZonedTimestampType.getPrecision());
+ return paimonColumnType;
+ }
+
+ @Override
+ public PaimonColumnType visit(ArrayType arrayType) {
+ return this.defaultMethod(arrayType);
+ }
+
+ @Override
+ public PaimonColumnType visit(MultisetType multisetType) {
+ return this.defaultMethod(multisetType);
+ }
+
+ @Override
+ public PaimonColumnType visit(MapType mapType) {
+ return this.defaultMethod(mapType);
+ }
+
+ @Override
+ public PaimonColumnType visit(RowType rowType) {
+ return this.defaultMethod(rowType);
+ }
+
+ @Override
+ protected PaimonColumnType defaultMethod(DataType dataType) {
+ LOG.info("UNSUPPORTED type:{}", dataType);
+ return new PaimonColumnType(Type.UNSUPPORTED);
+ }
+ }
+
+ /** Doris type plus length/precision/scale; values not passed to a constructor are stored as -1. */
+ private static class PaimonColumnType {
+ private final Type type;
+ // only used in char & varchar
+ private final int length;
+ private int precision;
+ private final int scale;
+
+ public PaimonColumnType(Type type) {
+ this(type, -1, -1, -1);
+ }
+
+ public PaimonColumnType(Type type, int length) {
+ this(type, length, -1, -1);
+ }
+
+ public PaimonColumnType(Type type, int precision, int scale) {
+ this(type, -1, precision, scale);
+ }
+
+ private PaimonColumnType(Type type, int length, int precision, int scale) {
+ this.type = type;
+ this.length = length;
+ this.precision = precision;
+ this.scale = scale;
+ }
+
+ public Type getType() {
+ return type;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ public int getPrecision() {
+ return precision;
+ }
+
+ public int getScale() {
+ return scale;
+ }
+
+ // precision is the one attribute set after construction (see the time/timestamp visitors)
+ public void setPrecision(int precision) {
+ this.precision = precision;
+ }
+ }
+}
diff --git a/fe/pom.xml b/fe/pom.xml
index cfb63957bc..d47dfa681d 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -195,7 +195,7 @@ under the License.
${fe.dir}/../
1.2-SNAPSHOT
UTF-8
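- <doris.hive.catalog.shade.version>1.0.1</doris.hive.catalog.shade.version>
+ <doris.hive.catalog.shade.version>1.0.2</doris.hive.catalog.shade.version>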
1.8
1.8
@@ -318,7 +318,7 @@ under the License.
2.3.2
- <paimon.version>0.4.0-incubating</paimon.version>
+ <paimon.version>0.5.0-incubating</paimon.version>
3.4.4
395
diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy
index e21e513718..81792a08a1 100644
--- a/regression-test/conf/regression-conf.groovy
+++ b/regression-test/conf/regression-conf.groovy
@@ -113,9 +113,10 @@ clickhouse_22_port=8123
doris_port=9030
// hive catalog test config
-// To enable hive test, you need first start hive container.
+// To enable the hive/paimon tests, you need to start the hive container first.
// See `docker/thirdparties/start-thirdparties-docker.sh`
enableHiveTest=false
+enablePaimonTest=false
hms_port=9183
hdfs_port=8120
hiveServerPort=10000
diff --git a/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out b/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out
new file mode 100644
index 0000000000..4918db4555
--- /dev/null
+++ b/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out
@@ -0,0 +1,68 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !all --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+10 20 30 40 50 60 70 80 90.1 100.1 0.00 2020-03-02 130str 140varchar b false bbbb 2023-08-14T08:32:52.821
+
+-- !c1 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c2 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c3 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c4 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c5 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c6 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c7 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c8 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c9 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c10 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c11 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c12 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c13 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c14 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c15 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c16 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c18 --
+1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530
+
+-- !c19 --
+11 22 aa bb cc
+1 2 a b c
+
+-- !c20 --
+1 2 a b c
+
+-- !c21 --
+1 2 a b c
+
+-- !c22 --
+
diff --git a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy
index a6e687b3e0..ad72a47e64 100644
--- a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy
+++ b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy
@@ -51,4 +51,65 @@ suite("test_paimon_catalog", "p0,external,doris,external_docker,external_docker_
"hadoop.username"="hadoop"
);
"""
+
+ String enabled = context.config.otherConfigs.get("enablePaimonTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ def all = """select * from all_table;"""
+ def c1 = """select * from all_table where c1=1;"""
+ def c2 = """select * from all_table where c2=2;"""
+ def c3 = """select * from all_table where c3=3;"""
+ def c4 = """select * from all_table where c4=4;"""
+ def c5 = """select * from all_table where c5=5;"""
+ def c6 = """select * from all_table where c6=6;"""
+ def c7 = """select * from all_table where c7=7;"""
+ def c8 = """select * from all_table where c8=8;"""
+ def c9 = """select * from all_table where c9<10;"""
+ def c10 = """select * from all_table where c10=10.1;"""
+ def c11 = """select * from all_table where c11=11.1;"""
+ def c12 = """select * from all_table where c12='2020-02-02';"""
+ def c13 = """select * from all_table where c13='13str';"""
+ def c14 = """select * from all_table where c14='14varchar';"""
+ def c15 = """select * from all_table where c15='a';"""
+ def c16 = """select * from all_table where c16=true;"""
+ def c18 = """select * from all_table where c18='2023-08-13 09:32:38.53';"""
+ def c19 = """select * from auto_bucket;"""
+ def c20 = """select * from auto_bucket where dt="b";"""
+ def c21 = """select * from auto_bucket where dt="b" and hh="c";"""
+ def c22 = """select * from auto_bucket where dt="d";"""
+
+ String hdfs_port = context.config.otherConfigs.get("hdfs_port")
+ String catalog_name = "paimon1"
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """create catalog if not exists ${catalog_name} properties (
+ "type" = "paimon",
+ "paimon.catalog.type"="filesystem",
+ "warehouse" = "hdfs://${externalEnvIp}:${hdfs_port}/user/doris/paimon1"
+ );"""
+ sql """use `${catalog_name}`.`db1`"""
+
+ qt_all all
+ qt_c1 c1
+ qt_c2 c2
+ qt_c3 c3
+ qt_c4 c4
+ qt_c5 c5
+ qt_c6 c6
+ qt_c7 c7
+ qt_c8 c8
+ qt_c9 c9
+ qt_c10 c10
+ qt_c11 c11
+ qt_c12 c12
+ qt_c13 c13
+ qt_c14 c14
+ qt_c15 c15
+ qt_c16 c16
+ qt_c18 c18
+ qt_c19 c19
+ qt_c20 c20
+ qt_c21 c21
+ qt_c22 c22
+ }
}