From ce18f1148a5e06d56ca31809225fcaa1888bd04f Mon Sep 17 00:00:00 2001 From: zhangdong <493738387@qq.com> Date: Tue, 17 Oct 2023 22:07:13 +0800 Subject: [PATCH] [improvement](catalog)compatible with paimon 0.5 (#24985) compatible with paimon 0.5 add p0 for paimon,need set enablePaimonTest=true --- .../docker-compose/hive/scripts/README | 5 + .../hive/scripts/hive-metastore.sh | 17 ++ fe/be-java-extensions/paimon-scanner/pom.xml | 47 +--- .../apache/doris/paimon/PaimonJniScanner.java | 2 +- .../apache/doris/paimon/PaimonTypeUtils.java | 229 ++++++++++++++++++ fe/pom.xml | 4 +- regression-test/conf/regression-conf.groovy | 3 +- .../paimon/test_paimon_catalog.out | 68 ++++++ .../paimon/test_paimon_catalog.groovy | 61 +++++ 9 files changed, 394 insertions(+), 42 deletions(-) create mode 100644 fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java create mode 100644 regression-test/data/external_table_p0/paimon/test_paimon_catalog.out diff --git a/docker/thirdparties/docker-compose/hive/scripts/README b/docker/thirdparties/docker-compose/hive/scripts/README index a50efc78f0..4eaf389348 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/README +++ b/docker/thirdparties/docker-compose/hive/scripts/README @@ -2,3 +2,8 @@ https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/tpch1_parquet/tpch1.db.tar.gz 2. Unzip and name it to "tpch1.db" + +3. Download paimon file from: + https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/paimon/paimon1.tar.gz + +4. Unzip and name it to "paimon1" diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh index 3ac47e4c11..2d19c7aa1c 100755 --- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh +++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh @@ -40,6 +40,23 @@ hadoop fs -mkdir -p /user/doris/ echo "hadoop fs -put /mnt/scripts/tpch1.db /user/doris/" hadoop fs -put /mnt/scripts/tpch1.db /user/doris/ + +# if you test in your local,better use # to annotation section about paimon +if [[ ! -d "/mnt/scripts/paimon1" ]]; then + echo "/mnt/scripts/paimon1 does not exist" + cd /mnt/scripts/ + curl -O https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/paimon/paimon1.tar.gz + tar -zxf paimon1.tar.gz + rm -rf paimon1.tar.gz + cd - +else + echo "/mnt/scripts/paimon1 exist, continue !" +fi + +## put paimon1 +echo "hadoop fs -put /mnt/scripts/paimon1 /user/doris/" +hadoop fs -put /mnt/scripts/paimon1 /user/doris/ + ## put other preinstalled data echo "hadoop fs -put /mnt/scripts/preinstalled_data /user/doris/" hadoop fs -put /mnt/scripts/preinstalled_data /user/doris/ diff --git a/fe/be-java-extensions/paimon-scanner/pom.xml b/fe/be-java-extensions/paimon-scanner/pom.xml index 76da4288d0..0b51369130 100644 --- a/fe/be-java-extensions/paimon-scanner/pom.xml +++ b/fe/be-java-extensions/paimon-scanner/pom.xml @@ -49,55 +49,26 @@ under the License. org.apache.paimon - paimon-bundle + paimon-core ${paimon.version} + org.apache.paimon - paimon-hive-connector-2.3 + paimon-common ${paimon.version} + org.apache.paimon - paimon-s3 + paimon-format ${paimon.version} + - org.apache.paimon - paimon-oss-impl - ${paimon.version} - - - org.apache.thrift - libthrift - 0.9.3 - - - com.facebook.presto.hive - hive-apache - ${presto.hive.version} - - - org.slf4j - slf4j-log4j12 - - - - - org.apache.hadoop - hadoop-client - - - org.apache.hadoop - hadoop-common - - - org.apache.hadoop - hadoop-hdfs - - - commons-io - commons-io + org.apache.doris + hive-catalog-shade + ${doris.hive.catalog.shade.version} diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java index 4967562789..4e3cda8222 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java @@ -118,7 +118,7 @@ public class PaimonJniScanner extends JniScanner { fields[i], paimonAllFieldNames)); } DataType dataType = table.rowType().getTypeAt(index); - columnTypes[i] = ColumnType.parseType(fields[i], dataType.toString()); + columnTypes[i] = PaimonTypeUtils.fromPaimonType(fields[i], dataType); } super.types = columnTypes; } diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java new file mode 100644 index 0000000000..b2a9450a6d --- /dev/null +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonTypeUtils.java @@ -0,0 +1,229 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.paimon; + +import org.apache.doris.common.jni.vec.ColumnType; +import org.apache.doris.common.jni.vec.ColumnType.Type; + +import org.apache.paimon.types.ArrayType; +import org.apache.paimon.types.BigIntType; +import org.apache.paimon.types.BinaryType; +import org.apache.paimon.types.BooleanType; +import org.apache.paimon.types.CharType; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DataTypeDefaultVisitor; +import org.apache.paimon.types.DateType; +import org.apache.paimon.types.DecimalType; +import org.apache.paimon.types.DoubleType; +import org.apache.paimon.types.FloatType; +import org.apache.paimon.types.IntType; +import org.apache.paimon.types.LocalZonedTimestampType; +import org.apache.paimon.types.MapType; +import org.apache.paimon.types.MultisetType; +import org.apache.paimon.types.RowType; +import org.apache.paimon.types.SmallIntType; +import org.apache.paimon.types.TimeType; +import org.apache.paimon.types.TimestampType; +import org.apache.paimon.types.TinyIntType; +import org.apache.paimon.types.VarBinaryType; +import org.apache.paimon.types.VarCharType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Convert paimon type to doris type. + */ +public class PaimonTypeUtils { + private static final Logger LOG = LoggerFactory.getLogger(PaimonTypeUtils.class); + + private PaimonTypeUtils() { + } + + public static ColumnType fromPaimonType(String columnName, DataType type) { + PaimonColumnType paimonColumnType = type.accept(PaimonToDorisTypeVisitor.INSTANCE); + return new ColumnType(columnName, paimonColumnType.getType(), paimonColumnType.getLength(), + paimonColumnType.getPrecision(), + paimonColumnType.getScale()); + } + + private static class PaimonToDorisTypeVisitor extends DataTypeDefaultVisitor { + + private static final PaimonToDorisTypeVisitor INSTANCE = new PaimonToDorisTypeVisitor(); + + @Override + public PaimonColumnType visit(CharType charType) { + return new PaimonColumnType(Type.CHAR, charType.getLength()); + } + + @Override + public PaimonColumnType visit(VarCharType varCharType) { + return new PaimonColumnType(Type.VARCHAR, varCharType.getLength()); + } + + @Override + public PaimonColumnType visit(BooleanType booleanType) { + return new PaimonColumnType(Type.BOOLEAN); + } + + @Override + public PaimonColumnType visit(BinaryType binaryType) { + return new PaimonColumnType(Type.BINARY); + } + + @Override + public PaimonColumnType visit(VarBinaryType varBinaryType) { + return new PaimonColumnType(Type.BINARY); + } + + @Override + public PaimonColumnType visit(DecimalType decimalType) { + return new PaimonColumnType(Type.DECIMAL128, decimalType.getPrecision(), decimalType.getScale()); + } + + @Override + public PaimonColumnType visit(TinyIntType tinyIntType) { + return new PaimonColumnType(Type.TINYINT); + } + + @Override + public PaimonColumnType visit(SmallIntType smallIntType) { + return new PaimonColumnType(Type.SMALLINT); + } + + @Override + public PaimonColumnType visit(IntType intType) { + return new PaimonColumnType(Type.INT); + } + + @Override + public PaimonColumnType visit(BigIntType bigIntType) { + return new PaimonColumnType(Type.BIGINT); + } + + @Override + public PaimonColumnType visit(FloatType floatType) { + return new PaimonColumnType(Type.FLOAT); + } + + @Override + public PaimonColumnType visit(DoubleType doubleType) { + return new PaimonColumnType(Type.DOUBLE); + } + + @Override + public PaimonColumnType visit(DateType dateType) { + return new PaimonColumnType(Type.DATEV2); + } + + @Override + public PaimonColumnType visit(TimeType timeType) { + PaimonColumnType paimonColumnType = new PaimonColumnType(Type.DATETIMEV2); + paimonColumnType.setPrecision(timeType.getPrecision()); + return paimonColumnType; + } + + @Override + public PaimonColumnType visit(TimestampType timestampType) { + PaimonColumnType paimonColumnType = new PaimonColumnType(Type.DATETIMEV2); + paimonColumnType.setPrecision(timestampType.getPrecision()); + return paimonColumnType; + } + + @Override + public PaimonColumnType visit(LocalZonedTimestampType localZonedTimestampType) { + PaimonColumnType paimonColumnType = new PaimonColumnType(Type.DATETIMEV2); + paimonColumnType.setPrecision(localZonedTimestampType.getPrecision()); + return paimonColumnType; + } + + @Override + public PaimonColumnType visit(ArrayType arrayType) { + return this.defaultMethod(arrayType); + } + + @Override + public PaimonColumnType visit(MultisetType multisetType) { + return this.defaultMethod(multisetType); + } + + @Override + public PaimonColumnType visit(MapType mapType) { + return this.defaultMethod(mapType); + } + + @Override + public PaimonColumnType visit(RowType rowType) { + return this.defaultMethod(rowType); + } + + @Override + protected PaimonColumnType defaultMethod(DataType dataType) { + LOG.info("UNSUPPORTED type:" + dataType); + return new PaimonColumnType(Type.UNSUPPORTED); + } + } + + private static class PaimonColumnType { + private Type type; + // only used in char & varchar + private int length; + private int precision; + private int scale; + + public PaimonColumnType(Type type) { + this.type = type; + this.length = -1; + this.precision = -1; + this.scale = -1; + } + + public PaimonColumnType(Type type, int length) { + this.type = type; + this.length = length; + this.precision = -1; + this.scale = -1; + } + + public PaimonColumnType(Type type, int precision, int scale) { + this.type = type; + this.precision = precision; + this.scale = scale; + this.length = -1; + } + + public Type getType() { + return type; + } + + public int getLength() { + return length; + } + + public int getPrecision() { + return precision; + } + + public int getScale() { + return scale; + } + + public void setPrecision(int precision) { + this.precision = precision; + } + } +} diff --git a/fe/pom.xml b/fe/pom.xml index cfb63957bc..d47dfa681d 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -195,7 +195,7 @@ under the License. ${fe.dir}/../ 1.2-SNAPSHOT UTF-8 - 1.0.1 + 1.0.2 1.8 1.8 @@ -318,7 +318,7 @@ under the License. 2.3.2 - 0.4.0-incubating + 0.5.0-incubating 3.4.4 395 diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy index e21e513718..81792a08a1 100644 --- a/regression-test/conf/regression-conf.groovy +++ b/regression-test/conf/regression-conf.groovy @@ -113,9 +113,10 @@ clickhouse_22_port=8123 doris_port=9030 // hive catalog test config -// To enable hive test, you need first start hive container. +// To enable hive/paimon test, you need first start hive container. // See `docker/thirdparties/start-thirdparties-docker.sh` enableHiveTest=false +enablePaimonTest=false hms_port=9183 hdfs_port=8120 hiveServerPort=10000 diff --git a/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out b/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out new file mode 100644 index 0000000000..4918db4555 --- /dev/null +++ b/regression-test/data/external_table_p0/paimon/test_paimon_catalog.out @@ -0,0 +1,68 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !all -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 +10 20 30 40 50 60 70 80 90.1 100.1 0.00 2020-03-02 130str 140varchar b false bbbb 2023-08-14T08:32:52.821 + +-- !c1 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c2 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c3 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c4 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c5 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c6 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c7 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c8 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c9 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c10 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c11 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c12 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c13 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c14 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c15 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c16 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c18 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 + +-- !c19 -- +11 22 aa bb cc +1 2 a b c + +-- !c20 -- +1 2 a b c + +-- !c21 -- +1 2 a b c + +-- !c22 -- + diff --git a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy index a6e687b3e0..ad72a47e64 100644 --- a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy +++ b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy @@ -51,4 +51,65 @@ suite("test_paimon_catalog", "p0,external,doris,external_docker,external_docker_ "hadoop.username"="hadoop" ); """ + + String enabled = context.config.otherConfigs.get("enablePaimonTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + def all = """select * from all_table;""" + def c1 = """select * from all_table where c1=1;""" + def c2 = """select * from all_table where c2=2;""" + def c3 = """select * from all_table where c3=3;""" + def c4 = """select * from all_table where c4=4;""" + def c5 = """select * from all_table where c5=5;""" + def c6 = """select * from all_table where c6=6;""" + def c7 = """select * from all_table where c7=7;""" + def c8 = """select * from all_table where c8=8;""" + def c9 = """select * from all_table where c9<10;""" + def c10 = """select * from all_table where c10=10.1;""" + def c11 = """select * from all_table where c11=11.1;""" + def c12 = """select * from all_table where c12='2020-02-02';""" + def c13 = """select * from all_table where c13='13str';""" + def c14 = """select * from all_table where c14='14varchar';""" + def c15 = """select * from all_table where c15='a';""" + def c16 = """select * from all_table where c16=true;""" + def c18 = """select * from all_table where c18='2023-08-13 09:32:38.53';""" + def c19 = """select * from auto_bucket;""" + def c20 = """select * from auto_bucket where dt="b";""" + def c21 = """select * from auto_bucket where dt="b" and hh="c";""" + def c22 = """select * from auto_bucket where dt="d";""" + + String hdfs_port = context.config.otherConfigs.get("hdfs_port") + String catalog_name = "paimon1" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type" = "paimon", + "paimon.catalog.type"="filesystem", + "warehouse" = "hdfs://${externalEnvIp}:${hdfs_port}/user/doris/paimon1" + );""" + sql """use `${catalog_name}`.`db1`""" + + qt_all all + qt_c1 c1 + qt_c2 c2 + qt_c3 c3 + qt_c4 c4 + qt_c5 c5 + qt_c6 c6 + qt_c7 c7 + qt_c8 c8 + qt_c9 c9 + qt_c10 c10 + qt_c11 c11 + qt_c12 c12 + qt_c13 c13 + qt_c14 c14 + qt_c15 c15 + qt_c16 c16 + qt_c18 c18 + qt_c19 c19 + qt_c20 c20 + qt_c21 c21 + qt_c22 c22 + } }