[fix](multi-catalog)fix avro and jdbc scanner dependency (#23015)

add preload-extensions module, put all conflict dependencies to pom.xml in preload-extensions
This commit is contained in:
slothever
2023-08-20 19:28:17 +08:00
committed by GitHub
parent 58b9bce954
commit 5ba505ebf4
9 changed files with 571 additions and 36 deletions

View File

@ -78,7 +78,11 @@ if [[ "${MAX_FILE_COUNT}" -lt 65536 ]]; then
fi
# add java libs
for f in "${DORIS_HOME}/lib/java_extensions/java-udf"/*.jar; do
preload_jars=("preload-extensions")
preload_jars+=("java-udf")
for preload_jar in "${preload_jars[@]}"; do
f="${DORIS_HOME}/lib/java_extensions/${preload_jar}/${preload_jar}-jar-with-dependencies.jar"
if [[ -z "${DORIS_CLASSPATH}" ]]; then
export DORIS_CLASSPATH="${f}"
else

View File

@ -457,6 +457,7 @@ if [[ "${BUILD_BE_JAVA_EXTENSIONS}" -eq 1 ]]; then
modules+=("be-java-extensions/paimon-scanner")
modules+=("be-java-extensions/max-compute-scanner")
modules+=("be-java-extensions/avro-scanner")
modules+=("be-java-extensions/preload-extensions")
fi
FE_MODULES="$(
IFS=','
@ -673,6 +674,7 @@ EOF
extensions_modules+=("paimon-scanner")
extensions_modules+=("max-compute-scanner")
extensions_modules+=("avro-scanner")
extensions_modules+=("preload-extensions")
BE_JAVA_EXTENSIONS_DIR="${DORIS_OUTPUT}/be/lib/java_extensions/"
rm -rf "${BE_JAVA_EXTENSIONS_DIR}"

View File

@ -41,16 +41,6 @@ under the License.
<artifactId>java-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro-tools</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
@ -73,6 +63,7 @@ under the License.
<dependency>
<groupId>org.apache.doris</groupId>
<artifactId>hive-catalog-shade</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>

View File

@ -42,27 +42,47 @@ under the License.
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>compile</scope>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-client</artifactId>
<version>${hudi.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro-tools</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark3-common</artifactId>
<version>${hudi.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>json4s-ast_2.11</artifactId>
@ -86,7 +106,7 @@ under the License.
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<version>1.10.1</version>
<scope>compile</scope>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
@ -110,13 +130,25 @@ under the License.
</exclusion>
</exclusions>
<version>${spark.version}</version>
<scope>compile</scope>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-launcher_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.codehaus.janino</groupId>
@ -128,23 +160,12 @@ under the License.
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-launcher_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<!-- version of spark's janino is error -->
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
<version>${janino.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.codehaus.janino</groupId>
@ -156,12 +177,14 @@ under the License.
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
<version>${janino.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<!-- version of spark's jackson module is error -->
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
<version>${jackson.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
@ -169,15 +192,16 @@ under the License.
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.doris</groupId>
<artifactId>java-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>

View File

@ -43,16 +43,24 @@ under the License.
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/druid -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<classifier>all</classifier>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.oracle.ojdbc</groupId>
<artifactId>orai18n</artifactId>
<version>19.3.0.0</version>
<scope>provided</scope>
</dependency>
</dependencies>

View File

@ -28,6 +28,7 @@ under the License.
<module>paimon-scanner</module>
<module>max-compute-scanner</module>
<module>avro-scanner</module>
<module>preload-extensions</module>
</modules>
<parent>

View File

@ -0,0 +1,247 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>be-java-extensions</artifactId>
<groupId>org.apache.doris</groupId>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>preload-extensions</artifactId>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<scala.binary.version>2.12</scala.binary.version>
<spark.version>3.2.0</spark.version>
<janino.version>3.0.16</janino.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-unsafe</artifactId>
<version>${arrow.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<version>1.10.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>compile</scope>
</dependency>
<!-- For Avro and Hudi Scanner PreLoad -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-client</artifactId>
<version>${hudi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro-tools</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark3-common</artifactId>
<version>${hudi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
<exclusions>
<exclusion>
<artifactId>json4s-ast_2.11</artifactId>
<groupId>org.json4s</groupId>
</exclusion>
<exclusion>
<artifactId>json4s-core_2.11</artifactId>
<groupId>org.json4s</groupId>
</exclusion>
<exclusion>
<artifactId>json4s-jackson_2.11</artifactId>
<groupId>org.json4s</groupId>
</exclusion>
<exclusion>
<artifactId>json4s-scalap_2.11</artifactId>
<groupId>org.json4s</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<artifactId>jackson-module-scala_2.12</artifactId>
<groupId>com.fasterxml.jackson.module</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-client-api</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-client-runtime</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
</exclusions>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-launcher_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<!-- version of spark's janino is error -->
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
<version>${janino.version}</version>
<exclusions>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
<version>${janino.version}</version>
</dependency>
<dependency>
<!-- version of spark's jackson module is error -->
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
<version>${jackson.version}</version>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- For JDBC Scanner PreLoad-->
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
</dependency>
<dependency>
<groupId>com.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<classifier>all</classifier>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.oracle.ojdbc</groupId>
<artifactId>orai18n</artifactId>
<version>19.3.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.doris</groupId>
<artifactId>hive-catalog-shade</artifactId>
</dependency>
</dependencies>
<build>
<finalName>preload-extensions</finalName>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/resources/package.xml</descriptor>
</descriptors>
<archive>
<manifest>
<mainClass></mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,217 @@
- [Preload Dependencies For BE Extensions](#Preload-Dependencies-For-BE-Extensions)
- [Avro Scanner](#Avro-Scanner)
- [Hudi Scanner](#Hudi-Scanner)
- [MaxCompute Scanner](#MaxCompute-Scanner)
- [Paimon Scanner](#Paimon-Scanner)
- [JDBC Scanner](#JDBC-Scanner)
# Preload Dependencies For BE Extensions
## Avro Scanner
Avro Scanner Compile Dependencies:
```
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro-tools</artifactId>
</exclusion>
</exclusions>
</dependency>
```
## Hudi Scanner
Hudi Scanner Compile Dependencies:
```
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<version>1.10.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-client</artifactId>
<version>${hudi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark3-common</artifactId>
<version>${hudi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
<exclusions>
<exclusion>
<artifactId>json4s-ast_2.11</artifactId>
<groupId>org.json4s</groupId>
</exclusion>
<exclusion>
<artifactId>json4s-core_2.11</artifactId>
<groupId>org.json4s</groupId>
</exclusion>
<exclusion>
<artifactId>json4s-jackson_2.11</artifactId>
<groupId>org.json4s</groupId>
</exclusion>
<exclusion>
<artifactId>json4s-scalap_2.11</artifactId>
<groupId>org.json4s</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<artifactId>jackson-module-scala_2.12</artifactId>
<groupId>com.fasterxml.jackson.module</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-client-api</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-client-runtime</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
</exclusions>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-launcher_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<!-- version of spark's janino is error -->
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
<version>${janino.version}</version>
<exclusions>
<exclusion>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.codehaus.janino</groupId>
<artifactId>commons-compiler</artifactId>
<version>${janino.version}</version>
</dependency>
<dependency>
<!-- version of spark's jackson module is error -->
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
<version>${jackson.version}</version>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
```
## MaxCompute Scanner
MaxCompute Scanner Compile Dependencies:
```
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-unsafe</artifactId>
<version>${arrow.version}</version>
<scope>compile</scope>
</dependency>
```
## Paimon Scanner
```
```
## JDBC Scanner
JDBC Scanner Compile Dependencies:
```
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
</dependency>
<dependency>
<groupId>com.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<classifier>all</classifier>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.oracle.ojdbc</groupId>
<artifactId>orai18n</artifactId>
<version>19.3.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.doris</groupId>
<artifactId>hive-catalog-shade</artifactId>
</dependency>
```

View File

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
<id>jar-with-dependencies</id>
<formats>
<format>jar</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<dependencySets>
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>true</useProjectArtifact>
<unpack>true</unpack>
<scope>runtime</scope>
<unpackOptions>
<excludes>
<exclude>**/Log4j2Plugins.dat</exclude>
</excludes>
</unpackOptions>
</dependencySet>
</dependencySets>
</assembly>