From 9dc1196f1c26ff0127eb4dfd2598d7bb8713cabf Mon Sep 17 00:00:00 2001 From: Ashin Gau Date: Fri, 31 May 2024 20:51:28 +0800 Subject: [PATCH] [update](hudi) update hudi-spark bundle to 3.4.3 (#35013) (#35718) backport: #35013 --- fe/be-java-extensions/hudi-scanner/pom.xml | 41 +++-------------- .../apache/doris/hudi/BaseSplitReader.scala | 8 ++-- .../doris/hudi/MORSnapshotSplitReader.scala | 3 +- .../preload-extensions/pom.xml | 42 +++++------------ fe/fe-core/pom.xml | 40 +++++++++++++---- .../source/HudiCachedPartitionProcessor.java | 1 + fe/pom.xml | 45 +++++++++++++++++-- 7 files changed, 99 insertions(+), 81 deletions(-) diff --git a/fe/be-java-extensions/hudi-scanner/pom.xml b/fe/be-java-extensions/hudi-scanner/pom.xml index d4f7a45861..465a9393b0 100644 --- a/fe/be-java-extensions/hudi-scanner/pom.xml +++ b/fe/be-java-extensions/hudi-scanner/pom.xml @@ -32,10 +32,7 @@ under the License. 1 2.12.15 2.12 - 3.2.0 - 3.2 - 3.0.16 - 1.11.2 + 1.11.3 @@ -91,7 +88,7 @@ under the License. org.apache.hudi - hudi-spark3.2.x_${scala.binary.version} + ${hudi-spark.version}_${scala.binary.version} ${hudi.version} provided @@ -119,6 +116,11 @@ under the License. 1.10.1 provided + + org.antlr + antlr4-runtime + ${antlr4.version} + org.apache.spark spark-core_${scala.binary.version} @@ -160,35 +162,6 @@ under the License. spark-catalyst_${scala.binary.version} ${spark.version} provided - - - org.codehaus.janino - janino - - - org.codehaus.janino - commons-compiler - - - - - - org.codehaus.janino - janino - ${janino.version} - provided - - - org.codehaus.janino - commons-compiler - - - - - org.codehaus.janino - commons-compiler - ${janino.version} - provided diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala index 8229064163..dcc068ad70 100644 --- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala +++ b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala @@ -44,7 +44,7 @@ import org.apache.hudi.io.storage.HoodieAvroHFileReader import org.apache.hudi.metadata.HoodieTableMetadataUtil import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieSparkConfUtils, HoodieTableSchema, HoodieTableState} import org.apache.log4j.Logger -import org.apache.spark.sql.adapter.Spark3_2Adapter +import org.apache.spark.sql.adapter.Spark3_4Adapter import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters, HoodieSparkAvroSchemaConverters} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat @@ -66,7 +66,7 @@ import scala.collection.JavaConverters._ import scala.util.control.NonFatal import scala.util.{Failure, Success, Try} -class DorisSparkAdapter extends Spark3_2Adapter { +class DorisSparkAdapter extends Spark3_4Adapter { override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters } @@ -498,7 +498,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) { hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { partitionedFile => { val reader = new HoodieAvroHFileReader( - hadoopConf, new Path(partitionedFile.filePath), new CacheConfig(hadoopConf)) + hadoopConf, partitionedFile.filePath.toPath, new CacheConfig(hadoopConf)) val requiredRowSchema = requiredDataSchema.structTypeSchema // NOTE: Schema has to be parsed at this point, since Avro's [[Schema]] aren't serializable @@ -573,7 +573,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) { BaseFileReader( read = partitionedFile => { - val extension = FSUtils.getFileExtension(partitionedFile.filePath) + val extension = FSUtils.getFileExtension(partitionedFile.filePath.toString()) if (tableBaseFileFormat.getFileExtension.equals(extension)) { read(partitionedFile) } else { diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala index 07e236082c..02a4fa4004 100644 --- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala +++ b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/MORSnapshotSplitReader.scala @@ -21,6 +21,7 @@ import org.apache.hudi.HoodieBaseRelation.convertToAvroSchema import org.apache.hudi.avro.HoodieAvroUtils import org.apache.hudi.common.model.HoodieLogFile import org.apache.hudi.{DataSourceReadOptions, HoodieMergeOnReadFileSplit, HoodieTableSchema} +import org.apache.spark.paths.SparkPath import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.datasources.PartitionedFile @@ -80,7 +81,7 @@ class MORSnapshotSplitReader(override val split: HoodieSplit) extends BaseSplitR val partitionedBaseFile = if (split.dataFilePath.isEmpty) { None } else { - Some(PartitionedFile(getPartitionColumnsAsInternalRow(), split.dataFilePath, 0, split.dataFileLength)) + Some(PartitionedFile(getPartitionColumnsAsInternalRow(), SparkPath.fromPathString(split.dataFilePath), 0, split.dataFileLength)) } HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles) } diff --git a/fe/be-java-extensions/preload-extensions/pom.xml b/fe/be-java-extensions/preload-extensions/pom.xml index 8cc11473fd..235a3f270f 100644 --- a/fe/be-java-extensions/preload-extensions/pom.xml +++ b/fe/be-java-extensions/preload-extensions/pom.xml @@ -33,8 +33,6 @@ under the License. 8 8 2.12 - 3.2.0 - 3.0.16 @@ -63,6 +61,12 @@ under the License. provided + + org.apache.hadoop + hadoop-annotations + ${hadoop.version} + provided + org.apache.hudi hudi-spark-client @@ -83,6 +87,11 @@ under the License. + + org.antlr + antlr4-runtime + ${antlr4.version} + org.apache.hudi hudi-spark3-common @@ -90,7 +99,7 @@ under the License. org.apache.hudi - hudi-spark3.2.x_${scala.binary.version} + ${hudi-spark.version}_${scala.binary.version} ${hudi.version} @@ -158,33 +167,6 @@ under the License. spark-catalyst_${scala.binary.version} ${spark.version} compile - - - org.codehaus.janino - janino - - - org.codehaus.janino - commons-compiler - - - - - - org.codehaus.janino - janino - ${janino.version} - - - org.codehaus.janino - commons-compiler - - - - - org.codehaus.janino - commons-compiler - ${janino.version} diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml index 0a8cbcdce2..21a6fac91f 100644 --- a/fe/fe-core/pom.xml +++ b/fe/fe-core/pom.xml @@ -32,7 +32,6 @@ under the License. ${basedir}/../../ ${basedir}/../../thirdparty 1 - 4.9.3 2.20.131 3.1.1-hw-46 8.2.7 @@ -433,9 +432,26 @@ under the License. + + + org.antlr + antlr4-runtime + ${antlr4.version} + com.aliyun.odps odps-sdk-core + + + antlr-runtime + org.antlr + + + antlr4 + org.antlr + + @@ -639,14 +655,6 @@ under the License. mariadb-java-client - - - org.antlr - antlr4-runtime - ${antlr4.version} - - com.zaxxer HikariCP @@ -747,6 +755,20 @@ under the License. io.airlift concurrent + + me.bechberger + ap-loader-all + 3.0-8 + + + org.apache.hbase + hbase-server + + + org.apache.hbase + hbase-hadoop-compat + 2.5.2-hadoop3 + diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java index 4543303db6..c822034901 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java @@ -162,6 +162,7 @@ public class HudiCachedPartitionProcessor extends HudiPartitionProcessor { partitionValues.writeLock().unlock(); } } catch (Exception e) { + LOG.warn("Failed to get hudi partitions", e); throw new CacheException("Failed to get hudi partitions", e); } } diff --git a/fe/pom.xml b/fe/pom.xml index 5dcdabfb33..f20953bc8e 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -273,7 +273,7 @@ under the License. 1.34.0 1.60.1 - 3.42.0 + 3.43.0 3.24.3 @@ -293,12 +293,13 @@ under the License. 0.2.3 3.4.0 6.4.5 - 3.8.9.Final + 3.8.14.Final 1.3.2 1.2.0 2.3.0 0.8.13 - 3.4.1 + 3.4.3 + hudi-spark3.4.x 3.1.3 2.3.9 9.35 @@ -330,6 +331,8 @@ under the License. 1.12.669 3.0.9 3.3.6 + 2.4.9 + 4.13.1 2.8.1 github 2.7.13 @@ -522,6 +525,14 @@ under the License. javax.servlet servlet-api + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-api @@ -552,6 +563,29 @@ under the License. kerb-simplekdc ${kerby.version} + + org.apache.hbase + hbase-server + ${hbase.version} + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hbase + hbase-hadoop2-compat + + + org.apache.hadoop + hadoop-annotations + + + org.apache.kerby kerb-core @@ -1123,6 +1157,11 @@ under the License. xnio-nio ${xnio-nio.version} + + org.jboss.xnio + xnio-api + ${xnio-nio.version} + javax.annotation