[update](hudi) update hudi-spark bundle to 3.4.3 (#35013) (#35718)

backport: #35013
Author: Ashin Gau
Committed by: GitHub
Date: 2024-05-31 20:51:28 +08:00
parent 9c270e5cdf
commit 9dc1196f1c
7 changed files with 99 additions and 81 deletions

View File

@@ -32,10 +32,7 @@ under the License.
 <fe_ut_parallel>1</fe_ut_parallel>
 <scala.version>2.12.15</scala.version>
 <scala.binary.version>2.12</scala.binary.version>
-<spark.version>3.2.0</spark.version>
-<sparkbundle.version>3.2</sparkbundle.version>
-<janino.version>3.0.16</janino.version>
-<avro.version>1.11.2</avro.version>
+<avro.version>1.11.3</avro.version>
 </properties>
 <dependencyManagement>
@@ -91,7 +88,7 @@ under the License.
 </dependency>
 <dependency>
 <groupId>org.apache.hudi</groupId>
-<artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
+<artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
 <version>${hudi.version}</version>
 <scope>provided</scope>
 <exclusions>
@@ -119,6 +116,11 @@ under the License.
 <version>1.10.1</version>
 <scope>provided</scope>
 </dependency>
+<dependency>
+<groupId>org.antlr</groupId>
+<artifactId>antlr4-runtime</artifactId>
+<version>${antlr4.version}</version>
+</dependency>
 <dependency>
 <groupId>org.apache.spark</groupId>
 <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -160,35 +162,6 @@ under the License.
 <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
 <version>${spark.version}</version>
 <scope>provided</scope>
-<exclusions>
-<exclusion>
-<groupId>org.codehaus.janino</groupId>
-<artifactId>janino</artifactId>
-</exclusion>
-<exclusion>
-<groupId>org.codehaus.janino</groupId>
-<artifactId>commons-compiler</artifactId>
-</exclusion>
-</exclusions>
 </dependency>
-<dependency>
-<!-- version of spark's janino is error -->
-<groupId>org.codehaus.janino</groupId>
-<artifactId>janino</artifactId>
-<version>${janino.version}</version>
-<scope>provided</scope>
-<exclusions>
-<exclusion>
-<groupId>org.codehaus.janino</groupId>
-<artifactId>commons-compiler</artifactId>
-</exclusion>
-</exclusions>
-</dependency>
-<dependency>
-<groupId>org.codehaus.janino</groupId>
-<artifactId>commons-compiler</artifactId>
-<version>${janino.version}</version>
-<scope>provided</scope>
-</dependency>
 <dependency>
 <!-- version of spark's jackson module is error -->

View File

@@ -44,7 +44,7 @@ import org.apache.hudi.io.storage.HoodieAvroHFileReader
 import org.apache.hudi.metadata.HoodieTableMetadataUtil
 import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieSparkConfUtils, HoodieTableSchema, HoodieTableState}
 import org.apache.log4j.Logger
-import org.apache.spark.sql.adapter.Spark3_2Adapter
+import org.apache.spark.sql.adapter.Spark3_4Adapter
 import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters, HoodieSparkAvroSchemaConverters}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
@@ -66,7 +66,7 @@ import scala.collection.JavaConverters._
 import scala.util.control.NonFatal
 import scala.util.{Failure, Success, Try}
-class DorisSparkAdapter extends Spark3_2Adapter {
+class DorisSparkAdapter extends Spark3_4Adapter {
 override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters
 }
@@ -498,7 +498,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {
 hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = {
 partitionedFile => {
 val reader = new HoodieAvroHFileReader(
-hadoopConf, new Path(partitionedFile.filePath), new CacheConfig(hadoopConf))
+hadoopConf, partitionedFile.filePath.toPath, new CacheConfig(hadoopConf))
 val requiredRowSchema = requiredDataSchema.structTypeSchema
 // NOTE: Schema has to be parsed at this point, since Avro's [[Schema]] aren't serializable
@@ -573,7 +573,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {
 BaseFileReader(
 read = partitionedFile => {
-val extension = FSUtils.getFileExtension(partitionedFile.filePath)
+val extension = FSUtils.getFileExtension(partitionedFile.filePath.toString())
 if (tableBaseFileFormat.getFileExtension.equals(extension)) {
 read(partitionedFile)
 } else {
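
Note: the filePath changes in this reader follow from Spark 3.4 making PartitionedFile.filePath an org.apache.spark.paths.SparkPath instead of the plain String it was under Spark 3.2. A minimal sketch of the two conversions used above (the helper names are illustrative, not part of the patch):

    import org.apache.hadoop.fs.Path
    import org.apache.spark.sql.execution.datasources.PartitionedFile

    // SparkPath -> org.apache.hadoop.fs.Path, as passed to HoodieAvroHFileReader above
    def hadoopPath(file: PartitionedFile): Path = file.filePath.toPath

    // SparkPath -> String, as passed to FSUtils.getFileExtension above
    def pathString(file: PartitionedFile): String = file.filePath.toString()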

View File

@@ -21,6 +21,7 @@ import org.apache.hudi.HoodieBaseRelation.convertToAvroSchema
 import org.apache.hudi.avro.HoodieAvroUtils
 import org.apache.hudi.common.model.HoodieLogFile
 import org.apache.hudi.{DataSourceReadOptions, HoodieMergeOnReadFileSplit, HoodieTableSchema}
+import org.apache.spark.paths.SparkPath
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.datasources.PartitionedFile
@@ -80,7 +81,7 @@ class MORSnapshotSplitReader(override val split: HoodieSplit) extends BaseSplitR
 val partitionedBaseFile = if (split.dataFilePath.isEmpty) {
 None
 } else {
-Some(PartitionedFile(getPartitionColumnsAsInternalRow(), split.dataFilePath, 0, split.dataFileLength))
+Some(PartitionedFile(getPartitionColumnsAsInternalRow(), SparkPath.fromPathString(split.dataFilePath), 0, split.dataFileLength))
 }
 HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles)
 }
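
On the construction side of the same API change, a PartitionedFile now takes a SparkPath built from a raw path string via SparkPath.fromPathString, as the hunk above does for split.dataFilePath. A small sketch (the path literal is made up):

    import org.apache.spark.paths.SparkPath

    val sp: SparkPath = SparkPath.fromPathString("hdfs://ns1/warehouse/tbl/part-00000.parquet")
    assert(sp.toString == "hdfs://ns1/warehouse/tbl/part-00000.parquet") // round-trips the raw string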

View File

@@ -33,8 +33,6 @@ under the License.
 <maven.compiler.source>8</maven.compiler.source>
 <maven.compiler.target>8</maven.compiler.target>
 <scala.binary.version>2.12</scala.binary.version>
-<spark.version>3.2.0</spark.version>
-<janino.version>3.0.16</janino.version>
 </properties>
 <dependencies>
@@ -63,6 +61,12 @@ under the License.
 <!-- Must be provided, we use hadoop_libs in BE's 3rd party instead -->
 <scope>provided</scope>
 </dependency>
+<dependency>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-annotations</artifactId>
+<version>${hadoop.version}</version>
+<scope>provided</scope>
+</dependency>
 <dependency>
 <groupId>org.apache.hudi</groupId>
 <artifactId>hudi-spark-client</artifactId>
@@ -83,6 +87,11 @@ under the License.
 </exclusion>
 </exclusions>
 </dependency>
+<dependency>
+<groupId>org.antlr</groupId>
+<artifactId>antlr4-runtime</artifactId>
+<version>${antlr4.version}</version>
+</dependency>
 <dependency>
 <groupId>org.apache.hudi</groupId>
 <artifactId>hudi-spark3-common</artifactId>
@@ -90,7 +99,7 @@ under the License.
 </dependency>
 <dependency>
 <groupId>org.apache.hudi</groupId>
-<artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
+<artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
 <version>${hudi.version}</version>
 <exclusions>
 <exclusion>
@@ -158,33 +167,6 @@ under the License.
 <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
 <version>${spark.version}</version>
 <scope>compile</scope>
-<exclusions>
-<exclusion>
-<groupId>org.codehaus.janino</groupId>
-<artifactId>janino</artifactId>
-</exclusion>
-<exclusion>
-<groupId>org.codehaus.janino</groupId>
-<artifactId>commons-compiler</artifactId>
-</exclusion>
-</exclusions>
 </dependency>
-<dependency>
-<!-- version of spark's janino is error -->
-<groupId>org.codehaus.janino</groupId>
-<artifactId>janino</artifactId>
-<version>${janino.version}</version>
-<exclusions>
-<exclusion>
-<groupId>org.codehaus.janino</groupId>
-<artifactId>commons-compiler</artifactId>
-</exclusion>
-</exclusions>
-</dependency>
-<dependency>
-<groupId>org.codehaus.janino</groupId>
-<artifactId>commons-compiler</artifactId>
-<version>${janino.version}</version>
-</dependency>
 <dependency>
 <!-- version of spark's jackson module is error -->

View File

@@ -32,7 +32,6 @@ under the License.
 <doris.home>${basedir}/../../</doris.home>
 <doris.thirdparty>${basedir}/../../thirdparty</doris.thirdparty>
 <fe_ut_parallel>1</fe_ut_parallel>
-<antlr4.version>4.9.3</antlr4.version>
 <awssdk.version>2.20.131</awssdk.version>
 <huaweiobs.version>3.1.1-hw-46</huaweiobs.version>
 <tencentcos.version>8.2.7</tencentcos.version>
@@ -433,9 +432,26 @@ under the License.
 </exclusion>
 </exclusions>
 </dependency>
+<!-- antlr4: the antlr-runtime version in the trino parser needs to stay consistent with Doris;
+when upgrading the Doris antlr-runtime version, take care of trino-parser as well. -->
+<dependency>
+<groupId>org.antlr</groupId>
+<artifactId>antlr4-runtime</artifactId>
+<version>${antlr4.version}</version>
+</dependency>
 <dependency>
 <groupId>com.aliyun.odps</groupId>
 <artifactId>odps-sdk-core</artifactId>
+<exclusions>
+<exclusion>
+<artifactId>antlr-runtime</artifactId>
+<groupId>org.antlr</groupId>
+</exclusion>
+<exclusion>
+<artifactId>antlr4</artifactId>
+<groupId>org.antlr</groupId>
+</exclusion>
+</exclusions>
 </dependency>
 <!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-web -->
 <dependency>
@@ -639,14 +655,6 @@ under the License.
 <artifactId>mariadb-java-client</artifactId>
 </dependency>
-<!-- antl4 The version of antlr-runtime in trino parser is need to be consistent with doris,
-when upgrade doris antlr-runtime version, should take care of trino-parser.-->
-<dependency>
-<groupId>org.antlr</groupId>
-<artifactId>antlr4-runtime</artifactId>
-<version>${antlr4.version}</version>
-</dependency>
 <dependency>
 <groupId>com.zaxxer</groupId>
 <artifactId>HikariCP</artifactId>
@@ -747,6 +755,20 @@ under the License.
 <groupId>io.airlift</groupId>
 <artifactId>concurrent</artifactId>
 </dependency>
+<dependency>
+<groupId>me.bechberger</groupId>
+<artifactId>ap-loader-all</artifactId>
+<version>3.0-8</version>
+</dependency>
+<dependency>
+<groupId>org.apache.hbase</groupId>
+<artifactId>hbase-server</artifactId>
+</dependency>
+<dependency>
+<groupId>org.apache.hbase</groupId>
+<artifactId>hbase-hadoop-compat</artifactId>
+<version>2.5.2-hadoop3</version>
+</dependency>
 </dependencies>
 <repositories>
 <!-- for huawei obs sdk -->

View File

@@ -162,6 +162,7 @@ public class HudiCachedPartitionProcessor extends HudiPartitionProcessor {
 partitionValues.writeLock().unlock();
 }
 } catch (Exception e) {
+LOG.warn("Failed to get hudi partitions", e);
 throw new CacheException("Failed to get hudi partitions", e);
 }
 }

View File

@@ -273,7 +273,7 @@ under the License.
 <!-- NOTE: Using grpc-java whose version is newer than 1.34.0 will break the build on CentOS 6 due to the obsolete GLIBC -->
 <grpc-java.version>1.34.0</grpc-java.version>
 <grpc.version>1.60.1</grpc.version>
-<check.freamework.version>3.42.0</check.freamework.version>
+<check.freamework.version>3.43.0</check.freamework.version>
 <protobuf.version>3.24.3</protobuf.version>
 <!-- we use protoc-jar-maven-plugin to generate protobuf generated code -->
 <!-- see https://repo.maven.apache.org/maven2/com/google/protobuf/protoc/ to get correct version -->
@@ -293,12 +293,13 @@ under the License.
 <zjsonpatch.version>0.2.3</zjsonpatch.version>
 <kafka-clients.version>3.4.0</kafka-clients.version>
 <oshi-core.version>6.4.5</oshi-core.version>
-<xnio-nio.version>3.8.9.Final</xnio-nio.version>
+<xnio-nio.version>3.8.14.Final</xnio-nio.version>
 <javax.annotation-api.version>1.3.2</javax.annotation-api.version>
 <javax.activation.version>1.2.0</javax.activation.version>
 <jaxws-api.version>2.3.0</jaxws-api.version>
 <RoaringBitmap.version>0.8.13</RoaringBitmap.version>
-<spark.version>3.4.1</spark.version>
+<spark.version>3.4.3</spark.version>
+<hudi-spark.version>hudi-spark3.4.x</hudi-spark.version>
 <hive.version>3.1.3</hive.version>
 <hive.common.version>2.3.9</hive.common.version>
 <nimbusds.version>9.35</nimbusds.version>
@@ -330,6 +331,8 @@ under the License.
 <aws-java-sdk.version>1.12.669</aws-java-sdk.version>
 <mariadb-java-client.version>3.0.9</mariadb-java-client.version>
 <hadoop.version>3.3.6</hadoop.version>
+<hbase.version>2.4.9</hbase.version>
+<antlr4.version>4.13.1</antlr4.version>
 <joda.version>2.8.1</joda.version>
 <project.scm.id>github</project.scm.id>
 <spring.version>2.7.13</spring.version>
@@ -522,6 +525,14 @@ under the License.
 <exclusion>
 <groupId>javax.servlet</groupId>
 <artifactId>servlet-api</artifactId>
 </exclusion>
+<exclusion>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-yarn-common</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-yarn-api</artifactId>
+</exclusion>
 </exclusions>
 </dependency>
@@ -552,6 +563,29 @@ under the License.
 <artifactId>kerb-simplekdc</artifactId>
 <version>${kerby.version}</version>
 </dependency>
+<dependency>
+<groupId>org.apache.hbase</groupId>
+<artifactId>hbase-server</artifactId>
+<version>${hbase.version}</version>
+<exclusions>
+<exclusion>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-yarn-api</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-yarn-common</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.apache.hbase</groupId>
+<artifactId>hbase-hadoop2-compat</artifactId>
+</exclusion>
+<exclusion>
+<groupId>org.apache.hadoop</groupId>
+<artifactId>hadoop-annotations</artifactId>
+</exclusion>
+</exclusions>
+</dependency>
 <dependency>
 <groupId>org.apache.kerby</groupId>
 <artifactId>kerb-core</artifactId>
@@ -1123,6 +1157,11 @@ under the License.
 <artifactId>xnio-nio</artifactId>
 <version>${xnio-nio.version}</version>
 </dependency>
+<dependency>
+<groupId>org.jboss.xnio</groupId>
+<artifactId>xnio-api</artifactId>
+<version>${xnio-nio.version}</version>
+</dependency>
 <!-- support jdk9 -->
 <dependency>
 <groupId>javax.annotation</groupId>