backport: #35013
@@ -32,10 +32,7 @@ under the License.
         <fe_ut_parallel>1</fe_ut_parallel>
-        <scala.version>2.12.15</scala.version>
         <scala.binary.version>2.12</scala.binary.version>
-        <spark.version>3.2.0</spark.version>
-        <sparkbundle.version>3.2</sparkbundle.version>
         <janino.version>3.0.16</janino.version>
-        <avro.version>1.11.2</avro.version>
+        <avro.version>1.11.3</avro.version>
     </properties>

     <dependencyManagement>
@@ -91,7 +88,7 @@ under the License.
             </dependency>
             <dependency>
                 <groupId>org.apache.hudi</groupId>
-                <artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
+                <artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
                 <version>${hudi.version}</version>
                 <scope>provided</scope>
                 <exclusions>
@@ -119,6 +116,11 @@ under the License.
                 <version>1.10.1</version>
                 <scope>provided</scope>
             </dependency>
+            <dependency>
+                <groupId>org.antlr</groupId>
+                <artifactId>antlr4-runtime</artifactId>
+                <version>${antlr4.version}</version>
+            </dependency>
             <dependency>
                 <groupId>org.apache.spark</groupId>
                 <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -160,35 +162,6 @@ under the License.
                 <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
                 <version>${spark.version}</version>
                 <scope>provided</scope>
-                <exclusions>
-                    <exclusion>
-                        <groupId>org.codehaus.janino</groupId>
-                        <artifactId>janino</artifactId>
-                    </exclusion>
-                    <exclusion>
-                        <groupId>org.codehaus.janino</groupId>
-                        <artifactId>commons-compiler</artifactId>
-                    </exclusion>
-                </exclusions>
             </dependency>
-            <dependency>
-                <!-- version of spark's janino is error -->
-                <groupId>org.codehaus.janino</groupId>
-                <artifactId>janino</artifactId>
-                <version>${janino.version}</version>
-                <scope>provided</scope>
-                <exclusions>
-                    <exclusion>
-                        <groupId>org.codehaus.janino</groupId>
-                        <artifactId>commons-compiler</artifactId>
-                    </exclusion>
-                </exclusions>
-            </dependency>
-            <dependency>
-                <groupId>org.codehaus.janino</groupId>
-                <artifactId>commons-compiler</artifactId>
-                <version>${janino.version}</version>
-                <scope>provided</scope>
-            </dependency>
             <dependency>
                 <!-- version of spark's jackson module is error -->

@@ -44,7 +44,7 @@ import org.apache.hudi.io.storage.HoodieAvroHFileReader
 import org.apache.hudi.metadata.HoodieTableMetadataUtil
 import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieSparkConfUtils, HoodieTableSchema, HoodieTableState}
 import org.apache.log4j.Logger
-import org.apache.spark.sql.adapter.Spark3_2Adapter
+import org.apache.spark.sql.adapter.Spark3_4Adapter
 import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters, HoodieSparkAvroSchemaConverters}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
@@ -66,7 +66,7 @@ import scala.collection.JavaConverters._
 import scala.util.control.NonFatal
 import scala.util.{Failure, Success, Try}

-class DorisSparkAdapter extends Spark3_2Adapter {
+class DorisSparkAdapter extends Spark3_4Adapter {
   override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters
 }

@@ -498,7 +498,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {
                            hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = {
     partitionedFile => {
       val reader = new HoodieAvroHFileReader(
-        hadoopConf, new Path(partitionedFile.filePath), new CacheConfig(hadoopConf))
+        hadoopConf, partitionedFile.filePath.toPath, new CacheConfig(hadoopConf))

       val requiredRowSchema = requiredDataSchema.structTypeSchema
       // NOTE: Schema has to be parsed at this point, since Avro's [[Schema]] aren't serializable
@@ -573,7 +573,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) {

     BaseFileReader(
       read = partitionedFile => {
-        val extension = FSUtils.getFileExtension(partitionedFile.filePath)
+        val extension = FSUtils.getFileExtension(partitionedFile.filePath.toString())
         if (tableBaseFileFormat.getFileExtension.equals(extension)) {
           read(partitionedFile)
         } else {

@@ -21,6 +21,7 @@ import org.apache.hudi.HoodieBaseRelation.convertToAvroSchema
 import org.apache.hudi.avro.HoodieAvroUtils
 import org.apache.hudi.common.model.HoodieLogFile
 import org.apache.hudi.{DataSourceReadOptions, HoodieMergeOnReadFileSplit, HoodieTableSchema}
+import org.apache.spark.paths.SparkPath
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.datasources.PartitionedFile
@@ -80,7 +81,7 @@ class MORSnapshotSplitReader(override val split: HoodieSplit) extends BaseSplitR
     val partitionedBaseFile = if (split.dataFilePath.isEmpty) {
       None
     } else {
-      Some(PartitionedFile(getPartitionColumnsAsInternalRow(), split.dataFilePath, 0, split.dataFileLength))
+      Some(PartitionedFile(getPartitionColumnsAsInternalRow(), SparkPath.fromPathString(split.dataFilePath), 0, split.dataFileLength))
     }
     HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles)
   }

@@ -33,8 +33,6 @@ under the License.
         <maven.compiler.source>8</maven.compiler.source>
         <maven.compiler.target>8</maven.compiler.target>
-        <scala.binary.version>2.12</scala.binary.version>
-        <spark.version>3.2.0</spark.version>
         <janino.version>3.0.16</janino.version>
     </properties>

     <dependencies>
@@ -63,6 +61,12 @@ under the License.
             <!-- Must be provided, we use hadoop_libs in BE's 3rd party instead -->
             <scope>provided</scope>
         </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-annotations</artifactId>
+            <version>${hadoop.version}</version>
+            <scope>provided</scope>
+        </dependency>
         <dependency>
             <groupId>org.apache.hudi</groupId>
             <artifactId>hudi-spark-client</artifactId>
@@ -83,6 +87,11 @@ under the License.
                 </exclusion>
             </exclusions>
         </dependency>
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr4-runtime</artifactId>
+            <version>${antlr4.version}</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.hudi</groupId>
             <artifactId>hudi-spark3-common</artifactId>
@@ -90,7 +99,7 @@ under the License.
         </dependency>
         <dependency>
             <groupId>org.apache.hudi</groupId>
-            <artifactId>hudi-spark3.2.x_${scala.binary.version}</artifactId>
+            <artifactId>${hudi-spark.version}_${scala.binary.version}</artifactId>
             <version>${hudi.version}</version>
             <exclusions>
                 <exclusion>
@@ -158,33 +167,6 @@ under the License.
             <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
             <version>${spark.version}</version>
             <scope>compile</scope>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>janino</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>commons-compiler</artifactId>
-                </exclusion>
-            </exclusions>
         </dependency>
-        <dependency>
-            <!-- version of spark's janino is error -->
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>janino</artifactId>
-            <version>${janino.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.codehaus.janino</groupId>
-                    <artifactId>commons-compiler</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>org.codehaus.janino</groupId>
-            <artifactId>commons-compiler</artifactId>
-            <version>${janino.version}</version>
-        </dependency>
         <dependency>
             <!-- version of spark's jackson module is error -->

@@ -32,7 +32,6 @@ under the License.
         <doris.home>${basedir}/../../</doris.home>
         <doris.thirdparty>${basedir}/../../thirdparty</doris.thirdparty>
         <fe_ut_parallel>1</fe_ut_parallel>
-        <antlr4.version>4.9.3</antlr4.version>
         <awssdk.version>2.20.131</awssdk.version>
         <huaweiobs.version>3.1.1-hw-46</huaweiobs.version>
         <tencentcos.version>8.2.7</tencentcos.version>
@@ -433,9 +432,26 @@ under the License.
                 </exclusion>
             </exclusions>
         </dependency>
+        <!-- antl4 The version of antlr-runtime in trino parser is need to be consistent with doris,
+         when upgrade doris antlr-runtime version, should take care of trino-parser.-->
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr4-runtime</artifactId>
+            <version>${antlr4.version}</version>
+        </dependency>
         <dependency>
             <groupId>com.aliyun.odps</groupId>
             <artifactId>odps-sdk-core</artifactId>
+            <exclusions>
+                <exclusion>
+                    <artifactId>antlr-runtime</artifactId>
+                    <groupId>org.antlr</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>antlr4</artifactId>
+                    <groupId>org.antlr</groupId>
+                </exclusion>
+            </exclusions>
         </dependency>
         <!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-web -->
         <dependency>
@@ -639,14 +655,6 @@ under the License.
             <artifactId>mariadb-java-client</artifactId>
         </dependency>

-        <!-- antl4 The version of antlr-runtime in trino parser is need to be consistent with doris,
-         when upgrade doris antlr-runtime version, should take care of trino-parser.-->
-        <dependency>
-            <groupId>org.antlr</groupId>
-            <artifactId>antlr4-runtime</artifactId>
-            <version>${antlr4.version}</version>
-        </dependency>
-
         <dependency>
             <groupId>com.zaxxer</groupId>
             <artifactId>HikariCP</artifactId>
@@ -747,6 +755,20 @@ under the License.
             <groupId>io.airlift</groupId>
             <artifactId>concurrent</artifactId>
         </dependency>
+        <dependency>
+            <groupId>me.bechberger</groupId>
+            <artifactId>ap-loader-all</artifactId>
+            <version>3.0-8</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-server</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-hadoop-compat</artifactId>
+            <version>2.5.2-hadoop3</version>
+        </dependency>
     </dependencies>
     <repositories>
         <!-- for huawei obs sdk -->

@@ -162,6 +162,7 @@ public class HudiCachedPartitionProcessor extends HudiPartitionProcessor {
                 partitionValues.writeLock().unlock();
             }
         } catch (Exception e) {
+            LOG.warn("Failed to get hudi partitions", e);
             throw new CacheException("Failed to get hudi partitions", e);
         }
     }

fe/pom.xml
@@ -273,7 +273,7 @@ under the License.
         <!-- NOTE: Using grpc-java whose version is newer than 1.34.0 will break the build on CentOS 6 due to the obsolete GLIBC -->
         <grpc-java.version>1.34.0</grpc-java.version>
         <grpc.version>1.60.1</grpc.version>
-        <check.freamework.version>3.42.0</check.freamework.version>
+        <check.freamework.version>3.43.0</check.freamework.version>
         <protobuf.version>3.24.3</protobuf.version>
         <!-- we use protoc-jar-maven-plugin to generate protobuf generated code -->
         <!-- see https://repo.maven.apache.org/maven2/com/google/protobuf/protoc/ to get correct version -->
@@ -293,12 +293,13 @@ under the License.
         <zjsonpatch.version>0.2.3</zjsonpatch.version>
         <kafka-clients.version>3.4.0</kafka-clients.version>
         <oshi-core.version>6.4.5</oshi-core.version>
-        <xnio-nio.version>3.8.9.Final</xnio-nio.version>
+        <xnio-nio.version>3.8.14.Final</xnio-nio.version>
         <javax.annotation-api.version>1.3.2</javax.annotation-api.version>
         <javax.activation.version>1.2.0</javax.activation.version>
         <jaxws-api.version>2.3.0</jaxws-api.version>
         <RoaringBitmap.version>0.8.13</RoaringBitmap.version>
-        <spark.version>3.4.1</spark.version>
+        <spark.version>3.4.3</spark.version>
+        <hudi-spark.version>hudi-spark3.4.x</hudi-spark.version>
         <hive.version>3.1.3</hive.version>
         <hive.common.version>2.3.9</hive.common.version>
         <nimbusds.version>9.35</nimbusds.version>
@@ -330,6 +331,8 @@ under the License.
         <aws-java-sdk.version>1.12.669</aws-java-sdk.version>
         <mariadb-java-client.version>3.0.9</mariadb-java-client.version>
         <hadoop.version>3.3.6</hadoop.version>
+        <hbase.version>2.4.9</hbase.version>
+        <antlr4.version>4.13.1</antlr4.version>
         <joda.version>2.8.1</joda.version>
         <project.scm.id>github</project.scm.id>
         <spring.version>2.7.13</spring.version>
@@ -522,6 +525,14 @@ under the License.
                 <exclusion>
                     <groupId>javax.servlet</groupId>
                     <artifactId>servlet-api</artifactId>
                 </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-yarn-common</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-yarn-api</artifactId>
+                </exclusion>
             </exclusions>
         </dependency>
@@ -552,6 +563,29 @@ under the License.
             <artifactId>kerb-simplekdc</artifactId>
             <version>${kerby.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-server</artifactId>
+            <version>${hbase.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-yarn-api</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-yarn-common</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hbase</groupId>
+                    <artifactId>hbase-hadoop2-compat</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-annotations</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
         <dependency>
             <groupId>org.apache.kerby</groupId>
             <artifactId>kerb-core</artifactId>
@@ -1123,6 +1157,11 @@ under the License.
             <artifactId>xnio-nio</artifactId>
             <version>${xnio-nio.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.jboss.xnio</groupId>
+            <artifactId>xnio-api</artifactId>
+            <version>${xnio-nio.version}</version>
+        </dependency>
         <!-- support jdk9 -->
         <dependency>
             <groupId>javax.annotation</groupId>