[Feature](hive-writer) Implements s3 file committer. (#34307)
Backport #33937.
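For context (not part of the diff): the committer finishes S3 writes on the FE by completing the multipart uploads that the BE writers left pending, and aborts them on rollback; this is what the new s3Commit and abortMultiUploads paths in HMSTransaction below do via the AWS SDK v2. A minimal sketch of that pattern follows, assuming an already-constructed S3Client; the class name, method names, and parameters (bucket, key, uploadId, etags) are illustrative placeholders, not code from this change.

import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload;
import software.amazon.awssdk.services.s3.model.CompletedPart;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

class S3MpuCommitSketch {
    // Commit path: assemble the part ETags reported by the writer and complete the upload.
    static void complete(S3Client s3, String bucket, String key, String uploadId, Map<Integer, String> etags) {
        List<CompletedPart> parts = new ArrayList<>();
        etags.forEach((partNumber, etag) ->
                parts.add(CompletedPart.builder().partNumber(partNumber).eTag(etag).build()));
        s3.completeMultipartUpload(CompleteMultipartUploadRequest.builder()
                .bucket(bucket).key(key).uploadId(uploadId)
                .multipartUpload(CompletedMultipartUpload.builder().parts(parts).build())
                .build());
    }

    // Rollback path: abort the pending upload so its parts are discarded.
    static void abort(S3Client s3, String bucket, String key, String uploadId) {
        s3.abortMultipartUpload(AbortMultipartUploadRequest.builder()
                .bucket(bucket).key(key).uploadId(uploadId).build());
    }
}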
@@ -88,6 +88,8 @@ public class SummaryProfile {
public static final String FILESYSTEM_OPT_TIME = "FileSystem Operator Time";
public static final String FILESYSTEM_OPT_RENAME_FILE_CNT = "Rename File Count";
public static final String FILESYSTEM_OPT_RENAME_DIR_CNT = "Rename Dir Count";

public static final String FILESYSTEM_OPT_DELETE_FILE_CNT = "Delete File Count";
public static final String FILESYSTEM_OPT_DELETE_DIR_CNT = "Delete Dir Count";
public static final String HMS_ADD_PARTITION_TIME = "HMS Add Partition Time";
public static final String HMS_ADD_PARTITION_CNT = "HMS Add Partition Count";
@@ -164,6 +166,7 @@ public class SummaryProfile {
.put(FILESYSTEM_OPT_TIME, 1)
.put(FILESYSTEM_OPT_RENAME_FILE_CNT, 2)
.put(FILESYSTEM_OPT_RENAME_DIR_CNT, 2)
.put(FILESYSTEM_OPT_DELETE_FILE_CNT, 2)
.put(FILESYSTEM_OPT_DELETE_DIR_CNT, 2)
.put(HMS_ADD_PARTITION_TIME, 1)
.put(HMS_ADD_PARTITION_CNT, 2)
@@ -223,6 +226,8 @@ public class SummaryProfile {
private long hmsUpdatePartitionCnt = 0;
private long filesystemRenameFileCnt = 0;
private long filesystemRenameDirCnt = 0;

private long filesystemDeleteFileCnt = 0;
private long filesystemDeleteDirCnt = 0;
private TransactionType transactionType = TransactionType.UNKNOWN;

@@ -344,6 +349,8 @@ public class SummaryProfile {
getPrettyCount(filesystemRenameFileCnt));
executionSummaryProfile.addInfoString(FILESYSTEM_OPT_RENAME_DIR_CNT,
getPrettyCount(filesystemRenameDirCnt));
executionSummaryProfile.addInfoString(FILESYSTEM_OPT_DELETE_FILE_CNT,
getPrettyCount(filesystemDeleteFileCnt));
executionSummaryProfile.addInfoString(FILESYSTEM_OPT_DELETE_DIR_CNT,
getPrettyCount(filesystemDeleteDirCnt));

@@ -666,4 +673,8 @@ public class SummaryProfile {
public void incDeleteDirRecursiveCnt() {
this.filesystemDeleteDirCnt += 1;
}

public void incDeleteFileCnt() {
this.filesystemDeleteFileCnt += 1;
}
}
@@ -296,8 +296,11 @@ public class LocationPath {
fsType = FileSystemType.S3;
break;
case COSN:
// COSN use s3 client on FE side, because it need to complete multi-part uploading files on FE side.
fsType = FileSystemType.S3;
break;
case OFS:
// ofs:// and cosn:// use the same underlying file system: Tencent Cloud HDFS, aka CHDFS)) {
// ofs:// use the underlying file system: Tencent Cloud HDFS, aka CHDFS)) {
fsType = FileSystemType.OFS;
break;
case HDFS:
@@ -329,7 +332,11 @@ public class LocationPath {
return null;
}
LocationPath locationPath = new LocationPath(location);
switch (locationPath.getLocationType()) {
return locationPath.getTFileTypeForBE();
}

public TFileType getTFileTypeForBE() {
switch (this.getLocationType()) {
case S3:
case S3A:
case S3N:
@@ -362,7 +369,7 @@ public class LocationPath {
*
* @return BE scan range path
*/
public Path toScanRangeLocation() {
public Path toStorageLocation() {
switch (locationType) {
case S3:
case S3A:
@@ -68,10 +68,10 @@ public class S3URI {
public static final String SCHEME_DELIM = "://";
public static final String PATH_DELIM = "/";
private static final Set<String> VALID_SCHEMES = ImmutableSet.of("http", "https", "s3", "s3a", "s3n",
"bos", "oss", "cos", "obs");
"bos", "oss", "cos", "cosn", "obs");

private static final Set<String> OS_SCHEMES = ImmutableSet.of("s3", "s3a", "s3n",
"bos", "oss", "cos", "obs");
"bos", "oss", "cos", "cosn", "obs");

private URI uri;
@@ -22,6 +22,7 @@ import org.apache.doris.catalog.HdfsResource;
import org.apache.doris.cluster.ClusterNamespace;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.ThreadPoolManager;
import org.apache.doris.common.security.authentication.AuthenticationConfig;
import org.apache.doris.common.security.authentication.HadoopUGI;
import org.apache.doris.datasource.CatalogProperty;
@@ -34,6 +35,8 @@ import org.apache.doris.datasource.jdbc.client.JdbcClientConfig;
import org.apache.doris.datasource.operations.ExternalMetadataOperations;
import org.apache.doris.datasource.property.PropertyConverter;
import org.apache.doris.datasource.property.constants.HMSProperties;
import org.apache.doris.fs.FileSystemProvider;
import org.apache.doris.fs.FileSystemProviderImpl;
import org.apache.doris.transaction.TransactionManagerFactory;

import com.google.common.base.Strings;
@@ -46,6 +49,7 @@ import org.apache.logging.log4j.Logger;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ThreadPoolExecutor;

/**
* External catalog for hive metastore compatible data sources.
@@ -63,6 +67,9 @@ public class HMSExternalCatalog extends ExternalCatalog {
// 0 means file cache is disabled; >0 means file cache with ttl;
public static final int FILE_META_CACHE_TTL_DISABLE_CACHE = 0;

private static final int FILE_SYSTEM_EXECUTOR_THREAD_NUM = 16;
private ThreadPoolExecutor fileSystemExecutor;

public HMSExternalCatalog() {
catalogProperty = new CatalogProperty(null, null);
}
@@ -147,7 +154,12 @@ public class HMSExternalCatalog extends ExternalCatalog {
AuthenticationConfig.HADOOP_KERBEROS_KEYTAB));
}
HiveMetadataOps hiveOps = ExternalMetadataOperations.newHiveMetadataOps(hiveConf, jdbcClientConfig, this);
transactionManager = TransactionManagerFactory.createHiveTransactionManager(hiveOps);
FileSystemProvider fileSystemProvider = new FileSystemProviderImpl(Env.getCurrentEnv().getExtMetaCacheMgr(),
this.bindBrokerName(), this.catalogProperty.getHadoopProperties());
this.fileSystemExecutor = ThreadPoolManager.newDaemonFixedThreadPool(FILE_SYSTEM_EXECUTOR_THREAD_NUM,
Integer.MAX_VALUE, String.format("hms_committer_%s_file_system_executor_pool", name), true);
transactionManager = TransactionManagerFactory.createHiveTransactionManager(hiveOps, fileSystemProvider,
fileSystemExecutor);
metadataOps = hiveOps;
}
@ -23,13 +23,18 @@ package org.apache.doris.datasource.hive;
|
||||
|
||||
import org.apache.doris.backup.Status;
|
||||
import org.apache.doris.common.Pair;
|
||||
import org.apache.doris.common.UserException;
|
||||
import org.apache.doris.common.profile.SummaryProfile;
|
||||
import org.apache.doris.fs.FileSystem;
|
||||
import org.apache.doris.fs.FileSystemProvider;
|
||||
import org.apache.doris.fs.FileSystemUtil;
|
||||
import org.apache.doris.fs.remote.RemoteFile;
|
||||
import org.apache.doris.fs.remote.S3FileSystem;
|
||||
import org.apache.doris.fs.remote.SwitchingFileSystem;
|
||||
import org.apache.doris.nereids.trees.plans.commands.insert.HiveInsertCommandContext;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
import org.apache.doris.thrift.THivePartitionUpdate;
|
||||
import org.apache.doris.thrift.TS3MPUPendingUpload;
|
||||
import org.apache.doris.thrift.TUpdateMode;
|
||||
import org.apache.doris.transaction.Transaction;
|
||||
|
||||
@ -48,6 +53,11 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
|
||||
import org.apache.hadoop.hive.metastore.api.Table;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest;
|
||||
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
|
||||
import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload;
|
||||
import software.amazon.awssdk.services.s3.model.CompletedPart;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
@ -79,17 +89,34 @@ public class HMSTransaction implements Transaction {
|
||||
private final Map<DatabaseTableName, Map<List<String>, Action<PartitionAndMore>>>
|
||||
partitionActions = new HashMap<>();
|
||||
|
||||
private final Executor fileSystemExecutor;
|
||||
private HmsCommitter hmsCommitter;
|
||||
private List<THivePartitionUpdate> hivePartitionUpdates = Lists.newArrayList();
|
||||
private String declaredIntentionsToWrite;
|
||||
|
||||
public HMSTransaction(HiveMetadataOps hiveOps) {
|
||||
this.hiveOps = hiveOps;
|
||||
this.fs = hiveOps.getFs();
|
||||
private static class UncompletedMpuPendingUpload {
|
||||
|
||||
private final TS3MPUPendingUpload s3MPUPendingUpload;
|
||||
private final String path;
|
||||
|
||||
public UncompletedMpuPendingUpload(TS3MPUPendingUpload s3MPUPendingUpload, String path) {
|
||||
this.s3MPUPendingUpload = s3MPUPendingUpload;
|
||||
this.path = path;
|
||||
}
|
||||
}
|
||||
|
||||
private Set<UncompletedMpuPendingUpload> uncompletedMpuPendingUploads = new HashSet<>();
|
||||
|
||||
public HMSTransaction(HiveMetadataOps hiveOps, FileSystemProvider fileSystemProvider, Executor fileSystemExecutor) {
|
||||
this.hiveOps = hiveOps;
|
||||
this.fs = fileSystemProvider.get(null);
|
||||
if (!(fs instanceof SwitchingFileSystem)) {
|
||||
throw new RuntimeException("fs should be SwitchingFileSystem");
|
||||
}
|
||||
if (ConnectContext.get().getExecutor() != null) {
|
||||
summaryProfile = Optional.of(ConnectContext.get().getExecutor().getSummaryProfile());
|
||||
}
|
||||
this.fileSystemExecutor = fileSystemExecutor;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -112,6 +139,9 @@ public class HMSTransaction implements Transaction {
|
||||
THivePartitionUpdate old = mm.get(pu.getName());
|
||||
old.setFileSize(old.getFileSize() + pu.getFileSize());
|
||||
old.setRowCount(old.getRowCount() + pu.getRowCount());
|
||||
if (old.getS3MpuPendingUploads() != null && pu.getS3MpuPendingUploads() != null) {
|
||||
old.getS3MpuPendingUploads().addAll(pu.getS3MpuPendingUploads());
|
||||
}
|
||||
old.getFileNames().addAll(pu.getFileNames());
|
||||
} else {
|
||||
mm.put(pu.getName(), pu);
|
||||
@ -136,6 +166,14 @@ public class HMSTransaction implements Transaction {
|
||||
this.dbName = dbName;
|
||||
this.tbName = tbName;
|
||||
List<THivePartitionUpdate> mergedPUs = mergePartitions(hivePartitionUpdates);
|
||||
for (THivePartitionUpdate pu : mergedPUs) {
|
||||
if (pu.getS3MpuPendingUploads() != null) {
|
||||
for (TS3MPUPendingUpload s3MPUPendingUpload : pu.getS3MpuPendingUploads()) {
|
||||
uncompletedMpuPendingUploads.add(
|
||||
new UncompletedMpuPendingUpload(s3MPUPendingUpload, pu.getLocation().getTargetPath()));
|
||||
}
|
||||
}
|
||||
}
|
||||
Table table = getTable(dbName, tbName);
|
||||
List<Pair<THivePartitionUpdate, HivePartitionStatistics>> insertExistsPartitions = new ArrayList<>();
|
||||
for (THivePartitionUpdate pu : mergedPUs) {
|
||||
@ -156,11 +194,12 @@ public class HMSTransaction implements Transaction {
|
||||
tbName,
|
||||
writePath,
|
||||
pu.getFileNames(),
|
||||
hivePartitionStatistics);
|
||||
hivePartitionStatistics,
|
||||
pu);
|
||||
break;
|
||||
case OVERWRITE:
|
||||
dropTable(dbName, tbName);
|
||||
createTable(table, writePath, pu.getFileNames(), hivePartitionStatistics);
|
||||
createTable(table, writePath, pu.getFileNames(), hivePartitionStatistics, pu);
|
||||
break;
|
||||
default:
|
||||
throw new RuntimeException("Not support mode:[" + updateMode + "] in unPartitioned table");
|
||||
@ -191,7 +230,7 @@ public class HMSTransaction implements Transaction {
|
||||
}
|
||||
addPartition(
|
||||
dbName, tbName, hivePartition, writePath,
|
||||
pu.getName(), pu.getFileNames(), hivePartitionStatistics);
|
||||
pu.getName(), pu.getFileNames(), hivePartitionStatistics, pu);
|
||||
break;
|
||||
default:
|
||||
throw new RuntimeException("Not support mode:[" + updateMode + "] in partitioned table");
|
||||
@ -351,7 +390,8 @@ public class HMSTransaction implements Transaction {
|
||||
pu.getLocation().getWritePath(),
|
||||
pu.getName(),
|
||||
pu.getFileNames(),
|
||||
updateStats
|
||||
updateStats,
|
||||
pu
|
||||
))
|
||||
);
|
||||
}
|
||||
@ -550,8 +590,8 @@ public class HMSTransaction implements Transaction {
|
||||
|
||||
|
||||
|
||||
private void recursiveDeleteItems(Path directory, boolean deleteEmptyDir) {
|
||||
DeleteRecursivelyResult deleteResult = recursiveDeleteFiles(directory, deleteEmptyDir);
|
||||
private void recursiveDeleteItems(Path directory, boolean deleteEmptyDir, boolean reverse) {
|
||||
DeleteRecursivelyResult deleteResult = recursiveDeleteFiles(directory, deleteEmptyDir, reverse);
|
||||
|
||||
if (!deleteResult.getNotDeletedEligibleItems().isEmpty()) {
|
||||
LOG.warn("Failed to delete directory {}. Some eligible items can't be deleted: {}.",
|
||||
@ -561,9 +601,9 @@ public class HMSTransaction implements Transaction {
|
||||
}
|
||||
}
|
||||
|
||||
private DeleteRecursivelyResult recursiveDeleteFiles(Path directory, boolean deleteEmptyDir) {
|
||||
private DeleteRecursivelyResult recursiveDeleteFiles(Path directory, boolean deleteEmptyDir, boolean reverse) {
|
||||
try {
|
||||
if (!fs.exists(directory.toString()).ok()) {
|
||||
if (!fs.directoryExists(directory.toString()).ok()) {
|
||||
return new DeleteRecursivelyResult(true, ImmutableList.of());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
@ -572,10 +612,11 @@ public class HMSTransaction implements Transaction {
|
||||
return new DeleteRecursivelyResult(false, notDeletedEligibleItems.build());
|
||||
}
|
||||
|
||||
return doRecursiveDeleteFiles(directory, deleteEmptyDir, queryId);
|
||||
return doRecursiveDeleteFiles(directory, deleteEmptyDir, queryId, reverse);
|
||||
}
|
||||
|
||||
private DeleteRecursivelyResult doRecursiveDeleteFiles(Path directory, boolean deleteEmptyDir, String queryId) {
|
||||
private DeleteRecursivelyResult doRecursiveDeleteFiles(Path directory, boolean deleteEmptyDir,
|
||||
String queryId, boolean reverse) {
|
||||
List<RemoteFile> allFiles = new ArrayList<>();
|
||||
Set<String> allDirs = new HashSet<>();
|
||||
Status statusFile = fs.listFiles(directory.toString(), true, allFiles);
|
||||
@ -589,7 +630,7 @@ public class HMSTransaction implements Transaction {
|
||||
boolean allDescendentsDeleted = true;
|
||||
ImmutableList.Builder<String> notDeletedEligibleItems = ImmutableList.builder();
|
||||
for (RemoteFile file : allFiles) {
|
||||
if (file.getName().startsWith(queryId)) {
|
||||
if (reverse ^ file.getName().startsWith(queryId)) {
|
||||
if (!deleteIfExists(file.getPath())) {
|
||||
allDescendentsDeleted = false;
|
||||
notDeletedEligibleItems.add(file.getPath().toString());
|
||||
@ -600,7 +641,7 @@ public class HMSTransaction implements Transaction {
|
||||
}
|
||||
|
||||
for (String dir : allDirs) {
|
||||
DeleteRecursivelyResult subResult = doRecursiveDeleteFiles(new Path(dir), deleteEmptyDir, queryId);
|
||||
DeleteRecursivelyResult subResult = doRecursiveDeleteFiles(new Path(dir), deleteEmptyDir, queryId, reverse);
|
||||
if (!subResult.dirNotExists()) {
|
||||
allDescendentsDeleted = false;
|
||||
}
|
||||
@ -611,7 +652,7 @@ public class HMSTransaction implements Transaction {
|
||||
|
||||
if (allDescendentsDeleted && deleteEmptyDir) {
|
||||
Verify.verify(notDeletedEligibleItems.build().isEmpty());
|
||||
if (!deleteIfExists(directory)) {
|
||||
if (!deleteDirectoryIfExists(directory)) {
|
||||
return new DeleteRecursivelyResult(false, ImmutableList.of(directory + "/"));
|
||||
}
|
||||
// all items of the location have been deleted.
|
||||
@ -628,6 +669,14 @@ public class HMSTransaction implements Transaction {
|
||||
return !fs.exists(path.toString()).ok();
|
||||
}
|
||||
|
||||
public boolean deleteDirectoryIfExists(Path path) {
|
||||
Status status = wrapperDeleteDirWithProfileSummary(path.toString());
|
||||
if (status.ok()) {
|
||||
return true;
|
||||
}
|
||||
return !fs.directoryExists(path.toString()).ok();
|
||||
}
|
||||
|
||||
public static class DatabaseTableName {
|
||||
private final String dbName;
|
||||
private final String tbName;
|
||||
@ -676,15 +725,19 @@ public class HMSTransaction implements Transaction {
|
||||
private final List<String> fileNames;
|
||||
private final HivePartitionStatistics statisticsUpdate;
|
||||
|
||||
private final THivePartitionUpdate hivePartitionUpdate;
|
||||
|
||||
public TableAndMore(
|
||||
Table table,
|
||||
String currentLocation,
|
||||
List<String> fileNames,
|
||||
HivePartitionStatistics statisticsUpdate) {
|
||||
HivePartitionStatistics statisticsUpdate,
|
||||
THivePartitionUpdate hivePartitionUpdate) {
|
||||
this.table = Objects.requireNonNull(table, "table is null");
|
||||
this.currentLocation = Objects.requireNonNull(currentLocation);
|
||||
this.fileNames = Objects.requireNonNull(fileNames);
|
||||
this.statisticsUpdate = Objects.requireNonNull(statisticsUpdate, "statisticsUpdate is null");
|
||||
this.hivePartitionUpdate = Objects.requireNonNull(hivePartitionUpdate, "hivePartitionUpdate is null");
|
||||
}
|
||||
|
||||
public Table getTable() {
|
||||
@ -703,6 +756,10 @@ public class HMSTransaction implements Transaction {
|
||||
return statisticsUpdate;
|
||||
}
|
||||
|
||||
public THivePartitionUpdate getHivePartitionUpdate() {
|
||||
return hivePartitionUpdate;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
@ -719,17 +776,22 @@ public class HMSTransaction implements Transaction {
|
||||
private final List<String> fileNames;
|
||||
private final HivePartitionStatistics statisticsUpdate;
|
||||
|
||||
private final THivePartitionUpdate hivePartitionUpdate;
|
||||
|
||||
|
||||
public PartitionAndMore(
|
||||
HivePartition partition,
|
||||
String currentLocation,
|
||||
String partitionName,
|
||||
List<String> fileNames,
|
||||
HivePartitionStatistics statisticsUpdate) {
|
||||
HivePartitionStatistics statisticsUpdate,
|
||||
THivePartitionUpdate hivePartitionUpdate) {
|
||||
this.partition = Objects.requireNonNull(partition, "partition is null");
|
||||
this.currentLocation = Objects.requireNonNull(currentLocation, "currentLocation is null");
|
||||
this.partitionName = Objects.requireNonNull(partitionName, "partition is null");
|
||||
this.fileNames = Objects.requireNonNull(fileNames, "fileNames is null");
|
||||
this.statisticsUpdate = Objects.requireNonNull(statisticsUpdate, "statisticsUpdate is null");
|
||||
this.hivePartitionUpdate = Objects.requireNonNull(hivePartitionUpdate, "hivePartitionUpdate is null");
|
||||
}
|
||||
|
||||
public HivePartition getPartition() {
|
||||
@ -752,6 +814,10 @@ public class HMSTransaction implements Transaction {
|
||||
return statisticsUpdate;
|
||||
}
|
||||
|
||||
public THivePartitionUpdate getHivePartitionUpdate() {
|
||||
return hivePartitionUpdate;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
@ -835,7 +901,8 @@ public class HMSTransaction implements Transaction {
|
||||
String tableName,
|
||||
String location,
|
||||
List<String> fileNames,
|
||||
HivePartitionStatistics statisticsUpdate) {
|
||||
HivePartitionStatistics statisticsUpdate,
|
||||
THivePartitionUpdate hivePartitionUpdate) {
|
||||
DatabaseTableName databaseTableName = new DatabaseTableName(databaseName, tableName);
|
||||
Action<TableAndMore> oldTableAction = tableActions.get(databaseTableName);
|
||||
if (oldTableAction == null) {
|
||||
@ -843,12 +910,13 @@ public class HMSTransaction implements Transaction {
|
||||
tableActions.put(
|
||||
databaseTableName,
|
||||
new Action<>(
|
||||
actionType,
|
||||
actionType,
|
||||
new TableAndMore(
|
||||
table,
|
||||
location,
|
||||
fileNames,
|
||||
statisticsUpdate)));
|
||||
table,
|
||||
location,
|
||||
fileNames,
|
||||
statisticsUpdate,
|
||||
hivePartitionUpdate)));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -870,12 +938,13 @@ public class HMSTransaction implements Transaction {
|
||||
}
|
||||
|
||||
public synchronized void createTable(
|
||||
Table table, String location, List<String> fileNames, HivePartitionStatistics statistics) {
|
||||
Table table, String location, List<String> fileNames, HivePartitionStatistics statistics,
|
||||
THivePartitionUpdate hivePartitionUpdate) {
|
||||
// When creating a table, it should never have partition actions. This is just a sanity check.
|
||||
checkNoPartitionAction(dbName, tbName);
|
||||
DatabaseTableName databaseTableName = new DatabaseTableName(dbName, tbName);
|
||||
Action<TableAndMore> oldTableAction = tableActions.get(databaseTableName);
|
||||
TableAndMore tableAndMore = new TableAndMore(table, location, fileNames, statistics);
|
||||
TableAndMore tableAndMore = new TableAndMore(table, location, fileNames, statistics, hivePartitionUpdate);
|
||||
if (oldTableAction == null) {
|
||||
tableActions.put(databaseTableName, new Action<>(ActionType.ADD, tableAndMore));
|
||||
return;
|
||||
@ -939,7 +1008,8 @@ public class HMSTransaction implements Transaction {
|
||||
String currentLocation,
|
||||
String partitionName,
|
||||
List<String> files,
|
||||
HivePartitionStatistics statistics) {
|
||||
HivePartitionStatistics statistics,
|
||||
THivePartitionUpdate hivePartitionUpdate) {
|
||||
Map<List<String>, Action<PartitionAndMore>> partitionActionsForTable =
|
||||
partitionActions.computeIfAbsent(new DatabaseTableName(databaseName, tableName), k -> new HashMap<>());
|
||||
Action<PartitionAndMore> oldPartitionAction = partitionActionsForTable.get(partition.getPartitionValues());
|
||||
@ -948,7 +1018,8 @@ public class HMSTransaction implements Transaction {
|
||||
partition.getPartitionValues(),
|
||||
new Action<>(
|
||||
ActionType.ADD,
|
||||
new PartitionAndMore(partition, currentLocation, partitionName, files, statistics))
|
||||
new PartitionAndMore(partition, currentLocation, partitionName, files, statistics,
|
||||
hivePartitionUpdate))
|
||||
);
|
||||
return;
|
||||
}
|
||||
@ -959,7 +1030,8 @@ public class HMSTransaction implements Transaction {
|
||||
partition.getPartitionValues(),
|
||||
new Action<>(
|
||||
ActionType.ALTER,
|
||||
new PartitionAndMore(partition, currentLocation, partitionName, files, statistics))
|
||||
new PartitionAndMore(partition, currentLocation, partitionName, files, statistics,
|
||||
hivePartitionUpdate))
|
||||
);
|
||||
return;
|
||||
case ADD:
|
||||
@ -1029,7 +1101,8 @@ public class HMSTransaction implements Transaction {
|
||||
private final List<RenameDirectoryTask> renameDirectoryTasksForAbort = new ArrayList<>();
|
||||
// when finished, we need clear some directories
|
||||
private final List<String> clearDirsForFinish = new ArrayList<>();
|
||||
Executor fileSystemExecutor = Executors.newFixedThreadPool(16);
|
||||
|
||||
private final List<String> s3cleanWhenSuccess = new ArrayList<>();
|
||||
|
||||
public void cancelUnStartedAsyncFileSystemTask() {
|
||||
fileSystemTaskCancelled.set(true);
|
||||
@ -1091,15 +1164,20 @@ public class HMSTransaction implements Transaction {
|
||||
writePath,
|
||||
targetPath,
|
||||
tableAndMore.getFileNames());
|
||||
} else {
|
||||
if (!tableAndMore.hivePartitionUpdate.s3_mpu_pending_uploads.isEmpty()) {
|
||||
s3Commit(fileSystemExecutor, asyncFileSystemTaskFutures, fileSystemTaskCancelled,
|
||||
tableAndMore.hivePartitionUpdate, targetPath);
|
||||
}
|
||||
}
|
||||
directoryCleanUpTasksForAbort.add(new DirectoryCleanUpTask(targetPath, false));
|
||||
updateStatisticsTasks.add(
|
||||
new UpdateStatisticsTask(
|
||||
dbName,
|
||||
tbName,
|
||||
Optional.empty(),
|
||||
tableAndMore.getStatisticsUpdate(),
|
||||
true
|
||||
new UpdateStatisticsTask(
|
||||
dbName,
|
||||
tbName,
|
||||
Optional.empty(),
|
||||
tableAndMore.getStatisticsUpdate(),
|
||||
true
|
||||
));
|
||||
}
|
||||
|
||||
@ -1129,6 +1207,12 @@ public class HMSTransaction implements Transaction {
|
||||
throw new RuntimeException(
|
||||
"Error to rename dir from " + writePath + " to " + targetPath + ":" + status.getErrMsg());
|
||||
}
|
||||
} else {
|
||||
if (!tableAndMore.hivePartitionUpdate.s3_mpu_pending_uploads.isEmpty()) {
|
||||
s3cleanWhenSuccess.add(targetPath);
|
||||
s3Commit(fileSystemExecutor, asyncFileSystemTaskFutures, fileSystemTaskCancelled,
|
||||
tableAndMore.hivePartitionUpdate, targetPath);
|
||||
}
|
||||
}
|
||||
updateStatisticsTasks.add(
|
||||
new UpdateStatisticsTask(
|
||||
@ -1154,6 +1238,11 @@ public class HMSTransaction implements Transaction {
|
||||
writePath,
|
||||
targetPath,
|
||||
() -> directoryCleanUpTasksForAbort.add(new DirectoryCleanUpTask(targetPath, true)));
|
||||
} else {
|
||||
if (!partitionAndMore.hivePartitionUpdate.s3_mpu_pending_uploads.isEmpty()) {
|
||||
s3Commit(fileSystemExecutor, asyncFileSystemTaskFutures, fileSystemTaskCancelled,
|
||||
partitionAndMore.hivePartitionUpdate, targetPath);
|
||||
}
|
||||
}
|
||||
|
||||
StorageDescriptor sd = getTable(dbName, tbName).getSd();
|
||||
@ -1194,6 +1283,11 @@ public class HMSTransaction implements Transaction {
|
||||
writePath,
|
||||
targetPath,
|
||||
partitionAndMore.getFileNames());
|
||||
} else {
|
||||
if (!partitionAndMore.hivePartitionUpdate.s3_mpu_pending_uploads.isEmpty()) {
|
||||
s3Commit(fileSystemExecutor, asyncFileSystemTaskFutures, fileSystemTaskCancelled,
|
||||
partitionAndMore.hivePartitionUpdate, targetPath);
|
||||
}
|
||||
}
|
||||
|
||||
updateStatisticsTasks.add(
|
||||
@ -1207,7 +1301,7 @@ public class HMSTransaction implements Transaction {
|
||||
|
||||
private void runDirectoryClearUpTasksForAbort() {
|
||||
for (DirectoryCleanUpTask cleanUpTask : directoryCleanUpTasksForAbort) {
|
||||
recursiveDeleteItems(cleanUpTask.getPath(), cleanUpTask.isDeleteEmptyDir());
|
||||
recursiveDeleteItems(cleanUpTask.getPath(), cleanUpTask.isDeleteEmptyDir(), false);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1228,13 +1322,19 @@ public class HMSTransaction implements Transaction {
|
||||
private void runClearPathsForFinish() {
|
||||
Status status;
|
||||
for (String path : clearDirsForFinish) {
|
||||
status = wrapperDeleteWithProfileSummary(path);
|
||||
status = wrapperDeleteDirWithProfileSummary(path);
|
||||
if (!status.ok()) {
|
||||
LOG.warn("Failed to recursively delete path {}:{}", path, status.getErrCode());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void runS3cleanWhenSuccess() {
|
||||
for (String path : s3cleanWhenSuccess) {
|
||||
recursiveDeleteItems(new Path(path), false, true);
|
||||
}
|
||||
}
|
||||
|
||||
public void prepareAlterPartition(PartitionAndMore partitionAndMore) {
|
||||
HivePartition partition = partitionAndMore.getPartition();
|
||||
String targetPath = partition.getPath();
|
||||
@ -1263,6 +1363,12 @@ public class HMSTransaction implements Transaction {
|
||||
throw new RuntimeException(
|
||||
"Error to rename dir from " + writePath + " to " + targetPath + ":" + status.getErrMsg());
|
||||
}
|
||||
} else {
|
||||
if (!partitionAndMore.hivePartitionUpdate.s3_mpu_pending_uploads.isEmpty()) {
|
||||
s3cleanWhenSuccess.add(targetPath);
|
||||
s3Commit(fileSystemExecutor, asyncFileSystemTaskFutures, fileSystemTaskCancelled,
|
||||
partitionAndMore.hivePartitionUpdate, targetPath);
|
||||
}
|
||||
}
|
||||
|
||||
updateStatisticsTasks.add(
|
||||
@ -1337,8 +1443,32 @@ public class HMSTransaction implements Transaction {
|
||||
summaryProfile.ifPresent(SummaryProfile::setHmsUpdatePartitionTime);
|
||||
}
|
||||
|
||||
public void pruneAndDeleteStagingDirectories() {
|
||||
recursiveDeleteItems(new Path(declaredIntentionsToWrite), true);
|
||||
private void pruneAndDeleteStagingDirectories() {
|
||||
recursiveDeleteItems(new Path(declaredIntentionsToWrite), true, false);
|
||||
}
|
||||
|
||||
private void abortMultiUploads() {
|
||||
if (uncompletedMpuPendingUploads.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
for (UncompletedMpuPendingUpload uncompletedMpuPendingUpload : uncompletedMpuPendingUploads) {
|
||||
S3FileSystem s3FileSystem = (S3FileSystem) ((SwitchingFileSystem) fs)
|
||||
.fileSystem(uncompletedMpuPendingUpload.path);
|
||||
|
||||
S3Client s3Client;
|
||||
try {
|
||||
s3Client = (S3Client) s3FileSystem.getObjStorage().getClient();
|
||||
} catch (UserException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
asyncFileSystemTaskFutures.add(CompletableFuture.runAsync(() -> {
|
||||
s3Client.abortMultipartUpload(AbortMultipartUploadRequest.builder()
|
||||
.bucket(uncompletedMpuPendingUpload.s3MPUPendingUpload.getBucket())
|
||||
.key(uncompletedMpuPendingUpload.s3MPUPendingUpload.getKey())
|
||||
.uploadId(uncompletedMpuPendingUpload.s3MPUPendingUpload.getUploadId())
|
||||
.build());
|
||||
}, fileSystemExecutor));
|
||||
}
|
||||
}
|
||||
|
||||
public void doNothing() {
|
||||
@ -1348,6 +1478,7 @@ public class HMSTransaction implements Transaction {
|
||||
|
||||
public void doCommit() {
|
||||
waitForAsyncFileSystemTasks();
|
||||
runS3cleanWhenSuccess();
|
||||
doAddPartitionsTask();
|
||||
doUpdateStatisticsTasks();
|
||||
doNothing();
|
||||
@ -1365,6 +1496,11 @@ public class HMSTransaction implements Transaction {
|
||||
public void rollback() {
|
||||
//delete write path
|
||||
pruneAndDeleteStagingDirectories();
|
||||
// abort the in-progress multipart uploads
|
||||
abortMultiUploads();
|
||||
for (CompletableFuture<?> future : asyncFileSystemTaskFutures) {
|
||||
MoreFutures.getFutureValue(future, RuntimeException.class);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1385,7 +1521,7 @@ public class HMSTransaction implements Transaction {
|
||||
public Status wrapperDeleteWithProfileSummary(String remotePath) {
|
||||
summaryProfile.ifPresent(profile -> {
|
||||
profile.setTempStartTime();
|
||||
profile.incDeleteDirRecursiveCnt();
|
||||
profile.incDeleteFileCnt();
|
||||
});
|
||||
|
||||
Status status = fs.delete(remotePath);
|
||||
@ -1394,6 +1530,18 @@ public class HMSTransaction implements Transaction {
|
||||
return status;
|
||||
}
|
||||
|
||||
public Status wrapperDeleteDirWithProfileSummary(String remotePath) {
|
||||
summaryProfile.ifPresent(profile -> {
|
||||
profile.setTempStartTime();
|
||||
profile.incDeleteDirRecursiveCnt();
|
||||
});
|
||||
|
||||
Status status = fs.deleteDirectory(remotePath);
|
||||
|
||||
summaryProfile.ifPresent(SummaryProfile::freshFilesystemOptTime);
|
||||
return status;
|
||||
}
|
||||
|
||||
public void wrapperAsyncRenameWithProfileSummary(Executor executor,
|
||||
List<CompletableFuture<?>> renameFileFutures,
|
||||
AtomicBoolean cancelled,
|
||||
@ -1415,4 +1563,37 @@ public class HMSTransaction implements Transaction {
|
||||
fs, executor, renameFileFutures, cancelled, origFilePath, destFilePath, runWhenPathNotExist);
|
||||
summaryProfile.ifPresent(SummaryProfile::incRenameDirCnt);
|
||||
}
|
||||
|
||||
private void s3Commit(Executor fileSystemExecutor, List<CompletableFuture<?>> asyncFileSystemTaskFutures,
|
||||
AtomicBoolean fileSystemTaskCancelled, THivePartitionUpdate hivePartitionUpdate, String path) {
|
||||
S3FileSystem s3FileSystem = (S3FileSystem) ((SwitchingFileSystem) fs).fileSystem(path);
|
||||
S3Client s3Client;
|
||||
try {
|
||||
s3Client = (S3Client) s3FileSystem.getObjStorage().getClient();
|
||||
} catch (UserException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
for (TS3MPUPendingUpload s3MPUPendingUpload : hivePartitionUpdate.getS3MpuPendingUploads()) {
|
||||
asyncFileSystemTaskFutures.add(CompletableFuture.runAsync(() -> {
|
||||
if (fileSystemTaskCancelled.get()) {
|
||||
return;
|
||||
}
|
||||
List<CompletedPart> completedParts = Lists.newArrayList();
|
||||
for (Map.Entry<Integer, String> entry : s3MPUPendingUpload.getEtags().entrySet()) {
|
||||
completedParts.add(CompletedPart.builder().eTag(entry.getValue()).partNumber(entry.getKey())
|
||||
.build());
|
||||
}
|
||||
|
||||
s3Client.completeMultipartUpload(CompleteMultipartUploadRequest.builder()
|
||||
.bucket(s3MPUPendingUpload.getBucket())
|
||||
.key(s3MPUPendingUpload.getKey())
|
||||
.uploadId(s3MPUPendingUpload.getUploadId())
|
||||
.multipartUpload(CompletedMultipartUpload.builder().parts(completedParts).build())
|
||||
.build());
|
||||
uncompletedMpuPendingUploads.remove(new UncompletedMpuPendingUpload(s3MPUPendingUpload, path));
|
||||
}, fileSystemExecutor));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -349,9 +349,11 @@ public class HiveMetaStoreCache {
|
||||
List<String> partitionValues,
|
||||
String bindBrokerName) throws UserException {
|
||||
FileCacheValue result = new FileCacheValue();
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
jobConf.iterator().forEachRemaining(e -> properties.put(e.getKey(), e.getValue()));
|
||||
RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem(
|
||||
new FileSystemCache.FileSystemCacheKey(LocationPath.getFSIdentity(
|
||||
location, bindBrokerName), jobConf, bindBrokerName));
|
||||
location, bindBrokerName), properties, bindBrokerName));
|
||||
result.setSplittable(HiveUtil.isSplittable(fs, inputFormat, location, jobConf));
|
||||
// For Tez engine, it may generate subdirectoies for "union" query.
|
||||
// So there may be files and directories in the table directory at the same time. eg:
|
||||
@ -366,7 +368,7 @@ public class HiveMetaStoreCache {
|
||||
for (RemoteFile remoteFile : remoteFiles) {
|
||||
String srcPath = remoteFile.getPath().toString();
|
||||
LocationPath locationPath = new LocationPath(srcPath, catalog.getProperties());
|
||||
Path convertedPath = locationPath.toScanRangeLocation();
|
||||
Path convertedPath = locationPath.toStorageLocation();
|
||||
if (!convertedPath.toString().equals(srcPath)) {
|
||||
remoteFile.setPath(convertedPath);
|
||||
}
|
||||
@ -777,10 +779,12 @@ public class HiveMetaStoreCache {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
String acidVersionPath = new Path(baseOrDeltaPath, "_orc_acid_version").toUri().toString();
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
jobConf.iterator().forEachRemaining(e -> properties.put(e.getKey(), e.getValue()));
|
||||
RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem(
|
||||
new FileSystemCache.FileSystemCacheKey(
|
||||
LocationPath.getFSIdentity(baseOrDeltaPath.toUri().toString(),
|
||||
bindBrokerName), jobConf, bindBrokerName));
|
||||
bindBrokerName), properties, bindBrokerName));
|
||||
Status status = fs.exists(acidVersionPath);
|
||||
if (status != Status.OK) {
|
||||
if (status.getErrCode() == ErrCode.NOT_FOUND) {
|
||||
@ -800,10 +804,12 @@ public class HiveMetaStoreCache {
|
||||
List<DeleteDeltaInfo> deleteDeltas = new ArrayList<>();
|
||||
for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
|
||||
String location = delta.getPath().toString();
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
jobConf.iterator().forEachRemaining(e -> properties.put(e.getKey(), e.getValue()));
|
||||
RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem(
|
||||
new FileSystemCache.FileSystemCacheKey(
|
||||
LocationPath.getFSIdentity(location, bindBrokerName),
|
||||
jobConf, bindBrokerName));
|
||||
properties, bindBrokerName));
|
||||
List<RemoteFile> remoteFiles = new ArrayList<>();
|
||||
Status status = fs.listFiles(location, false, remoteFiles);
|
||||
if (status.ok()) {
|
||||
@ -825,10 +831,12 @@ public class HiveMetaStoreCache {
|
||||
// base
|
||||
if (directory.getBaseDirectory() != null) {
|
||||
String location = directory.getBaseDirectory().toString();
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
jobConf.iterator().forEachRemaining(e -> properties.put(e.getKey(), e.getValue()));
|
||||
RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem(
|
||||
new FileSystemCache.FileSystemCacheKey(
|
||||
LocationPath.getFSIdentity(location, bindBrokerName),
|
||||
jobConf, bindBrokerName));
|
||||
properties, bindBrokerName));
|
||||
List<RemoteFile> remoteFiles = new ArrayList<>();
|
||||
Status status = fs.listFiles(location, false, remoteFiles);
|
||||
if (status.ok()) {
|
||||
|
||||
@ -36,8 +36,6 @@ import org.apache.doris.datasource.ExternalDatabase;
|
||||
import org.apache.doris.datasource.jdbc.client.JdbcClient;
|
||||
import org.apache.doris.datasource.jdbc.client.JdbcClientConfig;
|
||||
import org.apache.doris.datasource.operations.ExternalMetadataOps;
|
||||
import org.apache.doris.fs.FileSystem;
|
||||
import org.apache.doris.fs.remote.dfs.DFSFileSystem;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
@ -61,7 +59,6 @@ public class HiveMetadataOps implements ExternalMetadataOps {
|
||||
private static final Logger LOG = LogManager.getLogger(HiveMetadataOps.class);
|
||||
private static final int MIN_CLIENT_POOL_SIZE = 8;
|
||||
private final HMSCachedClient client;
|
||||
private final FileSystem fs;
|
||||
private final HMSExternalCatalog catalog;
|
||||
|
||||
public HiveMetadataOps(HiveConf hiveConf, JdbcClientConfig jdbcClientConfig, HMSExternalCatalog catalog) {
|
||||
@ -74,24 +71,14 @@ public class HiveMetadataOps implements ExternalMetadataOps {
|
||||
public HiveMetadataOps(HMSExternalCatalog catalog, HMSCachedClient client) {
|
||||
this.catalog = catalog;
|
||||
this.client = client;
|
||||
// TODO Currently only supports DFSFileSystem, more types will be supported in the future
|
||||
this.fs = new DFSFileSystem(catalog.getProperties());
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public HiveMetadataOps(HMSExternalCatalog catalog, HMSCachedClient client, FileSystem fs) {
|
||||
this.catalog = catalog;
|
||||
this.client = client;
|
||||
this.fs = fs;
|
||||
}
|
||||
|
||||
|
||||
public HMSCachedClient getClient() {
|
||||
return client;
|
||||
}
|
||||
|
||||
public FileSystem getFs() {
|
||||
return fs;
|
||||
public HMSExternalCatalog getCatalog() {
|
||||
return catalog;
|
||||
}
|
||||
|
||||
public static HMSCachedClient createCachedClient(HiveConf hiveConf, int thriftClientPoolSize,
|
||||
|
||||
@ -354,7 +354,7 @@ public class HudiScanNode extends HiveScanNode {
|
||||
long fileSize = baseFile.getFileSize();
|
||||
// Need add hdfs host to location
|
||||
LocationPath locationPath = new LocationPath(filePath, hmsTable.getCatalogProperties());
|
||||
Path splitFilePath = locationPath.toScanRangeLocation();
|
||||
Path splitFilePath = locationPath.toStorageLocation();
|
||||
splits.add(new FileSplit(splitFilePath, 0, fileSize, fileSize,
|
||||
new String[0], partition.getPartitionValues()));
|
||||
});
|
||||
|
||||
@ -150,7 +150,7 @@ public class IcebergScanNode extends FileQueryScanNode {
|
||||
TIcebergDeleteFileDesc deleteFileDesc = new TIcebergDeleteFileDesc();
|
||||
String deleteFilePath = filter.getDeleteFilePath();
|
||||
LocationPath locationPath = new LocationPath(deleteFilePath, icebergSplit.getConfig());
|
||||
Path splitDeletePath = locationPath.toScanRangeLocation();
|
||||
Path splitDeletePath = locationPath.toStorageLocation();
|
||||
deleteFileDesc.setPath(splitDeletePath.toString());
|
||||
if (filter instanceof IcebergDeleteFileFilter.PositionDelete) {
|
||||
fileDesc.setContent(FileContent.POSITION_DELETES.id());
|
||||
@ -244,7 +244,7 @@ public class IcebergScanNode extends FileQueryScanNode {
|
||||
partitionPathSet.add(structLike.toString());
|
||||
}
|
||||
LocationPath locationPath = new LocationPath(dataFilePath, source.getCatalog().getProperties());
|
||||
Path finalDataFilePath = locationPath.toScanRangeLocation();
|
||||
Path finalDataFilePath = locationPath.toStorageLocation();
|
||||
IcebergSplit split = new IcebergSplit(
|
||||
finalDataFilePath,
|
||||
splitTask.start(),
|
||||
|
||||
@ -161,7 +161,7 @@ public class PaimonScanNode extends FileQueryScanNode {
|
||||
List<RawFile> rawFiles = optRawFiles.get();
|
||||
for (RawFile file : rawFiles) {
|
||||
LocationPath locationPath = new LocationPath(file.path(), source.getCatalog().getProperties());
|
||||
Path finalDataFilePath = locationPath.toScanRangeLocation();
|
||||
Path finalDataFilePath = locationPath.toStorageLocation();
|
||||
try {
|
||||
splits.addAll(
|
||||
splitFile(
|
||||
|
||||
@@ -38,6 +38,10 @@ public interface FileSystem {

Status exists(String remotePath);

default Status directoryExists(String dir) {
return exists(dir);
}

Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize);

Status upload(String localPath, String remotePath);
@@ -58,6 +62,10 @@ public interface FileSystem {

Status delete(String remotePath);

default Status deleteDirectory(String dir) {
return delete(dir);
}

Status makeDir(String remotePath);

Status listFiles(String remotePath, boolean recursive, List<RemoteFile> result);
@ -23,8 +23,8 @@ import org.apache.doris.common.Pair;
|
||||
import org.apache.doris.fs.remote.RemoteFileSystem;
|
||||
|
||||
import com.github.benmanes.caffeine.cache.LoadingCache;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.OptionalLong;
|
||||
|
||||
@ -44,7 +44,7 @@ public class FileSystemCache {
|
||||
}
|
||||
|
||||
private RemoteFileSystem loadFileSystem(FileSystemCacheKey key) {
|
||||
return FileSystemFactory.getRemoteFileSystem(key.type, key.conf, key.bindBrokerName);
|
||||
return FileSystemFactory.getRemoteFileSystem(key.type, key.properties, key.bindBrokerName);
|
||||
}
|
||||
|
||||
public RemoteFileSystem getRemoteFileSystem(FileSystemCacheKey key) {
|
||||
@ -55,13 +55,14 @@ public class FileSystemCache {
|
||||
private final FileSystemType type;
|
||||
// eg: hdfs://nameservices1
|
||||
private final String fsIdent;
|
||||
private final JobConf conf;
|
||||
private final Map<String, String> properties;
|
||||
private final String bindBrokerName;
|
||||
|
||||
public FileSystemCacheKey(Pair<FileSystemType, String> fs, JobConf conf, String bindBrokerName) {
|
||||
public FileSystemCacheKey(Pair<FileSystemType, String> fs,
|
||||
Map<String, String> properties, String bindBrokerName) {
|
||||
this.type = fs.first;
|
||||
this.fsIdent = fs.second;
|
||||
this.conf = conf;
|
||||
this.properties = properties;
|
||||
this.bindBrokerName = bindBrokerName;
|
||||
}
|
||||
|
||||
@ -75,7 +76,7 @@ public class FileSystemCache {
|
||||
}
|
||||
boolean equalsWithoutBroker = type.equals(((FileSystemCacheKey) obj).type)
|
||||
&& fsIdent.equals(((FileSystemCacheKey) obj).fsIdent)
|
||||
&& conf == ((FileSystemCacheKey) obj).conf;
|
||||
&& properties == ((FileSystemCacheKey) obj).properties;
|
||||
if (bindBrokerName == null) {
|
||||
return equalsWithoutBroker;
|
||||
}
|
||||
@ -85,9 +86,9 @@ public class FileSystemCache {
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if (bindBrokerName == null) {
|
||||
return Objects.hash(conf, fsIdent, type);
|
||||
return Objects.hash(properties, fsIdent, type);
|
||||
}
|
||||
return Objects.hash(conf, fsIdent, type, bindBrokerName);
|
||||
return Objects.hash(properties, fsIdent, type, bindBrokerName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -29,7 +29,6 @@ import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class FileSystemFactory {
|
||||
@ -51,10 +50,8 @@ public class FileSystemFactory {
|
||||
}
|
||||
}
|
||||
|
||||
public static RemoteFileSystem getRemoteFileSystem(FileSystemType type, Configuration conf,
|
||||
public static RemoteFileSystem getRemoteFileSystem(FileSystemType type, Map<String, String> properties,
|
||||
String bindBrokerName) {
|
||||
Map<String, String> properties = new HashMap<>();
|
||||
conf.iterator().forEachRemaining(e -> properties.put(e.getKey(), e.getValue()));
|
||||
switch (type) {
|
||||
case S3:
|
||||
return new S3FileSystem(properties);
|
||||
|
||||
@@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.fs;

import org.apache.doris.datasource.SessionContext;

public interface FileSystemProvider {
FileSystem get(SessionContext ctx);
}
@@ -0,0 +1,43 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.fs;

import org.apache.doris.datasource.ExternalMetaCacheMgr;
import org.apache.doris.datasource.SessionContext;
import org.apache.doris.fs.remote.SwitchingFileSystem;

import java.util.Map;

public class FileSystemProviderImpl implements FileSystemProvider {
private ExternalMetaCacheMgr extMetaCacheMgr;
private String bindBrokerName;

private Map<String, String> properties;

public FileSystemProviderImpl(ExternalMetaCacheMgr extMetaCacheMgr, String bindBrokerName,
Map<String, String> properties) {
this.extMetaCacheMgr = extMetaCacheMgr;
this.bindBrokerName = bindBrokerName;
this.properties = properties;
}

@Override
public FileSystem get(SessionContext ctx) {
return new SwitchingFileSystem(extMetaCacheMgr, bindBrokerName, properties);
}
}
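How the new provider is intended to be used (illustrative sketch only, not part of the diff): the provider returns a SwitchingFileSystem, which resolves the concrete FileSystem per path through the shared file-system cache, so the committer can address S3 and HDFS locations through one handle. The helper class name, the hadoopProps parameter, and the example paths below are assumptions for illustration.

import org.apache.doris.catalog.Env;
import org.apache.doris.fs.FileSystem;
import org.apache.doris.fs.FileSystemProvider;
import org.apache.doris.fs.FileSystemProviderImpl;

import java.util.Map;

class FileSystemProviderUsageSketch {
    static void example(Map<String, String> hadoopProps) {
        // Mirrors the wiring in HMSExternalCatalog above: cache manager + optional broker + Hadoop properties.
        FileSystemProvider provider = new FileSystemProviderImpl(
                Env.getCurrentEnv().getExtMetaCacheMgr(), null /* bindBrokerName */, hadoopProps);
        FileSystem fs = provider.get(null);
        // Each call is dispatched to the backend matching the path (S3-style object storage vs. HDFS).
        fs.directoryExists("s3://bucket/warehouse/db/table");
        fs.directoryExists("hdfs://ns1/warehouse/db/table");
    }
}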
@ -48,6 +48,11 @@ public class LocalDfsFileSystem implements FileSystem {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status directoryExists(String dir) {
|
||||
return exists(dir);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status exists(String remotePath) {
|
||||
boolean exists = false;
|
||||
|
||||
@ -31,6 +31,7 @@ import java.nio.file.FileVisitOption;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
|
||||
public abstract class ObjFileSystem extends RemoteFileSystem {
|
||||
@ -43,11 +44,20 @@ public abstract class ObjFileSystem extends RemoteFileSystem {
|
||||
this.objStorage = objStorage;
|
||||
}
|
||||
|
||||
public ObjStorage<?> getObjStorage() {
|
||||
return objStorage;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status exists(String remotePath) {
|
||||
return objStorage.headObject(remotePath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status directoryExists(String dir) {
|
||||
return listFiles(dir, false, new ArrayList<>());
|
||||
}
|
||||
|
||||
/**
|
||||
* download data from remote file and check data size with expected file size.
|
||||
* @param remoteFilePath remote file path
|
||||
@ -139,4 +149,9 @@ public abstract class ObjFileSystem extends RemoteFileSystem {
|
||||
public Status delete(String remotePath) {
|
||||
return objStorage.deleteObject(remotePath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status deleteDirectory(String absolutePath) {
|
||||
return objStorage.deleteObjects(absolutePath);
|
||||
}
|
||||
}
|
||||
|
||||
@ -107,8 +107,4 @@ public class S3FileSystem extends ObjFileSystem {
|
||||
}
|
||||
return Status.OK;
|
||||
}
|
||||
|
||||
public Status deleteDirectory(String absolutePath) {
|
||||
return ((S3ObjStorage) objStorage).deleteObjects(absolutePath);
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,132 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.fs.remote;
|
||||
|
||||
import org.apache.doris.backup.Status;
|
||||
import org.apache.doris.common.util.LocationPath;
|
||||
import org.apache.doris.datasource.ExternalMetaCacheMgr;
|
||||
import org.apache.doris.fs.FileSystem;
|
||||
import org.apache.doris.fs.FileSystemCache;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public class SwitchingFileSystem implements FileSystem {
|
||||
|
||||
private final ExternalMetaCacheMgr extMetaCacheMgr;
|
||||
|
||||
private final String bindBrokerName;
|
||||
|
||||
private final Map<String, String> properties;
|
||||
|
||||
public SwitchingFileSystem(ExternalMetaCacheMgr extMetaCacheMgr, String bindBrokerName,
|
||||
Map<String, String> properties) {
|
||||
this.extMetaCacheMgr = extMetaCacheMgr;
|
||||
this.bindBrokerName = bindBrokerName;
|
||||
this.properties = properties;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, String> getProperties() {
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status exists(String remotePath) {
|
||||
return fileSystem(remotePath).exists(remotePath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status directoryExists(String dir) {
|
||||
return fileSystem(dir).directoryExists(dir);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize) {
|
||||
return fileSystem(remoteFilePath).downloadWithFileSize(remoteFilePath, localFilePath, fileSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status upload(String localPath, String remotePath) {
|
||||
return fileSystem(localPath).upload(localPath, remotePath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status directUpload(String content, String remoteFile) {
|
||||
return fileSystem(remoteFile).directUpload(content, remoteFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status rename(String origFilePath, String destFilePath) {
|
||||
return fileSystem(origFilePath).rename(origFilePath, destFilePath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status renameDir(String origFilePath, String destFilePath) {
|
||||
return fileSystem(origFilePath).renameDir(origFilePath, destFilePath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status renameDir(String origFilePath, String destFilePath, Runnable runWhenPathNotExist) {
|
||||
return fileSystem(origFilePath).renameDir(origFilePath, destFilePath, runWhenPathNotExist);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status delete(String remotePath) {
|
||||
return fileSystem(remotePath).delete(remotePath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status deleteDirectory(String absolutePath) {
|
||||
return fileSystem(absolutePath).deleteDirectory(absolutePath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status makeDir(String remotePath) {
|
||||
return fileSystem(remotePath).makeDir(remotePath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status listFiles(String remotePath, boolean recursive, List<RemoteFile> result) {
|
||||
return fileSystem(remotePath).listFiles(remotePath, recursive, result);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status globList(String remotePath, List<RemoteFile> result) {
|
||||
return fileSystem(remotePath).globList(remotePath, result);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status globList(String remotePath, List<RemoteFile> result, boolean fileNameOnly) {
|
||||
return fileSystem(remotePath).globList(remotePath, result, fileNameOnly);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status listDirectories(String remotePath, Set<String> result) {
|
||||
return fileSystem(remotePath).listDirectories(remotePath, result);
|
||||
}
|
||||
|
||||
public FileSystem fileSystem(String location) {
|
||||
return extMetaCacheMgr.getFsCache().getRemoteFileSystem(
|
||||
new FileSystemCache.FileSystemCacheKey(
|
||||
LocationPath.getFSIdentity(location,
|
||||
bindBrokerName), properties, bindBrokerName));
|
||||
}
|
||||
}
|
||||
|
||||
@ -33,6 +33,7 @@ import org.apache.doris.thrift.TDataSinkType;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TFileCompressType;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TFileType;
import org.apache.doris.thrift.THiveBucket;
import org.apache.doris.thrift.THiveColumn;
import org.apache.doris.thrift.THiveColumnType;
@ -128,21 +129,35 @@ public class HiveTableSink extends DataSink {
setCompressType(tSink, formatType);

THiveLocationParams locationParams = new THiveLocationParams();
String location = sd.getLocation();

String writeTempPath = createTempPath(location);
locationParams.setWritePath(writeTempPath);
locationParams.setTargetPath(location);
locationParams.setFileType(LocationPath.getTFileTypeForBE(location));
LocationPath locationPath = new LocationPath(sd.getLocation(), targetTable.getHadoopProperties());
String location = locationPath.toString();
String storageLocation = locationPath.toStorageLocation().toString();
TFileType fileType = locationPath.getTFileTypeForBE();
if (fileType == TFileType.FILE_S3) {
locationParams.setWritePath(storageLocation);
locationParams.setOriginalWritePath(location);
locationParams.setTargetPath(location);
if (insertCtx.isPresent()) {
HiveInsertCommandContext context = (HiveInsertCommandContext) insertCtx.get();
tSink.setOverwrite(context.isOverwrite());
context.setWritePath(storageLocation);
}
} else {
String writeTempPath = createTempPath(location);
locationParams.setWritePath(writeTempPath);
locationParams.setOriginalWritePath(writeTempPath);
locationParams.setTargetPath(location);
if (insertCtx.isPresent()) {
HiveInsertCommandContext context = (HiveInsertCommandContext) insertCtx.get();
tSink.setOverwrite(context.isOverwrite());
context.setWritePath(writeTempPath);
}
}
locationParams.setFileType(fileType);
tSink.setLocation(locationParams);

tSink.setHadoopConfig(targetTable.getHadoopProperties());

if (insertCtx.isPresent()) {
HiveInsertCommandContext context = (HiveInsertCommandContext) insertCtx.get();
tSink.setOverwrite(context.isOverwrite());
context.setWritePath(writeTempPath);
}
tDataSink = new TDataSink(getDataSinkType());
tDataSink.setHiveTableSink(tSink);
}

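Note: the new branch above writes S3-backed targets directly to the converted storage location (the upload is then completed on the FE side), while other file systems keep the temporary-path-then-rename protocol. A rough sketch of that decision; the class and field names here are assumptions, not the sink's real API:

// Illustrative only: names are assumptions, not the real HiveTableSink API.
final class WritePathPlan {
    final String writePath;   // where the BE writes data files
    final String targetPath;  // the final table/partition location

    WritePathPlan(String writePath, String targetPath) {
        this.writePath = writePath;
        this.targetPath = targetPath;
    }

    static WritePathPlan of(String location, String storageLocation, boolean isS3, String stagingDir) {
        if (isS3) {
            // S3 has no cheap rename, so files go straight to the destination prefix
            // and are published by completing the multipart uploads on the FE.
            return new WritePathPlan(storageLocation, location);
        }
        // HDFS-like systems stage into a temp dir and publish via rename.
        return new WritePathPlan(stagingDir, location);
    }
}
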
@ -21,23 +21,32 @@ import org.apache.doris.catalog.Env;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.hive.HMSTransaction;
import org.apache.doris.datasource.hive.HiveMetadataOps;
import org.apache.doris.fs.FileSystemProvider;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executor;

public class HiveTransactionManager implements TransactionManager {

private final Map<Long, HMSTransaction> transactions = new ConcurrentHashMap<>();
private final HiveMetadataOps ops;

public HiveTransactionManager(HiveMetadataOps ops) {
private final FileSystemProvider fileSystemProvider;

private final Executor fileSystemExecutor;

public HiveTransactionManager(HiveMetadataOps ops, FileSystemProvider fileSystemProvider,
Executor fileSystemExecutor) {
this.ops = ops;
this.fileSystemProvider = fileSystemProvider;
this.fileSystemExecutor = fileSystemExecutor;
}

@Override
public long begin() {
long id = Env.getCurrentEnv().getNextId();
HMSTransaction hiveTransaction = new HMSTransaction(ops);
HMSTransaction hiveTransaction = new HMSTransaction(ops, fileSystemProvider, fileSystemExecutor);
transactions.put(id, hiveTransaction);
return id;
}

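Note: the manager keeps live transactions in a ConcurrentHashMap keyed by an id taken from Env.getCurrentEnv().getNextId(), and now hands each HMSTransaction the file-system provider and executor it needs to commit files. A generic sketch of that registry pattern (hypothetical helper, not the real TransactionManager interface):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.LongSupplier;

// Hypothetical registry illustrating the id -> transaction bookkeeping used above.
class TxnRegistry<T> {
    private final Map<Long, T> live = new ConcurrentHashMap<>();
    private final LongSupplier idSource; // the real code draws ids from Env.getCurrentEnv().getNextId()

    TxnRegistry(LongSupplier idSource) {
        this.idSource = idSource;
    }

    long begin(T txn) {
        long id = idSource.getAsLong();
        live.put(id, txn);
        return id;
    }

    T get(long id) {
        return live.get(id);
    }

    T remove(long id) {
        // dropped on commit/rollback so finished transactions are not retained
        return live.remove(id);
    }
}
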
@ -18,10 +18,14 @@
package org.apache.doris.transaction;

import org.apache.doris.datasource.hive.HiveMetadataOps;
import org.apache.doris.fs.FileSystemProvider;

import java.util.concurrent.Executor;

public class TransactionManagerFactory {

public static TransactionManager createHiveTransactionManager(HiveMetadataOps ops) {
return new HiveTransactionManager(ops);
public static TransactionManager createHiveTransactionManager(HiveMetadataOps ops,
FileSystemProvider fileSystemProvider, Executor fileSystemExecutor) {
return new HiveTransactionManager(ops, fileSystemProvider, fileSystemExecutor);
}
}

@ -34,7 +34,7 @@ public class LocationPathTest {
LocationPath locationPath = new LocationPath("hdfs://dir/file.path", rangeProps);
Assertions.assertTrue(locationPath.get().startsWith("hdfs://"));

String beLocation = locationPath.toScanRangeLocation().toString();
String beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.startsWith("hdfs://"));
Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.DFS);

@ -45,21 +45,21 @@ public class LocationPathTest {
Assertions.assertTrue(locationPath.get().startsWith("hdfs://")
&& !locationPath.get().startsWith("hdfs:///"));

beLocation = locationPath.toScanRangeLocation().toString();
beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.startsWith("hdfs://") && !beLocation.startsWith("hdfs:///"));

// nonstandard '/' for hdfs path
locationPath = new LocationPath("hdfs:/dir/file.path", props);
Assertions.assertTrue(locationPath.get().startsWith("hdfs://"));

beLocation = locationPath.toScanRangeLocation().toString();
beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.startsWith("hdfs://"));

// empty ha nameservices
props.put("dfs.nameservices", "");
locationPath = new LocationPath("hdfs:/dir/file.path", props);

beLocation = locationPath.toScanRangeLocation().toString();
beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(locationPath.get().startsWith("/dir")
&& !locationPath.get().startsWith("hdfs://"));
Assertions.assertTrue(beLocation.startsWith("/dir") && !beLocation.startsWith("hdfs://"));
@ -75,7 +75,7 @@ public class LocationPathTest {
// FE
Assertions.assertTrue(locationPath.get().startsWith("jfs://"));
// BE
loc = locationPath.toScanRangeLocation().toString();
loc = locationPath.toStorageLocation().toString();
Assertions.assertTrue(loc.startsWith("jfs://"));
Assertions.assertEquals(LocationPath.getFSIdentity(loc, null).first, FileSystemType.JFS);
}
@ -89,7 +89,7 @@ public class LocationPathTest {
// FE
Assertions.assertTrue(locationPath.get().startsWith("s3://"));
// BE
String beLoc = locationPath.toScanRangeLocation().toString();
String beLoc = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLoc.startsWith("s3://"));
Assertions.assertEquals(LocationPath.getFSIdentity(beLoc, null).first, FileSystemType.S3);
}
@ -101,7 +101,7 @@ public class LocationPathTest {
// FE
Assertions.assertTrue(locationPath.get().startsWith("oss://"));
// BE
String beLocation = locationPath.toScanRangeLocation().toString();
String beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.startsWith("s3://"));
Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3);

@ -109,7 +109,7 @@ public class LocationPathTest {
// FE
Assertions.assertTrue(locationPath.get().startsWith("oss://test.oss-dls.aliyuncs"));
// BE
beLocation = locationPath.toScanRangeLocation().toString();
beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.startsWith("oss://test.oss-dls.aliyuncs"));
Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.DFS);

@ -121,7 +121,7 @@ public class LocationPathTest {
LocationPath locationPath = new LocationPath("cos://test.com", rangeProps);
// FE
Assertions.assertTrue(locationPath.get().startsWith("cos://"));
String beLocation = locationPath.toScanRangeLocation().toString();
String beLocation = locationPath.toStorageLocation().toString();
// BE
Assertions.assertTrue(beLocation.startsWith("s3://"));
Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3);
@ -130,7 +130,7 @@ public class LocationPathTest {
// FE
Assertions.assertTrue(locationPath.get().startsWith("cosn://"));
// BE
beLocation = locationPath.toScanRangeLocation().toString();
beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.startsWith("s3://"));
Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3);

@ -138,7 +138,7 @@ public class LocationPathTest {
// FE
Assertions.assertTrue(locationPath.get().startsWith("ofs://"));
// BE
beLocation = locationPath.toScanRangeLocation().toString();
beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.startsWith("ofs://"));
Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.OFS);

@ -147,7 +147,7 @@ public class LocationPathTest {
// FE
Assertions.assertTrue(locationPath.get().startsWith("gfs://"));
// BE
beLocation = locationPath.toScanRangeLocation().toString();
beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.startsWith("gfs://"));
Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.DFS);
}
@ -159,7 +159,7 @@ public class LocationPathTest {
// FE
Assertions.assertTrue(locationPath.get().startsWith("obs://"));
// BE
String beLocation = locationPath.toScanRangeLocation().toString();
String beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.startsWith("s3://"));
Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3);
}
@ -173,7 +173,7 @@ public class LocationPathTest {
Assertions.assertTrue(locationPath.get().startsWith("unknown://"));
Assertions.assertTrue(locationPath.getLocationType() == LocationPath.LocationType.UNKNOWN);
// BE
String beLocation = locationPath.toScanRangeLocation().toString();
String beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.startsWith("unknown://"));
}

@ -186,7 +186,7 @@ public class LocationPathTest {
Assertions.assertTrue(locationPath.get().equalsIgnoreCase("/path/to/local"));
Assertions.assertTrue(locationPath.getLocationType() == LocationPath.LocationType.NOSCHEME);
// BE
String beLocation = locationPath.toScanRangeLocation().toString();
String beLocation = locationPath.toStorageLocation().toString();
Assertions.assertTrue(beLocation.equalsIgnoreCase("/path/to/local"));
}
}

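Note: as these assertions exercise, toStorageLocation() normalizes object-store schemes to what the BE expects while DFS-like schemes pass through unchanged. A condensed summary of that mapping as a sketch only; the authoritative logic lives in LocationPath, and oss:// endpoints on OSS-HDFS (oss-dls) stay oss:// and are treated as DFS:

import java.util.Map;

// Sketch of the FE-scheme -> BE-scheme normalization exercised by the tests above.
final class SchemeNormalization {
    static final Map<String, String> FE_TO_BE = Map.of(
            "s3", "s3",
            "cos", "s3",
            "cosn", "s3",
            "obs", "s3",
            "oss", "s3",
            "hdfs", "hdfs",
            "ofs", "ofs",
            "gfs", "gfs");
}
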
@ -21,7 +21,10 @@ import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.PrimitiveType;
|
||||
import org.apache.doris.common.util.DebugUtil;
|
||||
import org.apache.doris.datasource.TestHMSCachedClient;
|
||||
import org.apache.doris.fs.FileSystem;
|
||||
import org.apache.doris.fs.FileSystemProvider;
|
||||
import org.apache.doris.fs.LocalDfsFileSystem;
|
||||
import org.apache.doris.fs.remote.SwitchingFileSystem;
|
||||
import org.apache.doris.nereids.trees.plans.commands.insert.HiveInsertCommandContext;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
import org.apache.doris.thrift.THiveLocationParams;
|
||||
@ -54,16 +57,21 @@ import java.util.Random;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.Executor;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
public class HmsCommitTest {
|
||||
|
||||
private static HiveMetadataOps hmsOps;
|
||||
private static HMSCachedClient hmsClient;
|
||||
|
||||
private static FileSystemProvider fileSystemProvider;
|
||||
private static final String dbName = "test_db";
|
||||
private static final String tbWithPartition = "test_tb_with_partition";
|
||||
private static final String tbWithoutPartition = "test_tb_without_partition";
|
||||
private static LocalDfsFileSystem fs;
|
||||
private static FileSystem fs;
|
||||
private static LocalDfsFileSystem localDFSFileSystem;
|
||||
private static Executor fileSystemExecutor;
|
||||
static String dbLocation;
|
||||
static String writeLocation;
|
||||
static String uri = "thrift://127.0.0.1:9083";
|
||||
@ -86,7 +94,14 @@ public class HmsCommitTest {
|
||||
}
|
||||
|
||||
public static void createTestHiveCatalog() throws IOException {
|
||||
fs = new LocalDfsFileSystem();
|
||||
localDFSFileSystem = new LocalDfsFileSystem();
|
||||
new MockUp<SwitchingFileSystem>(SwitchingFileSystem.class) {
|
||||
@Mock
|
||||
public FileSystem fileSystem(String location) {
|
||||
return localDFSFileSystem;
|
||||
}
|
||||
};
|
||||
fs = new SwitchingFileSystem(null, null, null);
|
||||
|
||||
if (hasRealHmsService) {
|
||||
// If you have a real HMS service, then you can use this client to create real connections for testing
|
||||
@ -96,7 +111,9 @@ public class HmsCommitTest {
|
||||
} else {
|
||||
hmsClient = new TestHMSCachedClient();
|
||||
}
|
||||
hmsOps = new HiveMetadataOps(null, hmsClient, fs);
|
||||
hmsOps = new HiveMetadataOps(null, hmsClient);
|
||||
fileSystemProvider = ctx -> fs;
|
||||
fileSystemExecutor = Executors.newFixedThreadPool(16);
|
||||
}
|
||||
|
||||
public static void createTestHiveDatabase() {
|
||||
@ -339,9 +356,9 @@ public class HmsCommitTest {
|
||||
fs.makeDir(targetPath);
|
||||
}
|
||||
|
||||
fs.createFile(writePath + "/" + f1);
|
||||
fs.createFile(writePath + "/" + f2);
|
||||
fs.createFile(writePath + "/" + f3);
|
||||
localDFSFileSystem.createFile(writePath + "/" + f1);
|
||||
localDFSFileSystem.createFile(writePath + "/" + f2);
|
||||
localDFSFileSystem.createFile(writePath + "/" + f3);
|
||||
return pu;
|
||||
}
|
||||
|
||||
@ -363,7 +380,7 @@ public class HmsCommitTest {
|
||||
public void commit(String dbName,
|
||||
String tableName,
|
||||
List<THivePartitionUpdate> hivePUs) {
|
||||
HMSTransaction hmsTransaction = new HMSTransaction(hmsOps);
|
||||
HMSTransaction hmsTransaction = new HMSTransaction(hmsOps, fileSystemProvider, fileSystemExecutor);
|
||||
hmsTransaction.setHivePartitionUpdates(hivePUs);
|
||||
HiveInsertCommandContext ctx = new HiveInsertCommandContext();
|
||||
String queryId = DebugUtil.printId(ConnectContext.get().queryId());
|
||||
@ -634,3 +651,4 @@ public class HmsCommitTest {
|
||||
assertNumRows(3, pa);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||