[Load] Fix bug of wrong file group aggregation when handling broker load job (#2824)
**Describe the bug**
**First**, in broker load we allow users to add multiple data descriptions. Each data description
describes a file (or a set of files), including the file path, delimiter, the table and
partitions to be loaded, and other information.
When the user specifies multiple data descriptions, Doris currently aggregates the data
descriptions that belong to the same table and generates a single unified load task.
The problem is that although different data descriptions point to the same table,
they may specify different partitions. Therefore, data descriptions should be aggregated
not only at the table level, but also at the partition level.
Examples are as follows:
data description 1 is:
```
DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file1")
INTO TABLE `tbl1`
PARTITION (p1, p2)
```
data description 2 is:
```
DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file2")
INTO TABLE `tbl1`
PARTITION (p3, p4)
```
What the user expects is to load file1 into partitions p1 and p2 of tbl1, and to load file2 into
partitions p3 and p4 of the same table. But currently the two descriptions are aggregated together,
which results in loading both file1 and file2 into all of the partitions p1, p2, p3 and p4.
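The fix is to key the aggregation on both the table and the set of specified partitions, instead of on the table alone. Below is a minimal, self-contained Java sketch of that idea; the class and field names only loosely follow the `FileGroupAggKey` added by this commit, and the ids, file paths and map type are purely illustrative:
```
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

public class FileGroupAggSketch {

    // Key that distinguishes different combinations of table and specified partitions.
    static final class AggKey {
        final long tableId;
        final Set<Long> partitionIds; // empty set means "no partition specified"

        AggKey(long tableId, Collection<Long> partitionIds) {
            this.tableId = tableId;
            this.partitionIds = partitionIds == null ? new HashSet<>() : new HashSet<>(partitionIds);
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof AggKey)) {
                return false;
            }
            AggKey other = (AggKey) o;
            return tableId == other.tableId && partitionIds.equals(other.partitionIds);
        }

        @Override
        public int hashCode() {
            return Objects.hash(tableId, partitionIds);
        }
    }

    public static void main(String[] args) {
        Map<AggKey, List<String>> aggKeyToFiles = new HashMap<>();

        // file1 -> tbl1 (id 1), partitions p1, p2 (ids 10, 11)
        aggKeyToFiles.computeIfAbsent(new AggKey(1L, Arrays.asList(10L, 11L)), k -> new ArrayList<>())
                .add("hdfs://hdfs_host:hdfs_port/input/file1");
        // file2 -> tbl1 (id 1), partitions p3, p4 (ids 12, 13)
        aggKeyToFiles.computeIfAbsent(new AggKey(1L, Arrays.asList(12L, 13L)), k -> new ArrayList<>())
                .add("hdfs://hdfs_host:hdfs_port/input/file2");

        // Two separate groups are produced instead of one per-table group.
        System.out.println(aggKeyToFiles.size()); // prints 2
    }
}
```
With this keying, file1 and file2 fall into two different groups, so each file is planned only against its own partitions.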
**Second**, the following two data descriptions are not allowed:
```
DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file1")
INTO TABLE `tbl1`
PARTITION (p1, p2)
DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file2")
INTO TABLE `tbl1`
PARTITION (p2, p3)
```
They have an overlapping partition (p2), which is not supported yet, so we should throw an exception
and cancel the load job.
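For illustration, here is a simplified Java sketch of that rule, assuming we remember the partition sets already used for each table; it is a hypothetical helper, not the exact check in the patch:
```
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class OverlapCheckSketch {

    // table id -> partition-id sets seen so far for that table
    private final Map<Long, List<Set<Long>>> seenPartitionSets = new HashMap<>();

    public void addDataDescription(long tableId, Collection<Long> partitionIds) {
        Set<Long> newSet = partitionIds == null ? new HashSet<>() : new HashSet<>(partitionIds);
        List<Set<Long>> seen = seenPartitionSets.computeIfAbsent(tableId, k -> new ArrayList<>());
        for (Set<Long> old : seen) {
            if (old.equals(newSet)) {
                return; // identical partition spec: the descriptions are simply aggregated
            }
            if (!Collections.disjoint(old, newSet)) {
                throw new IllegalStateException(
                        "There are overlapping partitions of the same table in the data descriptions");
            }
        }
        seen.add(newSet);
    }

    public static void main(String[] args) {
        OverlapCheckSketch check = new OverlapCheckSketch();
        check.addDataDescription(1L, Arrays.asList(10L, 11L)); // tbl1: p1, p2
        check.addDataDescription(1L, Arrays.asList(11L, 12L)); // tbl1: p2, p3 -> overlaps on p2, throws
    }
}
```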
**Third**, there is a problem with the code implementation. In the constructor of
`OlapTableSink.java`, we pass in a comma-separated string of partition names.
But at the `OlapTableSink` level, we should be able to pass in a list of partition ids directly,
instead of names.
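As a rough sketch of what this means for callers, partition names would be resolved to ids before the sink is constructed; the helper and the name-to-id map below are illustrative and stand in for the real OlapTable lookup:
```
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PartitionIdResolverSketch {

    // Resolve a comma-separated partition-name string into partition ids.
    // An empty result means "no partition specified", i.e. all partitions.
    static List<Long> resolvePartitionIds(String partitionsStr, Map<String, Long> nameToId) {
        List<Long> partitionIds = new ArrayList<>();
        if (partitionsStr == null || partitionsStr.trim().isEmpty()) {
            return partitionIds;
        }
        for (String partName : partitionsStr.trim().split("\\s*,\\s*")) {
            Long id = nameToId.get(partName);
            if (id == null) {
                throw new IllegalArgumentException("Unknown partition: " + partName);
            }
            partitionIds.add(id);
        }
        return partitionIds;
    }

    public static void main(String[] args) {
        Map<String, Long> nameToId = new HashMap<>();
        nameToId.put("p1", 10L);
        nameToId.put("p2", 11L);
        System.out.println(resolvePartitionIds("p1, p2", nameToId)); // prints [10, 11]
    }
}
```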
ISSUE: #2823
@ -47,7 +47,6 @@ import org.apache.doris.thrift.TUniqueId;
import org.apache.doris.transaction.TransactionState;
import org.apache.doris.transaction.TransactionState.LoadJobSourceType;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
@ -87,7 +86,9 @@ public class InsertStmt extends DdlStmt {
public static final String STREAMING = "STREAMING";

private final TableName tblName;
private final Set<String> targetPartitions;
private final Set<String> targetPartitionNames;
// parsed from targetPartitionNames. empty means no partition specified
private List<Long> targetPartitionIds = Lists.newArrayList();
private final List<String> targetColumnNames;
private final QueryStmt queryStmt;
private final List<String> planHints;
@ -117,7 +118,7 @@ public class InsertStmt extends DdlStmt {
private DataSink dataSink;
private DataPartition dataPartition;

List<Column> targetColumns = Lists.newArrayList();
private List<Column> targetColumns = Lists.newArrayList();

/*
* InsertStmt may be analyzed twice, but transaction must be only begun once.
@ -129,10 +130,10 @@ public class InsertStmt extends DdlStmt {
this.tblName = target.getTblName();
List<String> tmpPartitions = target.getPartitions();
if (tmpPartitions != null) {
targetPartitions = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER);
targetPartitions.addAll(tmpPartitions);
targetPartitionNames = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER);
targetPartitionNames.addAll(tmpPartitions);
} else {
targetPartitions = null;
targetPartitionNames = null;
}
this.label = label;
this.queryStmt = source.getQueryStmt();
@ -147,7 +148,7 @@ public class InsertStmt extends DdlStmt {
// Ctor for CreateTableAsSelectStmt
public InsertStmt(TableName name, QueryStmt queryStmt) {
this.tblName = name;
this.targetPartitions = null;
this.targetPartitionNames = null;
this.targetColumnNames = null;
this.queryStmt = queryStmt;
this.planHints = null;
@ -263,7 +264,7 @@ public class InsertStmt extends DdlStmt {
}

// check partition
if (targetPartitions != null && targetPartitions.isEmpty()) {
if (targetPartitionNames != null && targetPartitionNames.isEmpty()) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_PARTITION_CLAUSE_ON_NONPARTITIONED);
}

@ -320,16 +321,17 @@ public class InsertStmt extends DdlStmt {
}

// partition
if (targetPartitions != null) {
if (targetPartitionNames != null) {
if (olapTable.getPartitionInfo().getType() == PartitionType.UNPARTITIONED) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_PARTITION_CLAUSE_NO_ALLOWED);
}
for (String partName : targetPartitions) {
for (String partName : targetPartitionNames) {
Partition part = olapTable.getPartition(partName);
if (part == null) {
ErrorReport.reportAnalysisException(
ErrorCode.ERR_UNKNOWN_PARTITION, partName, targetTable.getName());
}
targetPartitionIds.add(part.getId());
}
}
// need a descriptor
@ -349,11 +351,11 @@ public class InsertStmt extends DdlStmt {
// will use it during create load job
indexIdToSchemaHash = olapTable.getIndexIdToSchemaHash();
} else if (targetTable instanceof MysqlTable) {
if (targetPartitions != null) {
if (targetPartitionNames != null) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_PARTITION_CLAUSE_NO_ALLOWED);
}
} else if (targetTable instanceof BrokerTable) {
if (targetPartitions != null) {
if (targetPartitionNames != null) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_PARTITION_CLAUSE_NO_ALLOWED);
}

@ -714,8 +716,7 @@ public class InsertStmt extends DdlStmt {
return dataSink;
}
if (targetTable instanceof OlapTable) {
String partitionNames = targetPartitions == null ? null : Joiner.on(",").join(targetPartitions);
dataSink = new OlapTableSink((OlapTable) targetTable, olapTuple, partitionNames);
dataSink = new OlapTableSink((OlapTable) targetTable, olapTuple, targetPartitionIds);
dataPartition = dataSink.getOutputPartition();
} else if (targetTable instanceof BrokerTable) {
BrokerTable table = (BrokerTable) targetTable;

@ -64,7 +64,7 @@ public class BrokerFileGroup implements Writable {
// fileFormat may be null, which means format will be decided by file's suffix
private String fileFormat;
private boolean isNegative;
private List<Long> partitionIds;
private List<Long> partitionIds; // can be null, means no partition specified
private List<String> filePaths;

private List<String> fileFieldNames;
@ -77,7 +77,7 @@ public class BrokerFileGroup implements Writable {
// filter the data which has been conformed
private Expr whereExpr;

// Used for recovery from edit log
// for unit test and edit log persistence
private BrokerFileGroup() {
}

@ -0,0 +1,243 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.load;
|
||||
|
||||
import org.apache.doris.common.DdlException;
|
||||
import org.apache.doris.common.io.Writable;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/*
|
||||
* This class is mainly used to aggregate information of multiple DataDescriptors.
|
||||
* When the table name and specified partitions in the two DataDescriptors are same,
|
||||
* the BrokerFileGroup information corresponding to the two DataDescriptors will be aggregated together.
|
||||
* eg1:
|
||||
*
|
||||
* DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file1")
|
||||
* INTO TABLE `tbl1`
|
||||
* PARTITION (p1, p2)
|
||||
*
|
||||
* and
|
||||
*
|
||||
* DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file2")
|
||||
* INTO TABLE `tbl1`
|
||||
* PARTITION (p1, p2)
|
||||
*
|
||||
* will be aggregated together, because they have same table name and specified partitions
|
||||
* =>
|
||||
* FileGroupAggKey(tbl1, [p1, p2]) => List(file1, file2);
|
||||
*
|
||||
* eg2:
|
||||
*
|
||||
* DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file1")
|
||||
* INTO TABLE `tbl1`
|
||||
* PARTITION (p1)
|
||||
*
|
||||
* and
|
||||
*
|
||||
* DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file2")
|
||||
* INTO TABLE `tbl1`
|
||||
* PARTITION (p2)
|
||||
*
|
||||
* will NOT be aggregated together, because they have same table name but different specified partitions
|
||||
* FileGroupAggKey(tbl1, [p1]) => List(file1);
|
||||
* FileGroupAggKey(tbl1, [p2]) => List(file2);
|
||||
*
|
||||
* eg3:
|
||||
*
|
||||
* DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file1")
|
||||
* INTO TABLE `tbl1`
|
||||
* PARTITION (p1, p2)
|
||||
*
|
||||
* and
|
||||
*
|
||||
* DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file2")
|
||||
* INTO TABLE `tbl1`
|
||||
* PARTITION (p2, p3)
|
||||
*
|
||||
* will throw an Exception, because there is an overlap partition(p2) between 2 data descriptions. And we
|
||||
* currently not allow this. You can rewrite the data descriptions like this:
|
||||
*
|
||||
* DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file1")
|
||||
* INTO TABLE `tbl1`
|
||||
* PARTITION (p1)
|
||||
*
|
||||
* and
|
||||
*
|
||||
* DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file2")
|
||||
* INTO TABLE `tbl1`
|
||||
* PARTITION (p3)
|
||||
*
|
||||
* and
|
||||
*
|
||||
* DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file1")
|
||||
* INTO TABLE `tbl1`
|
||||
* PARTITION (p2)
|
||||
*
|
||||
* and
|
||||
*
|
||||
* DATA INFILE("hdfs://hdfs_host:hdfs_port/input/file2")
|
||||
* INTO TABLE `tbl1`
|
||||
* PARTITION (p2)
|
||||
*
|
||||
* they will be aggregate like:
|
||||
* FileGroupAggKey(tbl1, [p1]) => List(file1);
|
||||
* FileGroupAggKey(tbl1, [p3]) => List(file2);
|
||||
* FileGroupAggKey(tbl1, [p2]) => List(file1, file2);
|
||||
*
|
||||
* Although this transformation can be done automatically by system, but it change the "max_filter_ratio".
|
||||
* So we have to let user decide what to do.
|
||||
*/
|
||||
public class BrokerFileGroupAggInfo implements Writable {
|
||||
private static final Logger LOG = LogManager.getLogger(BrokerFileGroupAggInfo.class);
|
||||
|
||||
private Map<FileGroupAggKey, List<BrokerFileGroup>> aggKeyToFileGroups = Maps.newHashMap();
|
||||
// auxiliary structure, tbl id -> set of partition ids.
|
||||
// used to exam the overlapping partitions of same table.
|
||||
private Map<Long, Set<Long>> tableIdToPartitioIds = Maps.newHashMap();
|
||||
|
||||
// this inner class This class is used to distinguish different combinations of table and partitions
|
||||
public static class FileGroupAggKey {
|
||||
private long tableId;
|
||||
private Set<Long> partitionIds; // empty means partition is not specified
|
||||
|
||||
public FileGroupAggKey(long tableId, List<Long> partitionIds) {
|
||||
this.tableId = tableId;
|
||||
if (partitionIds != null) {
|
||||
this.partitionIds = Sets.newHashSet(partitionIds);
|
||||
} else {
|
||||
this.partitionIds = Sets.newHashSet();
|
||||
}
|
||||
}
|
||||
|
||||
public long getTableId() {
|
||||
return tableId;
|
||||
}
|
||||
|
||||
public Set<Long> getPartitionIds() {
|
||||
return partitionIds;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
if (!(obj instanceof FileGroupAggKey)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
FileGroupAggKey other = (FileGroupAggKey) obj;
|
||||
return other.tableId == this.tableId && other.partitionIds.equals(this.partitionIds);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(tableId, partitionIds);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("[").append(tableId).append(": ").append(partitionIds).append("]");
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
public void addFileGroup(BrokerFileGroup fileGroup) throws DdlException {
|
||||
FileGroupAggKey fileGroupAggKey = new FileGroupAggKey(fileGroup.getTableId(), fileGroup.getPartitionIds());
|
||||
List<BrokerFileGroup> fileGroupList = aggKeyToFileGroups.get(fileGroupAggKey);
|
||||
if (fileGroupList == null) {
|
||||
// check if there are overlapping partitions of same table
|
||||
if (tableIdToPartitioIds.containsKey(fileGroup.getTableId())
|
||||
&& tableIdToPartitioIds.get(fileGroup.getTableId()).stream().anyMatch(id -> fileGroup.getPartitionIds().contains(id))) {
|
||||
throw new DdlException("There are overlapping partitions of same table in data descrition of load job stmt");
|
||||
}
|
||||
|
||||
fileGroupList = Lists.newArrayList();
|
||||
aggKeyToFileGroups.put(fileGroupAggKey, fileGroupList);
|
||||
}
|
||||
// exist, aggregate them
|
||||
fileGroupList.add(fileGroup);
|
||||
|
||||
// update tableIdToPartitioIds
|
||||
Set<Long> partitionIds = tableIdToPartitioIds.get(fileGroup.getTableId());
|
||||
if (partitionIds == null) {
|
||||
partitionIds = Sets.newHashSet();
|
||||
tableIdToPartitioIds.put(fileGroup.getTableId(), partitionIds);
|
||||
}
|
||||
if (fileGroup.getPartitionIds() != null) {
|
||||
partitionIds.addAll(fileGroup.getPartitionIds());
|
||||
}
|
||||
}
|
||||
|
||||
public Set<Long> getAllTableIds() {
|
||||
return aggKeyToFileGroups.keySet().stream().map(k -> k.tableId).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
public Map<FileGroupAggKey, List<BrokerFileGroup>> getAggKeyToFileGroups() {
|
||||
return aggKeyToFileGroups;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(aggKeyToFileGroups);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(DataOutput out) throws IOException {
|
||||
// The pull load source info doesn't need to be persisted.
|
||||
// It will be recreated by origin stmt in prepare of load job.
|
||||
// write 0 just for compatibility
|
||||
out.writeInt(0);
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
int mapSize = in.readInt();
|
||||
// just for compatibility, the following read objects are useless
|
||||
for (int i = 0; i < mapSize; ++i) {
|
||||
long id = in.readLong();
|
||||
int listSize = in.readInt();
|
||||
for (int j = 0; j < listSize; ++j) {
|
||||
BrokerFileGroup fileGroup = BrokerFileGroup.read(in);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static BrokerFileGroupAggInfo read(DataInput in) throws IOException {
|
||||
BrokerFileGroupAggInfo sourceInfo = new BrokerFileGroupAggInfo();
|
||||
sourceInfo.readFields(in);
|
||||
return sourceInfo;
|
||||
}
|
||||
}
|
||||
@ -493,7 +493,7 @@ public class Load {
|
||||
job.setIdToTableLoadInfo(idToTableLoadInfo);
|
||||
|
||||
if (etlJobType == EtlJobType.BROKER) {
|
||||
PullLoadSourceInfo sourceInfo = new PullLoadSourceInfo();
|
||||
BrokerFileGroupAggInfo sourceInfo = new BrokerFileGroupAggInfo();
|
||||
for (DataDescription dataDescription : dataDescriptions) {
|
||||
BrokerFileGroup fileGroup = new BrokerFileGroup(dataDescription);
|
||||
fileGroup.parse(db, dataDescription);
|
||||
|
||||
@ -43,8 +43,6 @@ import org.apache.doris.task.MasterTask;
|
||||
import org.apache.doris.task.MasterTaskExecutor;
|
||||
import org.apache.doris.task.MiniLoadEtlTask;
|
||||
import org.apache.doris.task.MiniLoadPendingTask;
|
||||
import org.apache.doris.task.PullLoadEtlTask;
|
||||
import org.apache.doris.task.PullLoadPendingTask;
|
||||
import org.apache.doris.task.PushTask;
|
||||
import org.apache.doris.thrift.TPriority;
|
||||
import org.apache.doris.thrift.TPushType;
|
||||
@ -169,9 +167,6 @@ public class LoadChecker extends MasterDaemon {
|
||||
case MINI:
|
||||
task = new MiniLoadPendingTask(job);
|
||||
break;
|
||||
case BROKER:
|
||||
task = new PullLoadPendingTask(job);
|
||||
break;
|
||||
default:
|
||||
LOG.warn("unknown etl job type. type: {}", etlJobType.name());
|
||||
break;
|
||||
@ -203,9 +198,6 @@ public class LoadChecker extends MasterDaemon {
|
||||
case INSERT:
|
||||
task = new InsertLoadEtlTask(job);
|
||||
break;
|
||||
case BROKER:
|
||||
task = new PullLoadEtlTask(job);
|
||||
break;
|
||||
default:
|
||||
LOG.warn("unknown etl job type. type: {}", etlJobType.name());
|
||||
break;
|
||||
|
||||
@ -87,7 +87,7 @@ public class LoadJob implements Writable {
|
||||
private JobState state;
|
||||
|
||||
private BrokerDesc brokerDesc;
|
||||
private PullLoadSourceInfo pullLoadSourceInfo;
|
||||
private BrokerFileGroupAggInfo pullLoadSourceInfo;
|
||||
|
||||
// progress has two functions at ETL stage:
|
||||
// 1. when progress < 100, it indicates ETL progress
|
||||
@ -392,11 +392,11 @@ public class LoadJob implements Writable {
|
||||
return brokerDesc;
|
||||
}
|
||||
|
||||
public void setPullLoadSourceInfo(PullLoadSourceInfo sourceInfo) {
|
||||
public void setPullLoadSourceInfo(BrokerFileGroupAggInfo sourceInfo) {
|
||||
this.pullLoadSourceInfo = sourceInfo;
|
||||
}
|
||||
|
||||
public PullLoadSourceInfo getPullLoadSourceInfo() {
|
||||
public BrokerFileGroupAggInfo getPullLoadSourceInfo() {
|
||||
return pullLoadSourceInfo;
|
||||
}
|
||||
|
||||
@ -931,7 +931,7 @@ public class LoadJob implements Writable {
|
||||
}
|
||||
// Pull load
|
||||
if (in.readBoolean()) {
|
||||
this.pullLoadSourceInfo = PullLoadSourceInfo.read(in);
|
||||
this.pullLoadSourceInfo = BrokerFileGroupAggInfo.read(in);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,119 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.load;
|
||||
|
||||
import org.apache.doris.common.io.Writable;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* PullLoadSourceInfo
|
||||
*/
|
||||
public class PullLoadSourceInfo implements Writable {
|
||||
private static final Logger LOG = LogManager.getLogger(PullLoadSourceInfo.class);
|
||||
|
||||
// Table id to file groups
|
||||
private Map<Long, List<BrokerFileGroup>> idToFileGroups;
|
||||
|
||||
public PullLoadSourceInfo() {
|
||||
idToFileGroups = Maps.newHashMap();
|
||||
}
|
||||
|
||||
public void addFileGroup(BrokerFileGroup fileGroup) {
|
||||
List<BrokerFileGroup> fileGroupList = idToFileGroups.get(fileGroup.getTableId());
|
||||
if (fileGroupList == null) {
|
||||
fileGroupList = Lists.newArrayList(fileGroup);
|
||||
idToFileGroups.put(fileGroup.getTableId(), fileGroupList);
|
||||
} else {
|
||||
fileGroupList.add(fileGroup);
|
||||
}
|
||||
}
|
||||
|
||||
public Map<Long, List<BrokerFileGroup>> getIdToFileGroups() {
|
||||
return idToFileGroups;
|
||||
}
|
||||
|
||||
public List<BrokerFileGroup> getBrokerFileGroups(long id) {
|
||||
return idToFileGroups.get(id);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("PullLoadSourceInfo{");
|
||||
int idx = 0;
|
||||
for (Map.Entry<Long, List<BrokerFileGroup>> entry : idToFileGroups.entrySet()) {
|
||||
if (idx++ > 0) {
|
||||
sb.append(",");
|
||||
}
|
||||
sb.append(entry.getKey()).append(":[");
|
||||
int groupIdx = 0;
|
||||
for (BrokerFileGroup fileGroup : entry.getValue()) {
|
||||
if (groupIdx++ > 0) {
|
||||
sb.append(",");
|
||||
}
|
||||
sb.append(fileGroup);
|
||||
}
|
||||
sb.append("]");
|
||||
}
|
||||
sb.append("}");
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(DataOutput out) throws IOException {
|
||||
// The pull load source info doesn't need to be persisted.
|
||||
// It will be recreated by origin stmt in prepare of load job.
|
||||
out.writeInt(0);
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
// The pull load source info also need to be replayed
|
||||
// because size of file groups in old broker load is more then zero.
|
||||
int mapSize = in.readInt();
|
||||
for (int i = 0; i < mapSize; ++i) {
|
||||
long id = in.readLong();
|
||||
|
||||
List<BrokerFileGroup> fileGroupList = Lists.newArrayList();
|
||||
int listSize = in.readInt();
|
||||
for (int j = 0; j < listSize; ++j) {
|
||||
BrokerFileGroup fileGroup = BrokerFileGroup.read(in);
|
||||
fileGroupList.add(fileGroup);
|
||||
}
|
||||
|
||||
idToFileGroups.put(id, fileGroupList);
|
||||
}
|
||||
}
|
||||
|
||||
public static PullLoadSourceInfo read(DataInput in) throws IOException {
|
||||
PullLoadSourceInfo sourceInfo = new PullLoadSourceInfo();
|
||||
sourceInfo.readFields(in);
|
||||
return sourceInfo;
|
||||
}
|
||||
}
|
||||
@ -40,9 +40,10 @@ import org.apache.doris.common.io.Text;
|
||||
import org.apache.doris.common.util.LogBuilder;
|
||||
import org.apache.doris.common.util.LogKey;
|
||||
import org.apache.doris.load.BrokerFileGroup;
|
||||
import org.apache.doris.load.BrokerFileGroupAggInfo;
|
||||
import org.apache.doris.load.BrokerFileGroupAggInfo.FileGroupAggKey;
|
||||
import org.apache.doris.load.EtlJobType;
|
||||
import org.apache.doris.load.FailMsg;
|
||||
import org.apache.doris.load.PullLoadSourceInfo;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
import org.apache.doris.qe.SessionVariable;
|
||||
import org.apache.doris.qe.SqlModeHelper;
|
||||
@ -88,7 +89,7 @@ public class BrokerLoadJob extends LoadJob {
|
||||
private String originStmt = "";
|
||||
|
||||
// include broker desc and data desc
|
||||
private PullLoadSourceInfo dataSourceInfo = new PullLoadSourceInfo();
|
||||
private BrokerFileGroupAggInfo fileGroupAggInfo = new BrokerFileGroupAggInfo();
|
||||
private List<TabletCommitInfo> commitInfos = Lists.newArrayList();
|
||||
|
||||
// sessionVariable's name -> sessionVariable's value
|
||||
@ -145,7 +146,7 @@ public class BrokerLoadJob extends LoadJob {
|
||||
for (DataDescription dataDescription : dataDescriptions) {
|
||||
BrokerFileGroup fileGroup = new BrokerFileGroup(dataDescription);
|
||||
fileGroup.parse(db, dataDescription);
|
||||
dataSourceInfo.addFileGroup(fileGroup);
|
||||
fileGroupAggInfo.addFileGroup(fileGroup);
|
||||
}
|
||||
} finally {
|
||||
db.readUnlock();
|
||||
@ -165,12 +166,12 @@ public class BrokerLoadJob extends LoadJob {
|
||||
Set<String> result = Sets.newHashSet();
|
||||
Database database = Catalog.getCurrentCatalog().getDb(dbId);
|
||||
if (database == null) {
|
||||
for (long tableId : dataSourceInfo.getIdToFileGroups().keySet()) {
|
||||
for (long tableId : fileGroupAggInfo.getAllTableIds()) {
|
||||
result.add(String.valueOf(tableId));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
for (long tableId : dataSourceInfo.getIdToFileGroups().keySet()) {
|
||||
for (long tableId : fileGroupAggInfo.getAllTableIds()) {
|
||||
Table table = database.getTable(tableId);
|
||||
if (table == null) {
|
||||
result.add(String.valueOf(tableId));
|
||||
@ -190,7 +191,7 @@ public class BrokerLoadJob extends LoadJob {
|
||||
}
|
||||
// The database will not be locked in here.
|
||||
// The getTable is a thread-safe method called without read lock of database
|
||||
for (long tableId : dataSourceInfo.getIdToFileGroups().keySet()) {
|
||||
for (long tableId : fileGroupAggInfo.getAllTableIds()) {
|
||||
Table table = database.getTable(tableId);
|
||||
if (table == null) {
|
||||
throw new MetaNotFoundException("Failed to find table " + tableId + " in db " + dbId);
|
||||
@ -212,7 +213,7 @@ public class BrokerLoadJob extends LoadJob {
|
||||
|
||||
@Override
|
||||
protected void unprotectedExecuteJob() {
|
||||
LoadTask task = new BrokerLoadPendingTask(this, dataSourceInfo.getIdToFileGroups(), brokerDesc);
|
||||
LoadTask task = new BrokerLoadPendingTask(this, fileGroupAggInfo.getAggKeyToFileGroups(), brokerDesc);
|
||||
idToTasks.put(task.getSignature(), task);
|
||||
Catalog.getCurrentCatalog().getLoadTaskScheduler().submit(task);
|
||||
}
|
||||
@ -280,7 +281,7 @@ public class BrokerLoadJob extends LoadJob {
|
||||
return;
|
||||
}
|
||||
// Reset dataSourceInfo, it will be re-created in analyze
|
||||
dataSourceInfo = new PullLoadSourceInfo();
|
||||
fileGroupAggInfo = new BrokerFileGroupAggInfo();
|
||||
SqlParser parser = new SqlParser(new SqlScanner(new StringReader(originStmt),
|
||||
Long.valueOf(sessionVariables.get(SessionVariable.SQL_MODE))));
|
||||
LoadStmt stmt = null;
|
||||
@ -357,9 +358,10 @@ public class BrokerLoadJob extends LoadJob {
|
||||
db.readLock();
|
||||
try {
|
||||
List<LoadLoadingTask> newLoadingTasks = Lists.newArrayList();
|
||||
for (Map.Entry<Long, List<BrokerFileGroup>> entry :
|
||||
dataSourceInfo.getIdToFileGroups().entrySet()) {
|
||||
long tableId = entry.getKey();
|
||||
for (Map.Entry<FileGroupAggKey, List<BrokerFileGroup>> entry : fileGroupAggInfo.getAggKeyToFileGroups().entrySet()) {
|
||||
FileGroupAggKey aggKey = entry.getKey();
|
||||
List<BrokerFileGroup> brokerFileGroups = entry.getValue();
|
||||
long tableId = aggKey.getTableId();
|
||||
OlapTable table = (OlapTable) db.getTable(tableId);
|
||||
if (table == null) {
|
||||
LOG.warn(new LogBuilder(LogKey.LOAD_JOB, id)
|
||||
@ -373,11 +375,11 @@ public class BrokerLoadJob extends LoadJob {
|
||||
|
||||
// Generate loading task and init the plan of task
|
||||
LoadLoadingTask task = new LoadLoadingTask(db, table, brokerDesc,
|
||||
entry.getValue(), getDeadlineMs(), execMemLimit,
|
||||
brokerFileGroups, getDeadlineMs(), execMemLimit,
|
||||
strictMode, transactionId, this, timezone, timeoutSecond);
|
||||
UUID uuid = UUID.randomUUID();
|
||||
TUniqueId loadId = new TUniqueId(uuid.getMostSignificantBits(), uuid.getLeastSignificantBits());
|
||||
task.init(loadId, attachment.getFileStatusByTable(tableId), attachment.getFileNumByTable(tableId));
|
||||
task.init(loadId, attachment.getFileStatusByTable(aggKey), attachment.getFileNumByTable(aggKey));
|
||||
idToTasks.put(task.getSignature(), task);
|
||||
// idToTasks contains previous LoadPendingTasks, so idToTasks is just used to save all tasks.
|
||||
// use newLoadingTasks to save new created loading tasks and submit them later.
|
||||
@ -532,7 +534,7 @@ public class BrokerLoadJob extends LoadJob {
|
||||
super.readFields(in);
|
||||
brokerDesc = BrokerDesc.read(in);
|
||||
if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_61) {
|
||||
dataSourceInfo.readFields(in);
|
||||
fileGroupAggInfo.readFields(in);
|
||||
}
|
||||
|
||||
if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_58) {
|
||||
|
||||
@ -23,6 +23,7 @@ import org.apache.doris.common.util.BrokerUtil;
|
||||
import org.apache.doris.common.util.LogBuilder;
|
||||
import org.apache.doris.common.util.LogKey;
|
||||
import org.apache.doris.load.BrokerFileGroup;
|
||||
import org.apache.doris.load.BrokerFileGroupAggInfo.FileGroupAggKey;
|
||||
import org.apache.doris.load.FailMsg;
|
||||
import org.apache.doris.thrift.TBrokerFileStatus;
|
||||
|
||||
@ -38,16 +39,16 @@ public class BrokerLoadPendingTask extends LoadTask {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(BrokerLoadPendingTask.class);
|
||||
|
||||
private Map<Long, List<BrokerFileGroup>> tableToBrokerFileList;
|
||||
private Map<FileGroupAggKey, List<BrokerFileGroup>> aggKeyToBrokerFileGroups;
|
||||
private BrokerDesc brokerDesc;
|
||||
|
||||
public BrokerLoadPendingTask(BrokerLoadJob loadTaskCallback,
|
||||
Map<Long, List<BrokerFileGroup>> tableToBrokerFileList,
|
||||
BrokerDesc brokerDesc) {
|
||||
Map<FileGroupAggKey, List<BrokerFileGroup>> aggKeyToBrokerFileGroups,
|
||||
BrokerDesc brokerDesc) {
|
||||
super(loadTaskCallback);
|
||||
this.retryTime = 3;
|
||||
this.attachment = new BrokerPendingTaskAttachment(signature);
|
||||
this.tableToBrokerFileList = tableToBrokerFileList;
|
||||
this.aggKeyToBrokerFileGroups = aggKeyToBrokerFileGroups;
|
||||
this.brokerDesc = brokerDesc;
|
||||
this.failMsg = new FailMsg(FailMsg.CancelType.ETL_RUN_FAIL);
|
||||
}
|
||||
@ -58,18 +59,17 @@ public class BrokerLoadPendingTask extends LoadTask {
|
||||
getAllFileStatus();
|
||||
}
|
||||
|
||||
private void getAllFileStatus()
|
||||
throws UserException {
|
||||
private void getAllFileStatus() throws UserException {
|
||||
long start = System.currentTimeMillis();
|
||||
long totalFileSize = 0;
|
||||
int totalFileNum = 0;
|
||||
for (Map.Entry<Long, List<BrokerFileGroup>> entry : tableToBrokerFileList.entrySet()) {
|
||||
long tableId = entry.getKey();
|
||||
for (Map.Entry<FileGroupAggKey, List<BrokerFileGroup>> entry : aggKeyToBrokerFileGroups.entrySet()) {
|
||||
FileGroupAggKey aggKey = entry.getKey();
|
||||
List<BrokerFileGroup> fileGroups = entry.getValue();
|
||||
|
||||
List<List<TBrokerFileStatus>> fileStatusList = Lists.newArrayList();
|
||||
List<BrokerFileGroup> fileGroups = entry.getValue();
|
||||
long tableTotalFileSize = 0;
|
||||
int tabletotalFileNum = 0;
|
||||
int tableTotalFileNum = 0;
|
||||
int groupNum = 0;
|
||||
for (BrokerFileGroup fileGroup : fileGroups) {
|
||||
long groupFileSize = 0;
|
||||
@ -86,17 +86,17 @@ public class BrokerLoadPendingTask extends LoadTask {
|
||||
}
|
||||
}
|
||||
tableTotalFileSize += groupFileSize;
|
||||
tabletotalFileNum += fileStatuses.size();
|
||||
tableTotalFileNum += fileStatuses.size();
|
||||
LOG.info("get {} files in file group {} for table {}. size: {}. job: {}",
|
||||
fileStatuses.size(), groupNum, entry.getKey(), groupFileSize, callback.getCallbackId());
|
||||
groupNum++;
|
||||
}
|
||||
|
||||
totalFileSize += tableTotalFileSize;
|
||||
totalFileNum += tabletotalFileNum;
|
||||
((BrokerPendingTaskAttachment) attachment).addFileStatus(tableId, fileStatusList);
|
||||
totalFileNum += tableTotalFileNum;
|
||||
((BrokerPendingTaskAttachment) attachment).addFileStatus(aggKey, fileStatusList);
|
||||
LOG.info("get {} files to be loaded. total size: {}. cost: {} ms, job: {}",
|
||||
tabletotalFileNum, tableTotalFileSize, (System.currentTimeMillis() - start), callback.getCallbackId());
|
||||
tableTotalFileNum, tableTotalFileSize, (System.currentTimeMillis() - start), callback.getCallbackId());
|
||||
}
|
||||
|
||||
((BrokerLoadJob) callback).setLoadFileInfo(totalFileNum, totalFileSize);
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
package org.apache.doris.load.loadv2;
|
||||
|
||||
import org.apache.doris.load.BrokerFileGroupAggInfo.FileGroupAggKey;
|
||||
import org.apache.doris.thrift.TBrokerFileStatus;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
@ -26,25 +27,25 @@ import java.util.Map;
|
||||
|
||||
public class BrokerPendingTaskAttachment extends TaskAttachment {
|
||||
|
||||
// table id -> file status
|
||||
private Map<Long, List<List<TBrokerFileStatus>>> fileStatusMap = Maps.newHashMap();
|
||||
// table id -> total file num
|
||||
private Map<Long, Integer> fileNumMap = Maps.newHashMap();
|
||||
// FileGroupAggKey -> status of files group by FileGroup
|
||||
private Map<FileGroupAggKey, List<List<TBrokerFileStatus>>> fileStatusMap = Maps.newHashMap();
|
||||
// FileGroupAggKey -> total file num
|
||||
private Map<FileGroupAggKey, Integer> fileNumMap = Maps.newHashMap();
|
||||
|
||||
public BrokerPendingTaskAttachment(long taskId) {
|
||||
super(taskId);
|
||||
}
|
||||
|
||||
public void addFileStatus(long tableId, List<List<TBrokerFileStatus>> fileStatusList) {
|
||||
fileStatusMap.put(tableId, fileStatusList);
|
||||
fileNumMap.put(tableId, fileStatusList.stream().mapToInt(entity -> entity.size()).sum());
|
||||
public void addFileStatus(FileGroupAggKey aggKey, List<List<TBrokerFileStatus>> fileStatusList) {
|
||||
fileStatusMap.put(aggKey, fileStatusList);
|
||||
fileNumMap.put(aggKey, fileStatusList.stream().mapToInt(entity -> entity.size()).sum());
|
||||
}
|
||||
|
||||
public List<List<TBrokerFileStatus>> getFileStatusByTable(long tableId) {
|
||||
return fileStatusMap.get(tableId);
|
||||
public List<List<TBrokerFileStatus>> getFileStatusByTable(FileGroupAggKey aggKey) {
|
||||
return fileStatusMap.get(aggKey);
|
||||
}
|
||||
|
||||
public int getFileNumByTable(long tableId) {
|
||||
return fileNumMap.get(tableId);
|
||||
public int getFileNumByTable(FileGroupAggKey aggKey) {
|
||||
return fileNumMap.get(aggKey);
|
||||
}
|
||||
}
|
||||
|
||||
@ -26,10 +26,7 @@ import org.apache.doris.analysis.SlotRef;
|
||||
import org.apache.doris.analysis.TupleDescriptor;
|
||||
import org.apache.doris.catalog.Catalog;
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.Database;
|
||||
import org.apache.doris.catalog.OlapTable;
|
||||
import org.apache.doris.catalog.Partition;
|
||||
import org.apache.doris.catalog.Table;
|
||||
import org.apache.doris.common.LoadException;
|
||||
import org.apache.doris.common.MetaNotFoundException;
|
||||
import org.apache.doris.common.NotImplementedException;
|
||||
@ -46,15 +43,15 @@ import org.apache.doris.planner.ScanNode;
|
||||
import org.apache.doris.thrift.TBrokerFileStatus;
|
||||
import org.apache.doris.thrift.TUniqueId;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public class LoadingTaskPlanner {
|
||||
private static final Logger LOG = LogManager.getLogger(LoadingTaskPlanner.class);
|
||||
@ -122,8 +119,8 @@ public class LoadingTaskPlanner {
|
||||
descTable.computeMemLayout();
|
||||
|
||||
// 2. Olap table sink
|
||||
String partitionNames = convertBrokerDescPartitionInfo();
|
||||
OlapTableSink olapTableSink = new OlapTableSink(table, tupleDesc, partitionNames);
|
||||
List<Long> partitionIds = getAllPartitionIds();
|
||||
OlapTableSink olapTableSink = new OlapTableSink(table, tupleDesc, partitionIds);
|
||||
olapTableSink.init(loadId, txnId, dbId, timeoutS);
|
||||
olapTableSink.finalize();
|
||||
|
||||
@ -161,52 +158,18 @@ public class LoadingTaskPlanner {
|
||||
return analyzer.getTimezone();
|
||||
}
|
||||
|
||||
private String convertBrokerDescPartitionInfo() throws LoadException, MetaNotFoundException {
|
||||
String result = "";
|
||||
private List<Long> getAllPartitionIds() throws LoadException, MetaNotFoundException {
|
||||
Set<Long> partitionIds = Sets.newHashSet();
|
||||
for (BrokerFileGroup brokerFileGroup : fileGroups) {
|
||||
List<String> partitionNames = getPartitionNames(brokerFileGroup);
|
||||
if (partitionNames == null) {
|
||||
continue;
|
||||
if (brokerFileGroup.getPartitionIds() != null) {
|
||||
partitionIds.addAll(brokerFileGroup.getPartitionIds());
|
||||
}
|
||||
result += Joiner.on(",").join(partitionNames);
|
||||
result += ",";
|
||||
// all file group in fileGroups should have same partitions, so only need to get partition ids
|
||||
// from one of these file groups
|
||||
break;
|
||||
}
|
||||
if (Strings.isNullOrEmpty(result)) {
|
||||
return null;
|
||||
}
|
||||
result = result.substring(0, result.length() - 1);
|
||||
return result;
|
||||
}
|
||||
|
||||
private List<String> getPartitionNames(BrokerFileGroup brokerFileGroup)
|
||||
throws MetaNotFoundException, LoadException {
|
||||
Database database = Catalog.getCurrentCatalog().getDb(dbId);
|
||||
if (database == null) {
|
||||
throw new MetaNotFoundException("Database " + dbId + " has been deleted when broker loading");
|
||||
}
|
||||
Table table = database.getTable(brokerFileGroup.getTableId());
|
||||
if (table == null) {
|
||||
throw new MetaNotFoundException("Table " + brokerFileGroup.getTableId()
|
||||
+ " has been deleted when broker loading");
|
||||
}
|
||||
if (!(table instanceof OlapTable)) {
|
||||
throw new LoadException("Only olap table is supported in broker load");
|
||||
}
|
||||
OlapTable olapTable = (OlapTable) table;
|
||||
List<Long> partitionIds = brokerFileGroup.getPartitionIds();
|
||||
if (partitionIds == null || partitionIds.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
List<String> result = Lists.newArrayList();
|
||||
for (long partitionId : brokerFileGroup.getPartitionIds()) {
|
||||
Partition partition = olapTable.getPartition(partitionId);
|
||||
if (partition == null) {
|
||||
throw new MetaNotFoundException("Unknown partition(" + partitionId + ") in table("
|
||||
+ table.getName() + ")");
|
||||
}
|
||||
result.add(partition.getName());
|
||||
}
|
||||
return result;
|
||||
return Lists.newArrayList(partitionIds);
|
||||
}
|
||||
|
||||
// when retry load by reusing this plan in load process, the load_id should be changed
|
||||
|
||||
@ -55,19 +55,16 @@ import org.apache.doris.thrift.TTabletLocation;
|
||||
import org.apache.doris.thrift.TUniqueId;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.HashMultimap;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Multimap;
|
||||
import com.google.common.collect.Range;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class OlapTableSink extends DataSink {
|
||||
@ -76,16 +73,19 @@ public class OlapTableSink extends DataSink {
|
||||
// input variables
|
||||
private OlapTable dstTable;
|
||||
private TupleDescriptor tupleDescriptor;
|
||||
private String partitions;
|
||||
private Set<String> partitionSet;
|
||||
// specified partition ids. empty means partition does not specified, so all partitions will be included.
|
||||
private List<Long> partitionIds;
|
||||
|
||||
// set after init called
|
||||
private TDataSink tDataSink;
|
||||
|
||||
public OlapTableSink(OlapTable dstTable, TupleDescriptor tupleDescriptor, String partitions) {
|
||||
public OlapTableSink(OlapTable dstTable, TupleDescriptor tupleDescriptor, List<Long> partitionIds) {
|
||||
this.dstTable = dstTable;
|
||||
this.tupleDescriptor = tupleDescriptor;
|
||||
this.partitions = Strings.emptyToNull(partitions);
|
||||
this.partitionIds = partitionIds;
|
||||
if (this.partitionIds == null) {
|
||||
this.partitionIds = Lists.newArrayList();
|
||||
}
|
||||
}
|
||||
|
||||
public void init(TUniqueId loadId, long txnId, long dbId, long loadChannelTimeoutS) throws AnalysisException {
|
||||
@ -99,22 +99,14 @@ public class OlapTableSink extends DataSink {
|
||||
tDataSink.setOlap_table_sink(tSink);
|
||||
|
||||
// check partition
|
||||
if (partitions != null) {
|
||||
if (dstTable.getPartitionInfo().getType() == PartitionType.UNPARTITIONED) {
|
||||
ErrorReport.reportAnalysisException(ErrorCode.ERR_PARTITION_CLAUSE_NO_ALLOWED);
|
||||
}
|
||||
partitionSet = Sets.newHashSet();
|
||||
String[] partNames = partitions.trim().split("\\s*,\\s*");
|
||||
for (String partName : partNames) {
|
||||
Partition part = dstTable.getPartition(partName);
|
||||
if (part == null) {
|
||||
ErrorReport.reportAnalysisException(
|
||||
ErrorCode.ERR_UNKNOWN_PARTITION, partName, dstTable.getName());
|
||||
}
|
||||
partitionSet.add(partName);
|
||||
}
|
||||
if (partitionSet.isEmpty()) {
|
||||
ErrorReport.reportAnalysisException(ErrorCode.ERR_PARTITION_CLAUSE_ON_NONPARTITIONED);
|
||||
if (!partitionIds.isEmpty() && dstTable.getPartitionInfo().getType() == PartitionType.UNPARTITIONED) {
|
||||
ErrorReport.reportAnalysisException(ErrorCode.ERR_PARTITION_CLAUSE_NO_ALLOWED);
|
||||
}
|
||||
|
||||
for (Long partitionId : partitionIds) {
|
||||
Partition part = dstTable.getPartition(partitionId);
|
||||
if (part == null) {
|
||||
ErrorReport.reportAnalysisException(ErrorCode.ERR_UNKNOWN_PARTITION, partitionId, dstTable.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -228,7 +220,7 @@ public class OlapTableSink extends DataSink {
|
||||
int partColNum = rangePartitionInfo.getPartitionColumns().size();
|
||||
DistributionInfo selectedDistInfo = null;
|
||||
for (Partition partition : table.getPartitions()) {
|
||||
if (partitionSet != null && !partitionSet.contains(partition.getName())) {
|
||||
if (!partitionIds.isEmpty() && !partitionIds.contains(partition.getId())) {
|
||||
continue;
|
||||
}
|
||||
TOlapTablePartition tPartition = new TOlapTablePartition();
|
||||
|
||||
@ -26,7 +26,10 @@ import org.apache.doris.catalog.Catalog;
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.Database;
|
||||
import org.apache.doris.catalog.OlapTable;
|
||||
import org.apache.doris.catalog.Partition;
|
||||
import org.apache.doris.common.DdlException;
|
||||
import org.apache.doris.common.ErrorCode;
|
||||
import org.apache.doris.common.ErrorReport;
|
||||
import org.apache.doris.common.UserException;
|
||||
import org.apache.doris.load.LoadErrorHub;
|
||||
import org.apache.doris.task.StreamLoadTask;
|
||||
@ -107,7 +110,8 @@ public class StreamLoadPlanner {
|
||||
scanNode.finalize(analyzer);
|
||||
|
||||
// create dest sink
|
||||
OlapTableSink olapTableSink = new OlapTableSink(destTable, tupleDesc, streamLoadTask.getPartitions());
|
||||
List<Long> partitionIds = getAllPartitionIds();
|
||||
OlapTableSink olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds);
|
||||
olapTableSink.init(loadId, streamLoadTask.getTxnId(), db.getId(), streamLoadTask.getTimeout());
|
||||
olapTableSink.finalize();
|
||||
|
||||
@ -166,4 +170,22 @@ public class StreamLoadPlanner {
|
||||
// LOG.debug("stream load txn id: {}, plan: {}", streamLoadTask.getTxnId(), params);
|
||||
return params;
|
||||
}
|
||||
|
||||
// get all specified partition ids. return an empty list if no partition is specified.
|
||||
private List<Long> getAllPartitionIds() throws DdlException {
|
||||
List<Long> partitionIds = Lists.newArrayList();
|
||||
|
||||
String partitionsStr = streamLoadTask.getPartitions();
|
||||
if (partitionsStr != null) {
|
||||
String[] partNames = partitionsStr.trim().split("\\s*,\\s*");
|
||||
for (String partName : partNames) {
|
||||
Partition part = destTable.getPartition(partName);
|
||||
if (part == null) {
|
||||
ErrorReport.reportDdlException(ErrorCode.ERR_UNKNOWN_PARTITION, partName, destTable.getName());
|
||||
}
|
||||
partitionIds.add(part.getId());
|
||||
}
|
||||
}
|
||||
return partitionIds;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,132 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.task;
|
||||
|
||||
import org.apache.doris.catalog.Catalog;
|
||||
import org.apache.doris.catalog.OlapTable;
|
||||
import org.apache.doris.common.DdlException;
|
||||
import org.apache.doris.common.UserException;
|
||||
import org.apache.doris.common.util.BrokerUtil;
|
||||
import org.apache.doris.load.BrokerFileGroup;
|
||||
import org.apache.doris.load.EtlSubmitResult;
|
||||
import org.apache.doris.load.LoadJob;
|
||||
import org.apache.doris.load.TableLoadInfo;
|
||||
import org.apache.doris.thrift.TBrokerFileStatus;
|
||||
import org.apache.doris.thrift.TStatus;
|
||||
import org.apache.doris.thrift.TStatusCode;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
// Making a pull load job to some tasks
|
||||
@Deprecated
|
||||
public class PullLoadPendingTask extends LoadPendingTask {
|
||||
private static final Logger LOG = LogManager.getLogger(PullLoadPendingTask.class);
|
||||
|
||||
private PullLoadJob pullLoadJob = null;
|
||||
|
||||
public PullLoadPendingTask(LoadJob job) {
|
||||
super(job);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void createEtlRequest() throws Exception {
|
||||
long jobDeadlineMs = -1;
|
||||
if (job.getTimeoutSecond() > 0) {
|
||||
jobDeadlineMs = job.getCreateTimeMs() + job.getTimeoutSecond() * 1000;
|
||||
}
|
||||
List<PullLoadTask> pullLoadTaskList = Lists.newArrayList();
|
||||
// we need to make sure that the 'Plan' used the correct schema version,
|
||||
// So, we generate task plan here
|
||||
|
||||
// first we should get file status outside the lock
|
||||
// table id -> file status
|
||||
Map<Long, List<List<TBrokerFileStatus>>> fileStatusMap = Maps.newHashMap();
|
||||
// table id -> total file num
|
||||
Map<Long, Integer> fileNumMap = Maps.newHashMap();
|
||||
getAllFileStatus(fileStatusMap, fileNumMap);
|
||||
|
||||
db.readLock();
|
||||
try {
|
||||
int nextTaskId = 1;
|
||||
// tableId -> BrokerFileGroups
|
||||
for (Map.Entry<Long, List<BrokerFileGroup>> entry :
|
||||
job.getPullLoadSourceInfo().getIdToFileGroups().entrySet()) {
|
||||
long tableId = entry.getKey();
|
||||
OlapTable table = (OlapTable) db.getTable(tableId);
|
||||
if (table == null) {
|
||||
throw new DdlException("Unknown table(" + tableId + ") in database(" + db.getFullName() + ")");
|
||||
}
|
||||
|
||||
// Generate pull load task, one
|
||||
PullLoadTask task = new PullLoadTask(
|
||||
job.getId(), nextTaskId, db, table,
|
||||
job.getBrokerDesc(), entry.getValue(), jobDeadlineMs, job.getExecMemLimit());
|
||||
task.init(fileStatusMap.get(tableId), fileNumMap.get(tableId));
|
||||
pullLoadTaskList.add(task);
|
||||
nextTaskId++;
|
||||
|
||||
// add schema hash to table load info
|
||||
TableLoadInfo tableLoadInfo = job.getTableLoadInfo(entry.getKey());
|
||||
tableLoadInfo.addAllSchemaHash(table.getIndexIdToSchemaHash());
|
||||
}
|
||||
} finally {
|
||||
db.readUnlock();
|
||||
}
|
||||
|
||||
pullLoadJob = new PullLoadJob(job, pullLoadTaskList);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected EtlSubmitResult submitEtlJob(int retry) {
|
||||
Catalog.getInstance().getPullLoadJobMgr().submit(pullLoadJob);
|
||||
return new EtlSubmitResult(new TStatus(TStatusCode.OK), null);
|
||||
}
|
||||
|
||||
private void getAllFileStatus(Map<Long, List<List<TBrokerFileStatus>>> fileStatusMap,
|
||||
Map<Long, Integer> fileNumMap)
|
||||
throws UserException {
|
||||
for (Map.Entry<Long, List<BrokerFileGroup>> entry : job.getPullLoadSourceInfo().getIdToFileGroups().entrySet()) {
|
||||
long tableId = entry.getKey();
|
||||
|
||||
List<List<TBrokerFileStatus>> fileStatusList = Lists.newArrayList();
|
||||
int filesAdded = 0;
|
||||
List<BrokerFileGroup> fileGroups = entry.getValue();
|
||||
for (BrokerFileGroup fileGroup : fileGroups) {
|
||||
List<TBrokerFileStatus> fileStatuses = Lists.newArrayList();
|
||||
for (String path : fileGroup.getFilePaths()) {
|
||||
BrokerUtil.parseBrokerFile(path, job.getBrokerDesc(), fileStatuses);
|
||||
}
|
||||
fileStatusList.add(fileStatuses);
|
||||
filesAdded += fileStatuses.size();
|
||||
for (TBrokerFileStatus fstatus : fileStatuses) {
|
||||
LOG.info("pull load job: {}. Add file status is {}", job.getId(), fstatus);
|
||||
}
|
||||
}
|
||||
|
||||
fileStatusMap.put(tableId, fileStatusList);
|
||||
fileNumMap.put(tableId, filesAdded);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,119 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.load.loadv2;
|
||||
|
||||
import org.apache.doris.common.DdlException;
|
||||
import org.apache.doris.common.jmockit.Deencapsulation;
|
||||
import org.apache.doris.load.BrokerFileGroup;
|
||||
import org.apache.doris.load.BrokerFileGroupAggInfo;
|
||||
import org.apache.doris.load.BrokerFileGroupAggInfo.FileGroupAggKey;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class BrokerFileGroupAggInfoTest {
|
||||
|
||||
@Test
|
||||
public void test1() throws DdlException {
|
||||
/*
|
||||
* data description:
|
||||
* table 1 -> partition[10] file1
|
||||
* table 1 -> partition[10] file2
|
||||
* table 2 -> partition[] file3
|
||||
* table 3 -> partition[11, 12] file4
|
||||
*
|
||||
* output:
|
||||
* table 1 -> partition[10] (file1, file2)
|
||||
* table 2 -> partition[] file3
|
||||
* table 3 -> partition[11, 12] file4
|
||||
*/
|
||||
BrokerFileGroupAggInfo brokerFileGroupAggInfo = new BrokerFileGroupAggInfo();
|
||||
|
||||
BrokerFileGroup group1 = Deencapsulation.newInstance(BrokerFileGroup.class);
|
||||
Deencapsulation.setField(group1, "tableId", 1L);
|
||||
Deencapsulation.setField(group1, "partitionIds", Lists.newArrayList(10L));
|
||||
|
||||
BrokerFileGroup group2 = Deencapsulation.newInstance(BrokerFileGroup.class);
|
||||
Deencapsulation.setField(group2, "tableId", 1L);
|
||||
Deencapsulation.setField(group2, "partitionIds", Lists.newArrayList(10L));
|
||||
|
||||
BrokerFileGroup group3 = Deencapsulation.newInstance(BrokerFileGroup.class);
|
||||
Deencapsulation.setField(group3, "tableId", 2L);
|
||||
Deencapsulation.setField(group3, "partitionIds", Lists.newArrayList());
|
||||
|
||||
BrokerFileGroup group4 = Deencapsulation.newInstance(BrokerFileGroup.class);
|
||||
Deencapsulation.setField(group4, "tableId", 3L);
|
||||
Deencapsulation.setField(group4, "partitionIds", Lists.newArrayList(11L, 12L));
|
||||
|
||||
BrokerFileGroup group5 = Deencapsulation.newInstance(BrokerFileGroup.class);
|
||||
Deencapsulation.setField(group5, "tableId", 4L);
|
||||
Deencapsulation.setField(group5, "partitionIds", null);
|
||||
|
||||
brokerFileGroupAggInfo.addFileGroup(group1);
|
||||
brokerFileGroupAggInfo.addFileGroup(group2);
|
||||
brokerFileGroupAggInfo.addFileGroup(group3);
|
||||
brokerFileGroupAggInfo.addFileGroup(group4);
|
||||
brokerFileGroupAggInfo.addFileGroup(group5);
|
||||
|
||||
Map<FileGroupAggKey, List<BrokerFileGroup>> map = brokerFileGroupAggInfo.getAggKeyToFileGroups();
|
||||
Assert.assertEquals(4, map.keySet().size());
|
||||
FileGroupAggKey aggKey = new FileGroupAggKey(1L, Lists.newArrayList(10L));
|
||||
Assert.assertEquals(2, map.get(aggKey).size());
|
||||
aggKey = new FileGroupAggKey(2L, Lists.newArrayList());
|
||||
Assert.assertEquals(1, map.get(aggKey).size());
|
||||
aggKey = new FileGroupAggKey(3L, Lists.newArrayList(11L, 12L));
|
||||
Assert.assertEquals(1, map.get(aggKey).size());
|
||||
aggKey = new FileGroupAggKey(4L, Lists.newArrayList());
|
||||
Assert.assertEquals(1, map.get(aggKey).size());
|
||||
}
|
||||
|
||||
@Test(expected = DdlException.class)
|
||||
public void test2() throws DdlException {
|
||||
/*
|
||||
* data description:
|
||||
* table 1 -> partition[10, 11] file1
|
||||
* table 1 -> partition[11, 12] file2
|
||||
* table 2 -> partition[] file3
|
||||
*
|
||||
* output:
|
||||
* throw exception
|
||||
*/
|
||||
BrokerFileGroupAggInfo brokerFileGroupAggInfo = new BrokerFileGroupAggInfo();
|
||||
|
||||
BrokerFileGroup group1 = Deencapsulation.newInstance(BrokerFileGroup.class);
|
||||
Deencapsulation.setField(group1, "tableId", 1L);
|
||||
Deencapsulation.setField(group1, "partitionIds", Lists.newArrayList(10L, 11L));
|
||||
|
||||
BrokerFileGroup group2 = Deencapsulation.newInstance(BrokerFileGroup.class);
|
||||
Deencapsulation.setField(group2, "tableId", 1L);
|
||||
Deencapsulation.setField(group2, "partitionIds", Lists.newArrayList(11L, 12L));
|
||||
|
||||
BrokerFileGroup group3 = Deencapsulation.newInstance(BrokerFileGroup.class);
|
||||
Deencapsulation.setField(group3, "tableId", 2L);
|
||||
Deencapsulation.setField(group3, "partitionIds", Lists.newArrayList());
|
||||
|
||||
brokerFileGroupAggInfo.addFileGroup(group1);
|
||||
brokerFileGroupAggInfo.addFileGroup(group2);
|
||||
}
|
||||
|
||||
}
@ -29,10 +29,11 @@ import org.apache.doris.common.DdlException;
import org.apache.doris.common.MetaNotFoundException;
import org.apache.doris.common.jmockit.Deencapsulation;
import org.apache.doris.load.BrokerFileGroup;
import org.apache.doris.load.BrokerFileGroupAggInfo;
import org.apache.doris.load.BrokerFileGroupAggInfo.FileGroupAggKey;
import org.apache.doris.load.EtlJobType;
import org.apache.doris.load.EtlStatus;
import org.apache.doris.load.Load;
import org.apache.doris.load.PullLoadSourceInfo;
import org.apache.doris.load.Source;
import org.apache.doris.task.MasterTaskExecutor;
import org.apache.doris.transaction.TransactionState;
@ -172,23 +173,27 @@ public class BrokerLoadJobTest {
}

@Test
public void testGetTableNames(@Injectable PullLoadSourceInfo dataSourceInfo,
public void testGetTableNames(@Injectable BrokerFileGroupAggInfo fileGroupAggInfo,
@Injectable BrokerFileGroup brokerFileGroup,
@Mocked Catalog catalog,
@Injectable Database database,
@Injectable Table table) throws MetaNotFoundException {
List<BrokerFileGroup> brokerFileGroups = Lists.newArrayList();
brokerFileGroups.add(brokerFileGroup);
Map<Long, List<BrokerFileGroup>> idToFileGroups = Maps.newHashMap();
idToFileGroups.put(1L, brokerFileGroups);
Map<FileGroupAggKey, List<BrokerFileGroup>> aggKeyToFileGroups = Maps.newHashMap();
FileGroupAggKey aggKey = new FileGroupAggKey(1L, null);
aggKeyToFileGroups.put(aggKey, brokerFileGroups);
BrokerLoadJob brokerLoadJob = new BrokerLoadJob();
Deencapsulation.setField(brokerLoadJob, "dataSourceInfo", dataSourceInfo);
Deencapsulation.setField(brokerLoadJob, "fileGroupAggInfo", fileGroupAggInfo);
String tableName = "table";
new Expectations() {
{
dataSourceInfo.getIdToFileGroups();
fileGroupAggInfo.getAggKeyToFileGroups();
minTimes = 0;
result = idToFileGroups;
result = aggKeyToFileGroups;
fileGroupAggInfo.getAllTableIds();
minTimes = 0;
result = Sets.newHashSet(1L);
catalog.getDb(anyLong);
minTimes = 0;
result = database;
@ -249,7 +254,7 @@ public class BrokerLoadJobTest {
public void testPendingTaskOnFinished(@Injectable BrokerPendingTaskAttachment attachment,
@Mocked Catalog catalog,
@Injectable Database database,
@Injectable PullLoadSourceInfo dataSourceInfo,
@Injectable BrokerFileGroupAggInfo fileGroupAggInfo,
@Injectable BrokerFileGroup brokerFileGroup1,
@Injectable BrokerFileGroup brokerFileGroup2,
@Injectable BrokerFileGroup brokerFileGroup3,
@ -261,15 +266,22 @@ public class BrokerLoadJobTest {
long taskId = 1L;
long tableId1 = 1L;
long tableId2 = 2L;
Map<Long, List<BrokerFileGroup>> idToFileGroups = Maps.newHashMap();
long partitionId1 = 3L;
long partitionId2 = 4;

Map<FileGroupAggKey, List<BrokerFileGroup>> aggKeyToFileGroups = Maps.newHashMap();
List<BrokerFileGroup> fileGroups1 = Lists.newArrayList();
fileGroups1.add(brokerFileGroup1);
idToFileGroups.put(tableId1, fileGroups1);
aggKeyToFileGroups.put(new FileGroupAggKey(tableId1, null), fileGroups1);

List<BrokerFileGroup> fileGroups2 = Lists.newArrayList();
fileGroups2.add(brokerFileGroup2);
fileGroups2.add(brokerFileGroup3);
idToFileGroups.put(tableId2, fileGroups2);
Deencapsulation.setField(brokerLoadJob, "dataSourceInfo", dataSourceInfo);
aggKeyToFileGroups.put(new FileGroupAggKey(tableId2, Lists.newArrayList(partitionId1)), fileGroups2);
// add another file groups with different partition id
aggKeyToFileGroups.put(new FileGroupAggKey(tableId2, Lists.newArrayList(partitionId2)), fileGroups2);

Deencapsulation.setField(brokerLoadJob, "fileGroupAggInfo", fileGroupAggInfo);
new Expectations() {
{
attachment.getTaskId();
@ -278,9 +290,9 @@ public class BrokerLoadJobTest {
catalog.getDb(anyLong);
minTimes = 0;
result = database;
dataSourceInfo.getIdToFileGroups();
fileGroupAggInfo.getAggKeyToFileGroups();
minTimes = 0;
result = idToFileGroups;
result = aggKeyToFileGroups;
database.getTable(anyLong);
minTimes = 0;
result = olapTable;
@ -288,6 +300,7 @@ public class BrokerLoadJobTest {
minTimes = 0;
result = 1L;
result = 2L;
result = 3L;
}
};

@ -296,7 +309,7 @@ public class BrokerLoadJobTest {
Assert.assertEquals(1, finishedTaskIds.size());
Assert.assertEquals(true, finishedTaskIds.contains(taskId));
Map<Long, LoadTask> idToTasks = Deencapsulation.getField(brokerLoadJob, "idToTasks");
Assert.assertEquals(2, idToTasks.size());
Assert.assertEquals(3, idToTasks.size());
}

@Test
@ -20,9 +20,10 @@ package org.apache.doris.load.loadv2;
import org.apache.doris.analysis.BrokerDesc;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.BrokerUtil;
import org.apache.doris.common.jmockit.Deencapsulation;
import org.apache.doris.common.util.BrokerUtil;
import org.apache.doris.load.BrokerFileGroup;
import org.apache.doris.load.BrokerFileGroupAggInfo.FileGroupAggKey;
import org.apache.doris.thrift.TBrokerFileStatus;

import com.google.common.collect.Lists;
@ -48,10 +49,11 @@ public class BrokerLoadPendingTaskTest {
@Injectable BrokerDesc brokerDesc,
@Mocked Catalog catalog,
@Injectable TBrokerFileStatus tBrokerFileStatus) throws UserException {
Map<Long, List<BrokerFileGroup>> tableToFileGroups = Maps.newHashMap();
Map<FileGroupAggKey, List<BrokerFileGroup>> aggKeyToFileGroups = Maps.newHashMap();
List<BrokerFileGroup> brokerFileGroups = Lists.newArrayList();
brokerFileGroups.add(brokerFileGroup);
tableToFileGroups.put(1L, brokerFileGroups);
FileGroupAggKey aggKey = new FileGroupAggKey(1L, null);
aggKeyToFileGroups.put(aggKey, brokerFileGroups);
new Expectations() {
{
catalog.getNextId();
@ -67,10 +69,10 @@ public class BrokerLoadPendingTaskTest {
}
};

BrokerLoadPendingTask brokerLoadPendingTask = new BrokerLoadPendingTask(brokerLoadJob, tableToFileGroups, brokerDesc);
BrokerLoadPendingTask brokerLoadPendingTask = new BrokerLoadPendingTask(brokerLoadJob, aggKeyToFileGroups, brokerDesc);
brokerLoadPendingTask.executeTask();
BrokerPendingTaskAttachment brokerPendingTaskAttachment = Deencapsulation.getField(brokerLoadPendingTask, "attachment");
Assert.assertEquals(1, brokerPendingTaskAttachment.getFileNumByTable(1L));
Assert.assertEquals(tBrokerFileStatus, brokerPendingTaskAttachment.getFileStatusByTable(1L).get(0).get(0));
Assert.assertEquals(1, brokerPendingTaskAttachment.getFileNumByTable(aggKey));
Assert.assertEquals(tBrokerFileStatus, brokerPendingTaskAttachment.getFileStatusByTable(aggKey).get(0).get(0));
}
}
@ -99,7 +99,7 @@ public class OlapTableSinkTest {
dstTable.getPartitions(); result = Lists.newArrayList(partition);
}};

OlapTableSink sink = new OlapTableSink(dstTable, tuple, "");
OlapTableSink sink = new OlapTableSink(dstTable, tuple, Lists.newArrayList());
sink.init(new TUniqueId(1, 2), 3, 4, 1000);
sink.finalize();
LOG.info("sink is {}", sink.toThrift());
@ -126,16 +126,12 @@ public class OlapTableSinkTest {
partInfo.getType(); result = PartitionType.RANGE;
partInfo.getPartitionColumns(); result = Lists.newArrayList(partKey);
partInfo.getRange(1); result = Range.lessThan(key);
// partInfo.getRange(2); result = Range.atLeast(key);
dstTable.getPartitions(); result = Lists.newArrayList(p1, p2);
dstTable.getPartition("p1"); result = p1;

dstTable.getPartition(p1.getId()); result = p1;
index.getTablets(); result = Lists.newArrayList(new Tablet(1));
// systemInfoService.getBackendIds(anyBoolean); result = Lists.newArrayList(new Long(1));
// systemInfoService.getBackend(new Long(1)); result = new Backend(1, "abc", 1234);
}};

OlapTableSink sink = new OlapTableSink(dstTable, tuple, "p1");
OlapTableSink sink = new OlapTableSink(dstTable, tuple, Lists.newArrayList(p1.getId()));
sink.init(new TUniqueId(1, 2), 3, 4, 1000);
try {
sink.finalize();
@ -152,12 +148,13 @@ public class OlapTableSinkTest {
@Injectable MaterializedIndex index) throws UserException {
TupleDescriptor tuple = getTuple();

long unknownPartId = 12345L;
new Expectations() {{
partInfo.getType(); result = PartitionType.RANGE;
dstTable.getPartition("p3"); result = null;
dstTable.getPartition(unknownPartId); result = null;
}};

OlapTableSink sink = new OlapTableSink(dstTable, tuple, "p3");
OlapTableSink sink = new OlapTableSink(dstTable, tuple, Lists.newArrayList(unknownPartId));
sink.init(new TUniqueId(1, 2), 3, 4, 1000);
sink.finalize();
LOG.info("sink is {}", sink.toThrift());
@ -174,7 +171,7 @@ public class OlapTableSinkTest {
partInfo.getType(); result = PartitionType.UNPARTITIONED;
}};

OlapTableSink sink = new OlapTableSink(dstTable, tuple, "p1");
OlapTableSink sink = new OlapTableSink(dstTable, tuple, Lists.newArrayList(1L));
sink.init(new TUniqueId(1, 2), 3, 4, 1000);
sink.finalize();
LOG.info("sink is {}", sink.toThrift());
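These OlapTableSinkTest changes reflect the third fix: OlapTableSink now receives a list of partition ids instead of a comma-separated partition-name string. A caller therefore resolves names to ids before constructing the sink, roughly as in the sketch below; the helper class and method are hypothetical, and OlapTable, Partition, and UserException stand in for the corresponding Doris classes (accessor names follow the mocks used above):
```
import com.google.common.collect.Lists;
import java.util.List;

// Sketch only: translate user-specified partition names into the id list that
// the reworked OlapTableSink constructor expects.
public class PartitionIdResolverSketch {
    static List<Long> resolvePartitionIds(OlapTable table, List<String> partitionNames) throws UserException {
        List<Long> partitionIds = Lists.newArrayList();
        for (String name : partitionNames) {
            Partition partition = table.getPartition(name);
            if (partition == null) {
                // Unknown partition names are rejected during analysis, before the sink is built.
                throw new UserException("unknown partition '" + name + "' in table '" + table.getName() + "'");
            }
            partitionIds.add(partition.getId());
        }
        return partitionIds;
    }
    // The caller then builds the sink, e.g.:
    // OlapTableSink sink = new OlapTableSink(table, tupleDescriptor, partitionIds);
}
```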