[improvement](insert) refactor group commit stream load (#25560)
This commit is contained in:
@ -48,6 +48,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate;
|
||||
import org.apache.doris.planner.DataPartition;
|
||||
import org.apache.doris.planner.DataSink;
|
||||
import org.apache.doris.planner.ExportSink;
|
||||
import org.apache.doris.planner.GroupCommitBlockSink;
|
||||
import org.apache.doris.planner.GroupCommitOlapTableSink;
|
||||
import org.apache.doris.planner.OlapTableSink;
|
||||
import org.apache.doris.planner.StreamLoadPlanner;
|
||||
@ -167,6 +168,7 @@ public class NativeInsertStmt extends InsertStmt {
|
||||
private long tableId = -1;
|
||||
// true if BE generates an insert from a group commit tvf stmt and executes it to load data
|
||||
public boolean isGroupCommitTvf = false;
|
||||
public boolean isGroupCommitStreamLoadSql = false;
|
||||
|
||||
private boolean isFromDeleteOrUpdateStmt = false;
|
||||
|
||||
@ -933,10 +935,17 @@ public class NativeInsertStmt extends InsertStmt {
|
||||
}
|
||||
if (targetTable instanceof OlapTable) {
|
||||
checkInnerGroupCommit();
|
||||
OlapTableSink sink = isGroupCommitTvf ? new GroupCommitOlapTableSink((OlapTable) targetTable, olapTuple,
|
||||
targetPartitionIds, analyzer.getContext().getSessionVariable().isEnableSingleReplicaInsert())
|
||||
: new OlapTableSink((OlapTable) targetTable, olapTuple, targetPartitionIds,
|
||||
analyzer.getContext().getSessionVariable().isEnableSingleReplicaInsert());
|
||||
OlapTableSink sink;
|
||||
if (isGroupCommitTvf) {
|
||||
sink = new GroupCommitOlapTableSink((OlapTable) targetTable, olapTuple,
|
||||
targetPartitionIds, analyzer.getContext().getSessionVariable().isEnableSingleReplicaInsert());
|
||||
} else if (isGroupCommitStreamLoadSql) {
|
||||
sink = new GroupCommitBlockSink((OlapTable) targetTable, olapTuple,
|
||||
targetPartitionIds, analyzer.getContext().getSessionVariable().isEnableSingleReplicaInsert());
|
||||
} else {
|
||||
sink = new OlapTableSink((OlapTable) targetTable, olapTuple, targetPartitionIds,
|
||||
analyzer.getContext().getSessionVariable().isEnableSingleReplicaInsert());
|
||||
}
|
||||
dataSink = sink;
|
||||
sink.setPartialUpdateInputColumns(isPartialUpdate, partialUpdateCols);
|
||||
dataPartition = dataSink.getOutputPartition();
|
||||
@ -1092,7 +1101,8 @@ public class NativeInsertStmt extends InsertStmt {
|
||||
streamLoadPutRequest.setDb(db.getFullName()).setMaxFilterRatio(1)
|
||||
.setTbl(getTbl())
|
||||
.setFileType(TFileType.FILE_STREAM).setFormatType(TFileFormatType.FORMAT_CSV_PLAIN)
|
||||
.setMergeType(TMergeType.APPEND).setThriftRpcTimeoutMs(5000).setLoadId(queryId);
|
||||
.setMergeType(TMergeType.APPEND).setThriftRpcTimeoutMs(5000).setLoadId(queryId)
|
||||
.setGroupCommit(true);
|
||||
StreamLoadTask streamLoadTask = StreamLoadTask.fromTStreamLoadPutRequest(streamLoadPutRequest);
|
||||
StreamLoadPlanner planner = new StreamLoadPlanner((Database) getDbObj(), olapTable, streamLoadTask);
|
||||
// Will use load id as query id in fragment
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.planner;
|
||||
|
||||
import org.apache.doris.analysis.TupleDescriptor;
|
||||
import org.apache.doris.catalog.OlapTable;
|
||||
import org.apache.doris.thrift.TDataSinkType;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class GroupCommitBlockSink extends OlapTableSink {
|
||||
|
||||
public GroupCommitBlockSink(OlapTable dstTable, TupleDescriptor tupleDescriptor, List<Long> partitionIds,
|
||||
boolean singleReplicaLoad) {
|
||||
super(dstTable, tupleDescriptor, partitionIds, singleReplicaLoad);
|
||||
}
|
||||
|
||||
protected TDataSinkType getDataSinkType() {
|
||||
return TDataSinkType.GROUP_COMMIT_BLOCK_SINK;
|
||||
}
|
||||
}
|
||||
@ -125,6 +125,7 @@ public class OlapTableSink extends DataSink {
|
||||
tSink.setLoadId(loadId);
|
||||
tSink.setTxnId(txnId);
|
||||
tSink.setDbId(dbId);
|
||||
tSink.setBaseSchemaVersion(dstTable.getBaseSchemaVersion());
|
||||
tSink.setLoadChannelTimeoutS(loadChannelTimeoutS);
|
||||
tSink.setSendBatchParallelism(sendBatchParallelism);
|
||||
this.isStrictMode = isStrictMode;
|
||||
|
||||
@ -48,6 +48,7 @@ import org.apache.doris.load.loadv2.LoadTask;
|
||||
import org.apache.doris.load.routineload.RoutineLoadJob;
|
||||
import org.apache.doris.service.FrontendOptions;
|
||||
import org.apache.doris.task.LoadTaskInfo;
|
||||
import org.apache.doris.task.StreamLoadTask;
|
||||
import org.apache.doris.thrift.PaloInternalServiceVersion;
|
||||
import org.apache.doris.thrift.TBrokerFileStatus;
|
||||
import org.apache.doris.thrift.TExecPlanFragmentParams;
|
||||
@ -254,10 +255,15 @@ public class StreamLoadPlanner {
|
||||
|
||||
// create dest sink
|
||||
List<Long> partitionIds = getAllPartitionIds();
|
||||
OlapTableSink olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds,
|
||||
Config.enable_single_replica_load);
|
||||
olapTableSink.init(loadId, taskInfo.getTxnId(), db.getId(), timeout,
|
||||
taskInfo.getSendBatchParallelism(), taskInfo.isLoadToSingleTablet(), taskInfo.isStrictMode());
|
||||
OlapTableSink olapTableSink;
|
||||
if (taskInfo instanceof StreamLoadTask && ((StreamLoadTask) taskInfo).isGroupCommit()) {
|
||||
olapTableSink = new GroupCommitBlockSink(destTable, tupleDesc, partitionIds,
|
||||
Config.enable_single_replica_load);
|
||||
} else {
|
||||
olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds, Config.enable_single_replica_load);
|
||||
}
|
||||
olapTableSink.init(loadId, taskInfo.getTxnId(), db.getId(), timeout, taskInfo.getSendBatchParallelism(),
|
||||
taskInfo.isLoadToSingleTablet(), taskInfo.isStrictMode());
|
||||
olapTableSink.setPartialUpdateInputColumns(isPartialUpdate, partialUpdateInputColumns);
|
||||
olapTableSink.complete(analyzer);
|
||||
|
||||
@ -463,8 +469,13 @@ public class StreamLoadPlanner {
|
||||
|
||||
// create dest sink
|
||||
List<Long> partitionIds = getAllPartitionIds();
|
||||
OlapTableSink olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds,
|
||||
Config.enable_single_replica_load);
|
||||
OlapTableSink olapTableSink;
|
||||
if (taskInfo instanceof StreamLoadTask && ((StreamLoadTask) taskInfo).isGroupCommit()) {
|
||||
olapTableSink = new GroupCommitBlockSink(destTable, tupleDesc, partitionIds,
|
||||
Config.enable_single_replica_load);
|
||||
} else {
|
||||
olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds, Config.enable_single_replica_load);
|
||||
}
|
||||
olapTableSink.init(loadId, taskInfo.getTxnId(), db.getId(), timeout,
|
||||
taskInfo.getSendBatchParallelism(), taskInfo.isLoadToSingleTablet(), taskInfo.isStrictMode());
|
||||
olapTableSink.setPartialUpdateInputColumns(isPartialUpdate, partialUpdateInputColumns);
|
||||
|
||||
@ -2090,8 +2090,11 @@ public class FrontendServiceImpl implements FrontendService.Iface {
|
||||
NativeInsertStmt parsedStmt = (NativeInsertStmt) SqlParserUtils.getFirstStmt(parser);
|
||||
parsedStmt.setOrigStmt(new OriginStatement(originStmt, 0));
|
||||
parsedStmt.setUserInfo(ctx.getCurrentUserIdentity());
|
||||
if (request.isGroupCommit() && parsedStmt.getLabel() != null) {
|
||||
throw new AnalysisException("label and group_commit can't be set at the same time");
|
||||
if (request.isGroupCommit()) {
|
||||
if (parsedStmt.getLabel() != null) {
|
||||
throw new AnalysisException("label and group_commit can't be set at the same time");
|
||||
}
|
||||
parsedStmt.isGroupCommitStreamLoadSql = true;
|
||||
}
|
||||
StmtExecutor executor = new StmtExecutor(ctx, parsedStmt);
|
||||
ctx.setExecutor(executor);
|
||||
@ -2235,13 +2238,15 @@ public class FrontendServiceImpl implements FrontendService.Iface {
|
||||
StreamLoadPlanner planner = new StreamLoadPlanner(db, table, streamLoadTask);
|
||||
TPipelineFragmentParams plan = planner.planForPipeline(streamLoadTask.getId(),
|
||||
multiTableFragmentInstanceIdIndex);
|
||||
// add table indexes to transaction state
|
||||
TransactionState txnState = Env.getCurrentGlobalTransactionMgr()
|
||||
.getTransactionState(db.getId(), request.getTxnId());
|
||||
if (txnState == null) {
|
||||
throw new UserException("txn does not exist: " + request.getTxnId());
|
||||
if (!request.isGroupCommit()) {
|
||||
// add table indexes to transaction state
|
||||
TransactionState txnState = Env.getCurrentGlobalTransactionMgr()
|
||||
.getTransactionState(db.getId(), request.getTxnId());
|
||||
if (txnState == null) {
|
||||
throw new UserException("txn does not exist: " + request.getTxnId());
|
||||
}
|
||||
txnState.addTableIndexes(table);
|
||||
}
|
||||
txnState.addTableIndexes(table);
|
||||
return plan;
|
||||
} finally {
|
||||
table.readUnlock();
|
||||
|
||||
@ -69,10 +69,10 @@ public class GroupCommitTableValuedFunction extends ExternalFileTableValuedFunct
|
||||
Column deleteSignColumn = ((OlapTable) table).getDeleteSignColumn();
|
||||
List<Column> tableColumns = table.getBaseSchema(false);
|
||||
for (int i = 1; i <= tableColumns.size(); i++) {
|
||||
fileColumns.add(new Column("c" + i, tableColumns.get(i - 1).getDataType(), true));
|
||||
fileColumns.add(new Column("c" + i, tableColumns.get(i - 1).getType(), true));
|
||||
}
|
||||
if (deleteSignColumn != null) {
|
||||
fileColumns.add(new Column("c" + (tableColumns.size() + 1), deleteSignColumn.getDataType(), true));
|
||||
fileColumns.add(new Column("c" + (tableColumns.size() + 1), deleteSignColumn.getType(), true));
|
||||
}
|
||||
return fileColumns;
|
||||
}
|
||||
|
||||
@ -94,6 +94,8 @@ public class StreamLoadTask implements LoadTaskInfo {
|
||||
|
||||
private byte escape = 0;
|
||||
|
||||
private boolean groupCommit = false;
|
||||
|
||||
public StreamLoadTask(TUniqueId id, long txnId, TFileType fileType, TFileFormatType formatType,
|
||||
TFileCompressType compressType) {
|
||||
this.id = id;
|
||||
@ -312,6 +314,7 @@ public class StreamLoadTask implements LoadTaskInfo {
|
||||
request.getFileType(), request.getFormatType(),
|
||||
request.getCompressType());
|
||||
streamLoadTask.setOptionalFromTSLPutRequest(request);
|
||||
streamLoadTask.setGroupCommit(request.isGroupCommit());
|
||||
if (request.isSetFileSize()) {
|
||||
streamLoadTask.fileSize = request.getFileSize();
|
||||
}
|
||||
@ -519,5 +522,13 @@ public class StreamLoadTask implements LoadTaskInfo {
|
||||
/**
 * Returns the value of this task's {@code maxFilterRatio} field.
 *
 * @return the current {@code maxFilterRatio}
 */
public double getMaxFilterRatio() {
|
||||
return maxFilterRatio;
|
||||
}
|
||||
|
||||
/**
 * Sets this task's {@code groupCommit} flag.
 *
 * @param groupCommit the new value stored in the {@code groupCommit} field
 */
public void setGroupCommit(boolean groupCommit) {
|
||||
this.groupCommit = groupCommit;
|
||||
}
|
||||
|
||||
/**
 * Returns this task's {@code groupCommit} flag.
 *
 * @return the current value of the {@code groupCommit} field
 */
public boolean isGroupCommit() {
|
||||
return groupCommit;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user