[improve](group commit) Group commit supports max filter ratio when the number of rows is less than the value in config (#28139)

This commit is contained in:
meiyi
2023-12-12 16:33:36 +08:00
committed by GitHub
parent d25cbdd4dc
commit 45b2dbab6a
21 changed files with 477 additions and 38 deletions

View File

@ -970,7 +970,7 @@ public class NativeInsertStmt extends InsertStmt {
if (isGroupCommitStreamLoadSql) {
sink = new GroupCommitBlockSink((OlapTable) targetTable, olapTuple,
targetPartitionIds, analyzer.getContext().getSessionVariable().isEnableSingleReplicaInsert(),
ConnectContext.get().getSessionVariable().getGroupCommit());
ConnectContext.get().getSessionVariable().getGroupCommit(), 0);
} else {
sink = new OlapTableSink((OlapTable) targetTable, olapTuple, targetPartitionIds,
analyzer.getContext().getSessionVariable().isEnableSingleReplicaInsert());

View File

@ -29,11 +29,13 @@ import java.util.List;
public class GroupCommitBlockSink extends OlapTableSink {
private String groupCommit;
private double maxFilterRatio;
public GroupCommitBlockSink(OlapTable dstTable, TupleDescriptor tupleDescriptor, List<Long> partitionIds,
boolean singleReplicaLoad, String groupCommit) {
boolean singleReplicaLoad, String groupCommit, double maxFilterRatio) {
super(dstTable, tupleDescriptor, partitionIds, singleReplicaLoad);
this.groupCommit = groupCommit;
this.maxFilterRatio = maxFilterRatio;
}
protected TDataSinkType getDataSinkType() {
@ -45,6 +47,7 @@ public class GroupCommitBlockSink extends OlapTableSink {
TGroupCommitMode groupCommitMode = parseGroupCommit(groupCommit);
Preconditions.checkNotNull(groupCommitMode, "Group commit is: " + groupCommit);
tDataSink.olap_table_sink.setGroupCommitMode(groupCommitMode);
tDataSink.olap_table_sink.setMaxFilterRatio(maxFilterRatio);
return tDataSink;
}

View File

@ -98,11 +98,12 @@ public class GroupCommitPlanner {
}
streamLoadPutRequest
.setDb(db.getFullName())
.setMaxFilterRatio(1)
.setMaxFilterRatio(ConnectContext.get().getSessionVariable().enableInsertStrict ? 0 : 1)
.setTbl(table.getName())
.setFileType(TFileType.FILE_STREAM).setFormatType(TFileFormatType.FORMAT_CSV_PLAIN)
.setMergeType(TMergeType.APPEND).setThriftRpcTimeoutMs(5000).setLoadId(queryId)
.setTrimDoubleQuotes(true).setGroupCommitMode(groupCommit);
.setTrimDoubleQuotes(true).setGroupCommitMode(groupCommit)
.setStrictMode(ConnectContext.get().getSessionVariable().enableInsertStrict);
StreamLoadTask streamLoadTask = StreamLoadTask.fromTStreamLoadPutRequest(streamLoadPutRequest);
StreamLoadPlanner planner = new StreamLoadPlanner(db, table, streamLoadTask);
// Will using load id as query id in fragment

View File

@ -261,7 +261,8 @@ public class StreamLoadPlanner {
OlapTableSink olapTableSink;
if (taskInfo instanceof StreamLoadTask && ((StreamLoadTask) taskInfo).getGroupCommit() != null) {
olapTableSink = new GroupCommitBlockSink(destTable, tupleDesc, partitionIds,
Config.enable_single_replica_load, ((StreamLoadTask) taskInfo).getGroupCommit());
Config.enable_single_replica_load, ((StreamLoadTask) taskInfo).getGroupCommit(),
taskInfo.getMaxFilterRatio());
} else {
olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds, Config.enable_single_replica_load);
}
@ -481,7 +482,8 @@ public class StreamLoadPlanner {
OlapTableSink olapTableSink;
if (taskInfo instanceof StreamLoadTask && ((StreamLoadTask) taskInfo).getGroupCommit() != null) {
olapTableSink = new GroupCommitBlockSink(destTable, tupleDesc, partitionIds,
Config.enable_single_replica_load, ((StreamLoadTask) taskInfo).getGroupCommit());
Config.enable_single_replica_load, ((StreamLoadTask) taskInfo).getGroupCommit(),
taskInfo.getMaxFilterRatio());
} else {
olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds, Config.enable_single_replica_load);
}

View File

@ -176,6 +176,7 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.protobuf.ByteString;
import com.google.protobuf.ProtocolStringList;
import lombok.Setter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -1891,7 +1892,9 @@ public class StmtExecutor {
List<InternalService.PDataRow> rows = groupCommitPlanner.getRows(nativeInsertStmt);
PGroupCommitInsertResponse response = groupCommitPlanner.executeGroupCommitInsert(context, rows);
TStatusCode code = TStatusCode.findByValue(response.getStatus().getStatusCode());
if (code == TStatusCode.DATA_QUALITY_ERROR) {
ProtocolStringList errorMsgsList = response.getStatus().getErrorMsgsList();
if (code == TStatusCode.DATA_QUALITY_ERROR && !errorMsgsList.isEmpty() && errorMsgsList.get(0)
.contains("schema version not match")) {
LOG.info("group commit insert failed. stmt: {}, backend id: {}, status: {}, "
+ "schema version: {}, retry: {}", insertStmt.getOrigStmt().originStmt,
groupCommitPlanner.getBackend().getId(),