[opt](nereids) if column stats are unknown, 10-20 table join optimization uses cascading instead of dphyp (#29902)
* If column stats are unknown, do not use dphyp. TPC-DS query64 is optimized in the case of no stats: at sf500, query64 improved from 15 sec to 7 sec on hdfs, and from 4 sec to 3.85 sec on olap tables.
This commit is contained in:
@ -74,6 +74,13 @@ public class StatementContext {
|
||||
private boolean isDpHyp = false;
|
||||
private boolean isOtherJoinReorder = false;
|
||||
|
||||
// hasUnknownColStats is true if any column stats in the tables used by this sql are unknown.
|
||||
// The algorithm to derive a plan when column stats are unknown is implemented in the cascading framework, not in dphyper.
|
||||
// Hence, when column stats are unknown, even if the number of tables used by a sql is more than
|
||||
// MAX_TABLE_COUNT_USE_CASCADES_JOIN_REORDER, join reorder should choose the cascading framework.
|
||||
// Thus hasUnknownColStats has higher priority than isDpHyp
|
||||
private boolean hasUnknownColStats = false;
|
||||
|
||||
private final IdGenerator<ExprId> exprIdGenerator = ExprId.createGenerator();
|
||||
private final IdGenerator<ObjectId> objectIdGenerator = ObjectId.createGenerator();
|
||||
private final IdGenerator<RelationId> relationIdGenerator = RelationId.createGenerator();
|
||||
@ -261,4 +268,12 @@ public class StatementContext {
|
||||
public void addJoinFilters(Collection<Expression> newJoinFilters) {
|
||||
this.joinFilters.addAll(newJoinFilters);
|
||||
}
|
||||
|
||||
public boolean isHasUnknownColStats() {
|
||||
return hasUnknownColStats;
|
||||
}
|
||||
|
||||
public void setHasUnknownColStats(boolean hasUnknownColStats) {
|
||||
this.hasUnknownColStats = hasUnknownColStats;
|
||||
}
|
||||
}
|
||||
|
||||
@ -22,6 +22,7 @@ import org.apache.doris.nereids.jobs.cascades.DeriveStatsJob;
|
||||
import org.apache.doris.nereids.jobs.cascades.OptimizeGroupJob;
|
||||
import org.apache.doris.nereids.jobs.joinorder.JoinOrderJob;
|
||||
import org.apache.doris.nereids.memo.Group;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
import org.apache.doris.qe.SessionVariable;
|
||||
|
||||
import java.util.Objects;
|
||||
@ -49,11 +50,22 @@ public class Optimizer {
|
||||
cascadesContext.pushJob(new DeriveStatsJob(cascadesContext.getMemo().getRoot().getLogicalExpression(),
|
||||
cascadesContext.getCurrentJobContext()));
|
||||
cascadesContext.getJobScheduler().executeJobPool(cascadesContext);
|
||||
boolean optimizeWithUnknownColStats = false;
|
||||
if (ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) {
|
||||
if (ConnectContext.get().getStatementContext().isHasUnknownColStats()) {
|
||||
optimizeWithUnknownColStats = true;
|
||||
}
|
||||
}
|
||||
// DPHyp optimize
|
||||
int maxTableCount = getSessionVariable().getMaxTableCountUseCascadesJoinReorder();
|
||||
if (optimizeWithUnknownColStats) {
|
||||
// if column stats are unknown, 10~20 table joins are optimized by the cascading framework
|
||||
maxTableCount = 2 * maxTableCount;
|
||||
}
|
||||
int maxJoinCount = cascadesContext.getMemo().countMaxContinuousJoin();
|
||||
cascadesContext.getStatementContext().setMaxContinuousJoin(maxJoinCount);
|
||||
boolean isDpHyp = getSessionVariable().enableDPHypOptimizer
|
||||
|| maxJoinCount > getSessionVariable().getMaxTableCountUseCascadesJoinReorder();
|
||||
|| maxJoinCount > maxTableCount;
|
||||
cascadesContext.getStatementContext().setDpHyp(isDpHyp);
|
||||
cascadesContext.getStatementContext().setOtherJoinReorder(false);
|
||||
if (!getSessionVariable().isDisableJoinReorder() && isDpHyp
|
||||
|
||||
@ -623,6 +623,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
|
||||
Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
|
||||
TableIf table = catalogRelation.getTable();
|
||||
double rowCount = catalogRelation.getTable().estimatedRowCount();
|
||||
boolean hasUnknownCol = false;
|
||||
for (SlotReference slotReference : slotSet) {
|
||||
String colName = slotReference.getName();
|
||||
boolean shouldIgnoreThisCol = StatisticConstants.shouldIgnoreCol(table, slotReference.getColumn().get());
|
||||
@ -644,13 +645,19 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
|
||||
}
|
||||
if (!cache.isUnKnown) {
|
||||
rowCount = Math.max(rowCount, cache.count);
|
||||
} else {
|
||||
hasUnknownCol = true;
|
||||
}
|
||||
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) {
|
||||
columnStatisticMap.put(slotReference, cache);
|
||||
} else {
|
||||
columnStatisticMap.put(slotReference, ColumnStatistic.UNKNOWN);
|
||||
hasUnknownCol = true;
|
||||
}
|
||||
}
|
||||
if (hasUnknownCol && ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) {
|
||||
ConnectContext.get().getStatementContext().setHasUnknownColStats(true);
|
||||
}
|
||||
Statistics stats = new Statistics(rowCount, columnStatisticMap);
|
||||
stats = normalizeCatalogRelationColumnStatsRowCount(stats);
|
||||
return stats;
|
||||
|
||||
Reference in New Issue
Block a user