[feature](statistics) Statistics derivation. Step 2: OtherNode implemen… (#9458)

closed #9644

Second step of statistics derivation: implementation of nodes other than scan_node.
The statistics-derivation interface for all node types is dispatched uniformly through DeriveFactory.java.
Added a unit test to verify that the derivation is correct.

Statistics derivation for each node is placed in its own *StatsDerive.java.
Detailed design: https://docs.google.com/document/d/1u1L6XhyzKShoyYRwFQ6kE1rnvY2iFwauwg289au5Qq0/edit
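For orientation, here is a minimal, self-contained sketch of the dispatch pattern this change introduces: statistics are derived bottom-up over the plan tree, and a factory maps each node type to its *StatsDerive implementation. Every Mini* name below is a stand-in invented for this sketch, not an actual Doris class; in the patch itself the entry point is StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(node) (presumably dispatching through DeriveFactory), which stores a StatsDeriveResult on each PlanNode so that computeStats() can read the row count from it.

import java.util.ArrayList;
import java.util.List;

// Simplified stand-ins for PlanNode, NodeType and DeriveFactory -- not the real Doris classes.
enum MiniNodeType { OLAP_SCAN_NODE, AGG_NODE, DEFAULT }

class MiniPlanNode {
    final MiniNodeType type;
    final List<MiniPlanNode> children = new ArrayList<>();
    long rowCount = -1; // -1 means "unknown"; filled in by the derivation below
    MiniPlanNode(MiniNodeType type) { this.type = type; }
}

abstract class MiniStatsDerive {
    protected final List<Long> childRowCounts = new ArrayList<>();
    void init(MiniPlanNode node) {
        for (MiniPlanNode child : node.children) {
            childRowCounts.add(child.rowCount); // children are derived before their parent
        }
    }
    abstract long deriveRowCount();
}

class MiniScanStatsDerive extends MiniStatsDerive {
    @Override
    long deriveRowCount() {
        return 1000; // placeholder: pretend table statistics report 1000 rows
    }
}

class MiniAggStatsDerive extends MiniStatsDerive {
    @Override
    long deriveRowCount() {
        // placeholder: pretend the grouping columns have a combined NDV of 100,
        // capped by the input row count
        return Math.min(100L, childRowCounts.get(0));
    }
}

class MiniDeriveFactory {
    static MiniStatsDerive getStatsDerive(MiniNodeType type) {
        switch (type) {
            case OLAP_SCAN_NODE: return new MiniScanStatsDerive();
            case AGG_NODE: return new MiniAggStatsDerive();
            default: return new MiniStatsDerive() {
                @Override
                long deriveRowCount() {
                    // pass-through: keep the first child's estimate, or -1 for leaves
                    return childRowCounts.isEmpty() ? -1 : childRowCounts.get(0);
                }
            };
        }
    }
}

public class StatsDeriveSketch {
    // Post-order walk: derive every child first, then the node itself.
    static void deriveRecursively(MiniPlanNode node) {
        for (MiniPlanNode child : node.children) {
            deriveRecursively(child);
        }
        MiniStatsDerive derive = MiniDeriveFactory.getStatsDerive(node.type);
        derive.init(node);
        node.rowCount = derive.deriveRowCount();
    }

    public static void main(String[] args) {
        MiniPlanNode scan = new MiniPlanNode(MiniNodeType.OLAP_SCAN_NODE);
        MiniPlanNode agg = new MiniPlanNode(MiniNodeType.AGG_NODE);
        agg.children.add(scan);
        deriveRecursively(agg);
        System.out.println("scan rowCount=" + scan.rowCount + ", agg rowCount=" + agg.rowCount);
        // prints: scan rowCount=1000, agg rowCount=100
    }
}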
zhengshiJ committed 2022-06-07 21:10:28 +08:00 (committed by GitHub)
parent f2aa5f32b8, commit e97d835ba7
38 changed files with 1152 additions and 148 deletions

View File

@ -28,6 +28,7 @@ import org.apache.doris.analysis.SlotId;
import org.apache.doris.common.NotImplementedException;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.VectorizedUtil;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TAggregationNode;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TExpr;
@ -49,7 +50,7 @@ import java.util.Set;
* Aggregation computation.
*/
public class AggregationNode extends PlanNode {
private final static Logger LOG = LogManager.getLogger(AggregationNode.class);
private static final Logger LOG = LogManager.getLogger(AggregationNode.class);
private final AggregateInfo aggInfo;
// Set to true if this aggregation node needs to run the Finalize step. This
@ -64,7 +65,7 @@ public class AggregationNode extends PlanNode {
* isIntermediate is true if it is a slave node in a 2-part agg plan.
*/
public AggregationNode(PlanNodeId id, PlanNode input, AggregateInfo aggInfo) {
super(id, aggInfo.getOutputTupleId().asList(), "AGGREGATE");
super(id, aggInfo.getOutputTupleId().asList(), "AGGREGATE", NodeType.AGG_NODE);
this.aggInfo = aggInfo;
this.children.add(input);
this.needsFinalize = true;
@ -75,7 +76,7 @@ public class AggregationNode extends PlanNode {
* Copy c'tor used in clone().
*/
private AggregationNode(PlanNodeId id, AggregationNode src) {
super(id, src, "AGGREGATE");
super(id, src, "AGGREGATE", NodeType.AGG_NODE);
aggInfo = src.aggInfo;
needsFinalize = src.needsFinalize;
}
@ -169,46 +170,14 @@ public class AggregationNode extends PlanNode {
}
@Override
public void computeStats(Analyzer analyzer) {
public void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
if (!analyzer.safeIsEnableJoinReorderBasedCost()) {
return;
}
List<Expr> groupingExprs = aggInfo.getGroupingExprs();
cardinality = 1;
// cardinality: product of # of distinct values produced by grouping exprs
for (Expr groupingExpr : groupingExprs) {
long numDistinct = groupingExpr.getNumDistinctValues();
LOG.debug("grouping expr: " + groupingExpr.toSql() + " #distinct=" + Long.toString(
numDistinct));
if (numDistinct == -1) {
cardinality = -1;
break;
}
// This is prone to overflow, because we keep multiplying cardinalities,
// even if the grouping exprs are functionally dependent (example:
// group by the primary key of a table plus a number of other columns from that
// same table)
// TODO: try to recognize functional dependencies
// TODO: as a shortcut, instead of recognizing functional dependencies,
// limit the contribution of a single table to the number of rows
// of that table (so that when we're grouping by the primary key col plus
// some others, the estimate doesn't overshoot dramatically)
cardinality *= numDistinct;
}
if (cardinality > 0) {
LOG.debug("sel=" + Double.toString(computeSelectivity()));
applyConjunctsSelectivity();
}
// if we ended up with an overflow, the estimate is certain to be wrong
if (cardinality < 0) {
cardinality = -1;
}
capCardinalityAtLimit();
if (LOG.isDebugEnabled()) {
LOG.debug("stats Agg: cardinality={}", cardinality);
}
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
}
@Override

View File

@ -27,6 +27,7 @@ import org.apache.doris.analysis.ExprSubstitutionMap;
import org.apache.doris.analysis.OrderByElement;
import org.apache.doris.analysis.TupleDescriptor;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TAnalyticNode;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TPlanNode;
@ -80,7 +81,7 @@ public class AnalyticEvalNode extends PlanNode {
AnalyticWindow analyticWindow, TupleDescriptor intermediateTupleDesc,
TupleDescriptor outputTupleDesc, ExprSubstitutionMap logicalToPhysicalSmap,
Expr partitionByEq, Expr orderByEq, TupleDescriptor bufferedTupleDesc) {
super(id, input.getTupleIds(), "ANALYTIC");
super(id, input.getTupleIds(), "ANALYTIC", NodeType.ANALYTIC_EVAL_NODE);
Preconditions.checkState(!tupleIds.contains(outputTupleDesc.getId()));
// we're materializing the input row augmented with the analytic output tuple
tupleIds.add(outputTupleDesc.getId());
@ -135,17 +136,13 @@ public class AnalyticEvalNode extends PlanNode {
}
@Override
protected void computeStats(Analyzer analyzer) {
protected void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
if (!analyzer.safeIsEnableJoinReorderBasedCost()) {
return;
}
cardinality = cardinality == -1 ? getChild(0).cardinality : cardinality;
applyConjunctsSelectivity();
capCardinalityAtLimit();
if (LOG.isDebugEnabled()) {
LOG.debug("stats AnalyticEval: cardinality={}", cardinality);
}
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
}
@Override

View File

@ -20,6 +20,7 @@ package org.apache.doris.planner;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.AssertNumRowsElement;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TAssertNumRowsNode;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TPlanNode;
@ -42,7 +43,7 @@ public class AssertNumRowsNode extends PlanNode {
private AssertNumRowsElement.Assertion assertion;
public AssertNumRowsNode(PlanNodeId id, PlanNode input, AssertNumRowsElement assertNumRowsElement) {
super(id, "ASSERT NUMBER OF ROWS");
super(id, "ASSERT NUMBER OF ROWS", NodeType.ASSERT_NUM_ROWS_NODE);
this.desiredNumOfRows = assertNumRowsElement.getDesiredNumOfRows();
this.subqueryString = assertNumRowsElement.getSubqueryString();
this.assertion = assertNumRowsElement.getAssertion();
@ -57,7 +58,8 @@ public class AssertNumRowsNode extends PlanNode {
super.init(analyzer);
super.computeStats(analyzer);
if (analyzer.safeIsEnableJoinReorderBasedCost()) {
cardinality = 1;
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
}
if (LOG.isDebugEnabled()) {
LOG.debug("stats AssertNumRows: cardinality={}", cardinality);

View File

@ -19,8 +19,8 @@ package org.apache.doris.planner;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.TableRef;
import org.apache.doris.common.CheckedMath;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
@ -41,7 +41,7 @@ public class CrossJoinNode extends PlanNode {
private final TableRef innerRef;
public CrossJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, TableRef innerRef) {
super(id, "CROSS JOIN");
super(id, "CROSS JOIN", NodeType.CROSS_JOIN_NODE);
this.innerRef = innerRef;
tupleIds.addAll(outer.getTupleIds());
tupleIds.addAll(inner.getTupleIds());
@ -68,21 +68,13 @@ public class CrossJoinNode extends PlanNode {
}
@Override
public void computeStats(Analyzer analyzer) {
public void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
if (!analyzer.safeIsEnableJoinReorderBasedCost()) {
return;
}
if (getChild(0).cardinality == -1 || getChild(1).cardinality == -1) {
cardinality = -1;
} else {
cardinality = CheckedMath.checkedMultiply(getChild(0).cardinality, getChild(1).cardinality);
applyConjunctsSelectivity();
capCardinalityAtLimit();
}
if (LOG.isDebugEnabled()) {
LOG.debug("stats CrossJoin: cardinality={}", Long.toString(cardinality));
}
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
}
@Override

View File

@ -19,6 +19,8 @@ package org.apache.doris.planner;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.TupleId;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
@ -35,17 +37,18 @@ import java.util.ArrayList;
* construct a valid row empty batch.
*/
public class EmptySetNode extends PlanNode {
private final static Logger LOG = LogManager.getLogger(EmptySetNode.class);
private static final Logger LOG = LogManager.getLogger(EmptySetNode.class);
public EmptySetNode(PlanNodeId id, ArrayList<TupleId> tupleIds) {
super(id, tupleIds, "EMPTYSET");
super(id, tupleIds, "EMPTYSET", NodeType.EMPTY_SET_NODE);
Preconditions.checkArgument(tupleIds.size() > 0);
}
@Override
public void computeStats(Analyzer analyzer) {
public void computeStats(Analyzer analyzer) throws UserException {
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
avgRowSize = 0;
cardinality = 0;
numNodes = 1;
if (LOG.isDebugEnabled()) {
LOG.debug("stats EmptySet:" + id + ", cardinality: " + cardinality);
@ -53,7 +56,7 @@ public class EmptySetNode extends PlanNode {
}
@Override
public void init(Analyzer analyzer) {
public void init(Analyzer analyzer) throws UserException {
Preconditions.checkState(conjuncts.isEmpty());
// If the physical output tuple produced by an AnalyticEvalNode wasn't created
// the logical output tuple is returned by getMaterializedTupleIds(). It needs

View File

@ -31,7 +31,7 @@ public class ExceptNode extends SetOperationNode {
protected ExceptNode(PlanNodeId id, TupleId tupleId,
List<Expr> setOpResultExprs, boolean isInSubplan) {
super(id, tupleId, "EXCEPT", setOpResultExprs, isInSubplan);
super(id, tupleId, "EXCEPT", setOpResultExprs, isInSubplan, NodeType.EXCEPT_NODE);
}
@Override

View File

@ -26,6 +26,7 @@ import org.apache.doris.analysis.SortInfo;
import org.apache.doris.analysis.TupleId;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.VectorizedUtil;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TExchangeNode;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
@ -70,7 +71,7 @@ public class ExchangeNode extends PlanNode {
* need to compute the cardinality here.
*/
public ExchangeNode(PlanNodeId id, PlanNode inputNode, boolean copyConjuncts) {
super(id, inputNode, EXCHANGE_NODE);
super(id, inputNode, EXCHANGE_NODE, NodeType.EXCHANGE_NODE);
offset = 0;
children.add(inputNode);
if (!copyConjuncts) {
@ -109,10 +110,10 @@ public class ExchangeNode extends PlanNode {
}
@Override
protected void computeStats(Analyzer analyzer) {
protected void computeStats(Analyzer analyzer) throws UserException {
Preconditions.checkState(children.size() == 1);
cardinality = children.get(0).cardinality;
capCardinalityAtLimit();
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
if (LOG.isDebugEnabled()) {
LOG.debug("stats Exchange:" + id + ", cardinality: " + cardinality);
}

View File

@ -38,6 +38,7 @@ import org.apache.doris.common.NotImplementedException;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.VectorizedUtil;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TEqJoinCondition;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.THashJoinNode;
@ -85,7 +86,7 @@ public class HashJoinNode extends PlanNode {
public HashJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, TableRef innerRef,
List<Expr> eqJoinConjuncts, List<Expr> otherJoinConjuncts) {
super(id, "HASH JOIN");
super(id, "HASH JOIN", NodeType.HASH_JOIN_NODE);
Preconditions.checkArgument(eqJoinConjuncts != null && !eqJoinConjuncts.isEmpty());
Preconditions.checkArgument(otherJoinConjuncts != null);
tblRefIds.addAll(outer.getTblRefIds());
@ -452,20 +453,16 @@ public class HashJoinNode extends PlanNode {
@Override
public void computeStats(Analyzer analyzer) {
public void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
if (!analyzer.safeIsEnableJoinReorderBasedCost()) {
return;
}
if (joinOp.isSemiAntiJoin()) {
cardinality = getSemiJoinCardinality();
} else if (joinOp.isInnerJoin() || joinOp.isOuterJoin()) {
cardinality = getJoinCardinality();
} else {
Preconditions.checkState(false, "joinOp is not supported");
}
capCardinalityAtLimit();
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
if (LOG.isDebugEnabled()) {
LOG.debug("stats HashJoin:" + id + ", cardinality: " + cardinality);
}

View File

@ -46,7 +46,7 @@ public class IcebergScanNode extends BrokerScanNode {
public IcebergScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName,
List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
super(id, desc, planNodeName, fileStatusesList, filesAdded, NodeType.ICEBREG_SCAN_NODE);
super(id, desc, planNodeName, fileStatusesList, filesAdded, NodeType.ICEBERG_SCAN_NODE);
icebergTable = (IcebergTable) desc.getTable();
}

View File

@ -26,6 +26,7 @@ import org.apache.doris.analysis.TupleDescriptor;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.MysqlTable;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TMySQLScanNode;
import org.apache.doris.thrift.TPlanNode;
@ -163,13 +164,12 @@ public class MysqlScanNode extends ScanNode {
}
@Override
public void computeStats(Analyzer analyzer) {
public void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
// even if the current scan node has no data, at least one backend will be assigned when the fragment actually executes
numNodes = numNodes <= 0 ? 1 : numNodes;
// this is just to avoid mysql scan node's cardinality being -1. So that we can calculate the join cost
// normally.
// We assume that the data volume of all mysql tables is very small, so set cardinality directly to 1.
cardinality = cardinality == -1 ? 1 : cardinality;
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
}
}

View File

@ -27,6 +27,7 @@ import org.apache.doris.analysis.TupleDescriptor;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.OdbcTable;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TOdbcScanNode;
import org.apache.doris.thrift.TOdbcTableType;
@ -215,13 +216,12 @@ public class OdbcScanNode extends ScanNode {
}
@Override
public void computeStats(Analyzer analyzer) {
public void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
// even if the current scan node has no data, at least one backend will be assigned when the fragment actually executes
numNodes = numNodes <= 0 ? 1 : numNodes;
// this is just to avoid odbc scan node's cardinality being -1. So that we can calculate the join cost
// normally.
// We assume that the data volume of all odbc tables is very small, so set cardinality directly to 1.
cardinality = cardinality == -1 ? 1 : cardinality;
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
}
}

View File

@ -349,8 +349,8 @@ public class OlapScanNode extends ScanNode {
* - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm.
* - So only an inaccurate cardinality can be calculated here.
*/
mockRowCountInStatistic();
if (analyzer.safeIsEnableJoinReorderBasedCost()) {
mockRowCountInStatistic();
computeInaccurateCardinality();
}
}
@ -397,7 +397,7 @@ public class OlapScanNode extends ScanNode {
}
@Override
public void computeStats(Analyzer analyzer) {
public void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
if (cardinality > 0) {
avgRowSize = totalBytes / (float) cardinality * COMPRESSION_RATIO;

View File

@ -140,7 +140,7 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
protected NodeType nodeType = NodeType.DEFAULT;
protected StatsDeriveResult statsDeriveResult;
protected PlanNode(PlanNodeId id, ArrayList<TupleId> tupleIds, String planNodeName) {
protected PlanNode(PlanNodeId id, ArrayList<TupleId> tupleIds, String planNodeName, NodeType nodeType) {
this.id = id;
this.limit = -1;
// make a copy, just to be on the safe side
@ -149,9 +149,10 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
this.cardinality = -1;
this.planNodeName = VectorizedUtil.isVectorized() ? "V" + planNodeName : planNodeName;
this.numInstances = 1;
this.nodeType = nodeType;
}
protected PlanNode(PlanNodeId id, String planNodeName) {
protected PlanNode(PlanNodeId id, String planNodeName, NodeType nodeType) {
this.id = id;
this.limit = -1;
this.tupleIds = Lists.newArrayList();
@ -159,12 +160,13 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
this.cardinality = -1;
this.planNodeName = VectorizedUtil.isVectorized() ? "V" + planNodeName : planNodeName;
this.numInstances = 1;
this.nodeType = nodeType;
}
/**
* Copy ctor. Also passes in new id.
*/
protected PlanNode(PlanNodeId id, PlanNode node, String planNodeName) {
protected PlanNode(PlanNodeId id, PlanNode node, String planNodeName, NodeType nodeType) {
this.id = id;
this.limit = node.limit;
this.tupleIds = Lists.newArrayList(node.tupleIds);
@ -181,17 +183,30 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
public enum NodeType {
DEFAULT,
AGG_NODE,
ANALYTIC_EVAL_NODE,
ASSERT_NUM_ROWS_NODE,
BROKER_SCAN_NODE,
CROSS_JOIN_NODE,
EMPTY_SET_NODE,
ES_SCAN_NODE,
EXCEPT_NODE,
EXCHANGE_NODE,
HASH_JOIN_NODE,
HIVE_SCAN_NODE,
MERGE_NODE,
ES_SCAN_NODE,
ICEBREG_SCAN_NODE,
ICEBERG_SCAN_NODE,
INTERSECT_NODE,
LOAD_SCAN_NODE,
MYSQL_SCAN_NODE,
ODBC_SCAN_NODE,
OLAP_SCAN_NODE,
REPEAT_NODE,
SELECT_NODE,
SET_OPERATION_NODE,
SCHEMA_SCAN_NODE,
SORT_NODE,
STREAM_LOAD_SCAN_NODE,
TABLE_FUNCTION_NODE,
UNION_NODE,
}
public String getPlanNodeName() {
@ -579,7 +594,7 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
* from finalize() (to facilitate inserting additional nodes during plan
* partitioning w/o the need to call finalize() recursively on the whole tree again).
*/
protected void computeStats(Analyzer analyzer) {
protected void computeStats(Analyzer analyzer) throws UserException {
avgRowSize = 0.0F;
for (TupleId tid : tupleIds) {
TupleDescriptor desc = analyzer.getTupleDesc(tid);

View File

@ -30,6 +30,7 @@ import org.apache.doris.analysis.TupleDescriptor;
import org.apache.doris.analysis.TupleId;
import org.apache.doris.analysis.VirtualSlotRef;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
@ -66,7 +67,7 @@ public class RepeatNode extends PlanNode {
private GroupByClause groupByClause;
protected RepeatNode(PlanNodeId id, PlanNode input, GroupingInfo groupingInfo, GroupByClause groupByClause) {
super(id, input.getTupleIds(), "REPEAT_NODE");
super(id, input.getTupleIds(), "REPEAT_NODE", NodeType.REPEAT_NODE);
this.children.add(input);
this.groupingInfo = groupingInfo;
this.input = input;
@ -77,7 +78,7 @@ public class RepeatNode extends PlanNode {
// only for unittest
protected RepeatNode(PlanNodeId id, PlanNode input, List<Set<SlotId>> repeatSlotIdList,
TupleDescriptor outputTupleDesc, List<List<Long>> groupingList) {
super(id, input.getTupleIds(), "REPEAT_NODE");
super(id, input.getTupleIds(), "REPEAT_NODE", NodeType.REPEAT_NODE);
this.children.add(input);
this.repeatSlotIdList = buildIdSetList(repeatSlotIdList);
this.groupingList = groupingList;
@ -99,10 +100,13 @@ public class RepeatNode extends PlanNode {
}
@Override
public void computeStats(Analyzer analyzer) {
public void computeStats(Analyzer analyzer) throws UserException {
avgRowSize = 0;
numNodes = 1;
cardinality = 0;
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
if (LOG.isDebugEnabled()) {
LOG.debug("stats Sort: cardinality=" + cardinality);
}

View File

@ -64,8 +64,7 @@ abstract public class ScanNode extends PlanNode {
protected Analyzer analyzer;
public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
super(id, desc.getId().asList(), planNodeName);
super.nodeType = nodeType;
super(id, desc.getId().asList(), planNodeName, nodeType);
this.desc = desc;
}

View File

@ -23,6 +23,7 @@ package org.apache.doris.planner;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.Expr;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
@ -39,12 +40,12 @@ public class SelectNode extends PlanNode {
private final static Logger LOG = LogManager.getLogger(SelectNode.class);
protected SelectNode(PlanNodeId id, PlanNode child) {
super(id, child.getTupleIds(), "SELECT");
super(id, child.getTupleIds(), "SELECT", NodeType.SELECT_NODE);
addChild(child);
this.nullableTupleIds = child.nullableTupleIds;
}
protected SelectNode(PlanNodeId id, PlanNode child, List<Expr> conjuncts) {
super(id, child.getTupleIds(), "SELECT");
super(id, child.getTupleIds(), "SELECT", NodeType.SELECT_NODE);
addChild(child);
this.tblRefIds = child.tblRefIds;
this.nullableTupleIds = child.nullableTupleIds;
@ -64,14 +65,14 @@ public class SelectNode extends PlanNode {
}
@Override
public void computeStats(Analyzer analyzer) {
public void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
if (!analyzer.safeIsEnableJoinReorderBasedCost()) {
return;
}
cardinality = getChild(0).cardinality;
applyConjunctsSelectivity();
capCardinalityAtLimit();
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
if (LOG.isDebugEnabled()) {
LOG.debug("stats Select: cardinality={}", this.cardinality);
}

View File

@ -82,16 +82,31 @@ public abstract class SetOperationNode extends PlanNode {
protected final TupleId tupleId;
protected SetOperationNode(PlanNodeId id, TupleId tupleId, String planNodeName) {
super(id, tupleId.asList(), planNodeName);
setOpResultExprs = Lists.newArrayList();
protected SetOperationNode(PlanNodeId id, TupleId tupleId, String planNodeName, NodeType nodeType) {
super(id, tupleId.asList(), planNodeName, nodeType);
this.setOpResultExprs = Lists.newArrayList();
this.tupleId = tupleId;
isInSubplan = false;
this.isInSubplan = false;
}
protected SetOperationNode(PlanNodeId id, TupleId tupleId, String planNodeName,
List<Expr> setOpResultExprs, boolean isInSubplan, NodeType nodeType) {
super(id, tupleId.asList(), planNodeName, nodeType);
this.setOpResultExprs = setOpResultExprs;
this.tupleId = tupleId;
this.isInSubplan = isInSubplan;
}
protected SetOperationNode(PlanNodeId id, TupleId tupleId, String planNodeName) {
super(id, tupleId.asList(), planNodeName, NodeType.SET_OPERATION_NODE);
this.setOpResultExprs = Lists.newArrayList();
this.tupleId = tupleId;
this.isInSubplan = false;
}
protected SetOperationNode(PlanNodeId id, TupleId tupleId, String planNodeName,
List<Expr> setOpResultExprs, boolean isInSubplan) {
super(id, tupleId.asList(), planNodeName);
super(id, tupleId.asList(), planNodeName, NodeType.SET_OPERATION_NODE);
this.setOpResultExprs = setOpResultExprs;
this.tupleId = tupleId;
this.isInSubplan = isInSubplan;
@ -181,7 +196,7 @@ public abstract class SetOperationNode extends PlanNode {
}
@Override
public void computeStats(Analyzer analyzer) {
public void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
if (!analyzer.safeIsEnableJoinReorderBasedCost()) {
return;
@ -314,7 +329,7 @@ public abstract class SetOperationNode extends PlanNode {
* been evaluated during registration to set analyzer.hasEmptyResultSet_.
*/
@Override
public void init(Analyzer analyzer) {
public void init(Analyzer analyzer) throws UserException {
Preconditions.checkState(conjuncts.isEmpty());
computeTupleStatAndMemLayout(analyzer);
computeStats(analyzer);

View File

@ -175,7 +175,7 @@ public class SingleNodePlanner {
* they are never unnested, and therefore the corresponding parent scan should not
* materialize them.
*/
private PlanNode createEmptyNode(PlanNode inputPlan, QueryStmt stmt, Analyzer analyzer) {
private PlanNode createEmptyNode(PlanNode inputPlan, QueryStmt stmt, Analyzer analyzer) throws UserException {
ArrayList<TupleId> tupleIds = Lists.newArrayList();
if (inputPlan != null) {
tupleIds = inputPlan.tupleIds;
@ -1165,7 +1165,7 @@ public class SingleNodePlanner {
* Returns a MergeNode that materializes the exprs of the constant selectStmt. Replaces the resultExprs of the
* selectStmt with SlotRefs into the materialized tuple.
*/
private PlanNode createConstantSelectPlan(SelectStmt selectStmt, Analyzer analyzer) {
private PlanNode createConstantSelectPlan(SelectStmt selectStmt, Analyzer analyzer) throws UserException {
Preconditions.checkState(selectStmt.getTableRefs().isEmpty());
ArrayList<Expr> resultExprs = selectStmt.getResultExprs();
// Create tuple descriptor for materialized tuple.

View File

@ -29,6 +29,7 @@ import org.apache.doris.analysis.SlotRef;
import org.apache.doris.analysis.SortInfo;
import org.apache.doris.common.NotImplementedException;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
@ -79,7 +80,7 @@ public class SortNode extends PlanNode {
public SortNode(PlanNodeId id, PlanNode input, SortInfo info, boolean useTopN,
boolean isDefaultLimit, long offset) {
super(id, useTopN ? "TOP-N" : "SORT");
super(id, useTopN ? "TOP-N" : "SORT", NodeType.SORT_NODE);
this.info = info;
this.useTopN = useTopN;
this.isDefaultLimit = isDefaultLimit;
@ -95,7 +96,7 @@ public class SortNode extends PlanNode {
* Clone 'inputSortNode' for distributed Top-N
*/
public SortNode(PlanNodeId id, SortNode inputSortNode, PlanNode child) {
super(id, inputSortNode, inputSortNode.useTopN ? "TOP-N" : "SORT");
super(id, inputSortNode, inputSortNode.useTopN ? "TOP-N" : "SORT", NodeType.SORT_NODE);
this.info = inputSortNode.info;
this.useTopN = inputSortNode.useTopN;
this.isDefaultLimit = inputSortNode.isDefaultLimit;
@ -127,14 +128,15 @@ public class SortNode extends PlanNode {
}
@Override
protected void computeStats(Analyzer analyzer) {
protected void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
if (!analyzer.safeIsEnableJoinReorderBasedCost()) {
return;
}
cardinality = getChild(0).cardinality;
applyConjunctsSelectivity();
capCardinalityAtLimit();
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
if (LOG.isDebugEnabled()) {
LOG.debug("stats Sort: cardinality=" + cardinality);
}

View File

@ -75,7 +75,7 @@ public class StreamLoadScanNode extends LoadScanNode {
// used to construct for streaming loading
public StreamLoadScanNode(
TUniqueId loadId, PlanNodeId id, TupleDescriptor tupleDesc, Table dstTable, LoadTaskInfo taskInfo) {
super(id, tupleDesc, "StreamLoadScanNode");
super(id, tupleDesc, "StreamLoadScanNode", NodeType.STREAM_LOAD_SCAN_NODE);
this.loadId = loadId;
this.dstTable = dstTable;
this.taskInfo = taskInfo;

View File

@ -26,6 +26,7 @@ import org.apache.doris.analysis.SlotRef;
import org.apache.doris.analysis.TupleId;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.UserException;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
@ -50,7 +51,7 @@ public class TableFunctionNode extends PlanNode {
private List<SlotId> outputSlotIds = Lists.newArrayList();
protected TableFunctionNode(PlanNodeId id, PlanNode inputNode, List<LateralViewRef> lateralViewRefs) {
super(id, "TABLE FUNCTION NODE");
super(id, "TABLE FUNCTION NODE", NodeType.TABLE_FUNCTION_NODE);
tupleIds.addAll(inputNode.getTupleIds());
tblRefIds.addAll(inputNode.getTupleIds());
lateralViewTupleIds = lateralViewRefs.stream().map(e -> e.getDesc().getId())
@ -131,10 +132,11 @@ public class TableFunctionNode extends PlanNode {
}
@Override
protected void computeStats(Analyzer analyzer) {
protected void computeStats(Analyzer analyzer) throws UserException {
super.computeStats(analyzer);
// TODO the cardinality = child cardinality * cardinality of list column
cardinality = children.get(0).cardinality;
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
}
@Override

View File

@ -29,12 +29,12 @@ import java.util.List;
public class UnionNode extends SetOperationNode {
protected UnionNode(PlanNodeId id, TupleId tupleId) {
super(id, tupleId, "UNION");
super(id, tupleId, "UNION", NodeType.UNION_NODE);
}
protected UnionNode(PlanNodeId id, TupleId tupleId,
List<Expr> setOpResultExprs, boolean isInSubplan) {
super(id, tupleId, "UNION", setOpResultExprs, isInSubplan);
super(id, tupleId, "UNION", setOpResultExprs, isInSubplan, NodeType.UNION_NODE);
}
@Override

View File

@ -918,6 +918,14 @@ public class SessionVariable implements Serializable, Writable {
this.trimTailingSpacesForExternalTableQuery = trimTailingSpacesForExternalTableQuery;
}
public void setEnableJoinReorderBasedCost(boolean enableJoinReorderBasedCost) {
this.enableJoinReorderBasedCost = enableJoinReorderBasedCost;
}
public void setDisableJoinReorder(boolean disableJoinReorder) {
this.disableJoinReorder = disableJoinReorder;
}
// Serialize to thrift object
// used for rest api
public TQueryOptions toThrift() {

View File

@ -0,0 +1,89 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.Expr;
import org.apache.doris.common.UserException;
import org.apache.doris.planner.AggregationNode;
import org.apache.doris.planner.PlanNode;
import com.google.common.base.Preconditions;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
/**
* Derive AggNode statistics.
*/
public class AggStatsDerive extends BaseStatsDerive {
private static final Logger LOG = LogManager.getLogger(AggStatsDerive.class);
List<Expr> groupingExprs = new ArrayList<>();
@Override
public void init(PlanNode node) throws UserException {
Preconditions.checkState(node instanceof AggregationNode);
super.init(node);
groupingExprs.addAll(((AggregationNode) node).getAggInfo().getGroupingExprs());
}
@Override
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
@Override
protected long deriveRowCount() {
rowCount = 1;
// rowCount: product of # of distinct values produced by grouping exprs
for (Expr groupingExpr : groupingExprs) {
long numDistinct = groupingExpr.getNumDistinctValues();
LOG.debug("grouping expr: " + groupingExpr.toSql() + " #distinct=" + Long.toString(
numDistinct));
if (numDistinct == -1) {
rowCount = -1;
break;
}
// This is prone to overflow, because we keep multiplying cardinalities,
// even if the grouping exprs are functionally dependent (example:
// group by the primary key of a table plus a number of other columns from that
// same table)
// TODO: try to recognize functional dependencies
// TODO: as a shortcut, instead of recognizing functional dependencies,
// limit the contribution of a single table to the number of rows
// of that table (so that when we're grouping by the primary key col plus
// some others, the estimate doesn't overshoot dramatically)
rowCount *= numDistinct;
}
if (rowCount > 0) {
LOG.debug("sel=" + Double.toString(computeSelectivity()));
applyConjunctsSelectivity();
}
// if we ended up with an overflow, the estimate is certain to be wrong
if (rowCount < 0) {
rowCount = -1;
}
capRowCountAtLimit();
if (LOG.isDebugEnabled()) {
LOG.debug("stats Agg: rowCount={}", rowCount);
}
return rowCount;
}
}
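As a quick illustration of the grouping-expression estimate above (made-up numbers): a GROUP BY over dt and id with NDV(dt) = 365 and NDV(id) = 1,000 gives rowCount = 365 * 1,000 = 365,000 before conjunct selectivity and the LIMIT cap are applied; if any grouping expression reports an unknown NDV (-1), the whole estimate falls back to -1.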

View File

@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import com.google.common.base.Preconditions;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
* Derive AnalyticEvalNode statistics.
*/
public class AnalyticEvalStatsDerive extends BaseStatsDerive {
private static final Logger LOG = LogManager.getLogger(AnalyticEvalStatsDerive.class);
@Override
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
@Override
protected long deriveRowCount() {
Preconditions.checkState(!childrenStatsResult.isEmpty());
rowCount = rowCount == -1 ? childrenStatsResult.get(0).getRowCount() : rowCount;
applyConjunctsSelectivity();
capRowCountAtLimit();
if (LOG.isDebugEnabled()) {
LOG.debug("stats AnalyticEval: rowCount={}", rowCount);
}
return rowCount;
}
}

View File

@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
/**
* Derive AssertNumRowsNode statistics.
*/
public class AssertNumRowsStatsDerive extends BaseStatsDerive {
@Override
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
@Override
protected long deriveRowCount() {
rowCount = 1;
return rowCount;
}
}

View File

@ -32,6 +32,9 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
/**
* Base class for statistics derivation.
*/
public class BaseStatsDerive {
private static final Logger LOG = LogManager.getLogger(BaseStatsDerive.class);
// estimate of the output rowCount of this node;
@ -49,8 +52,9 @@ public class BaseStatsDerive {
for (PlanNode childNode : node.getChildren()) {
StatsDeriveResult result = childNode.getStatsDeriveResult();
if (result == null) {
throw new UserException("childNode statsDeriveResult is null, childNodeType is " + childNode.getNodeType()
+ "parentNodeType is " + node.getNodeType());
throw new UserException(
"childNode statsDeriveResult is null, childNodeType is " + childNode.getNodeType()
+ "parentNodeType is " + node.getNodeType());
}
childrenStatsResult.add(result);
}
@ -92,14 +96,19 @@ public class BaseStatsDerive {
/**
* Returns the estimated combined selectivity of all conjuncts. Uses heuristics to
* address the following estimation challenges:
* 1. The individual selectivities of conjuncts may be unknown.
* 2. Two selectivities, whether known or unknown, could be correlated. Assuming
* independence can lead to significant underestimation.
*
* <p>
* The first issue is addressed by using a single default selectivity that is
* representative of all conjuncts with unknown selectivities.
* The second issue is addressed by an exponential backoff when multiplying each
* additional selectivity into the final result.
* * 1. The individual selectivities of conjuncts may be unknown.
* * 2. Two selectivities, whether known or unknown, could be correlated. Assuming
* * independence can lead to significant underestimation.
* </p>
*
* <p>
* * The first issue is addressed by using a single default selectivity that is
* * representative of all conjuncts with unknown selectivities.
* * The second issue is addressed by an exponential backoff when multiplying each
* * additional selectivity into the final result.
* </p>
*/
protected double computeCombinedSelectivity(List<Expr> conjuncts) {
// Collect all estimated selectivities.
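For intuition on the exponential backoff described in the comment above (made-up numbers, and assuming the common form in which the i-th smallest selectivity is raised to the power 1/(i+1); the exact exponent is not visible in this hunk): three conjuncts with selectivities 0.1, 0.2 and 0.5 combine to 0.1 * 0.2 * 0.5 = 0.01 under naive independence, but to roughly 0.1 * 0.2^(1/2) * 0.5^(1/3) ≈ 0.036 with backoff, which keeps the combined selectivity from collapsing toward zero when conjuncts are correlated.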

View File

@ -0,0 +1,53 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.common.CheckedMath;
import com.google.common.base.Preconditions;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
* Derive CrossJoinNode statistics.
*/
public class CrossJoinStatsDerive extends BaseStatsDerive {
private static final Logger LOG = LogManager.getLogger(CrossJoinStatsDerive.class);
@Override
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
@Override
protected long deriveRowCount() {
Preconditions.checkState(childrenStatsResult.size() == 2);
if (childrenStatsResult.get(0).getRowCount() == -1 || childrenStatsResult.get(1).getRowCount() == -1) {
rowCount = -1;
} else {
rowCount = CheckedMath.checkedMultiply(childrenStatsResult.get(0).getRowCount(),
childrenStatsResult.get(1).getRowCount());
applyConjunctsSelectivity();
capRowCountAtLimit();
}
if (LOG.isDebugEnabled()) {
LOG.debug("stats CrossJoin: rowCount={}", Long.toString(rowCount));
}
return rowCount;
}
}
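Illustration of the cross-join estimate (made-up numbers): children with 1,000 and 2,000 rows give rowCount = 1,000 * 2,000 = 2,000,000, after which conjunct selectivity and the LIMIT cap are applied; if either child's row count is unknown (-1), the result stays -1.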

View File

@ -23,11 +23,40 @@ public class DeriveFactory {
public BaseStatsDerive getStatsDerive(PlanNode.NodeType nodeType) {
switch (nodeType) {
case AGG_NODE:
return new AggStatsDerive();
case ANALYTIC_EVAL_NODE:
return new AnalyticEvalStatsDerive();
case ASSERT_NUM_ROWS_NODE:
return new AssertNumRowsStatsDerive();
case CROSS_JOIN_NODE:
return new CrossJoinStatsDerive();
case EMPTY_SET_NODE:
case REPEAT_NODE:
return new EmptySetStatsDerive();
case EXCHANGE_NODE:
return new ExchangeStatsDerive();
case HASH_JOIN_NODE:
return new HashJoinStatsDerive();
case OLAP_SCAN_NODE:
return new OlapScanStatsDerive();
case AGG_NODE:
case HASH_JOIN_NODE:
case MERGE_NODE:
case MYSQL_SCAN_NODE:
case ODBC_SCAN_NODE:
return new MysqlStatsDerive();
case SELECT_NODE:
case SORT_NODE:
return new SelectStatsDerive();
case TABLE_FUNCTION_NODE:
return new TableFunctionStatsDerive();
case BROKER_SCAN_NODE:
case EXCEPT_NODE:
case ES_SCAN_NODE:
case HIVE_SCAN_NODE:
case ICEBERG_SCAN_NODE:
case INTERSECT_NODE:
case SCHEMA_SCAN_NODE:
case STREAM_LOAD_SCAN_NODE:
case UNION_NODE:
case DEFAULT:
default:
return new BaseStatsDerive();

View File

@ -0,0 +1,35 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
/**
* Derive EmptySetNode statistics.
*/
public class EmptySetStatsDerive extends BaseStatsDerive {
// Current REPEAT_NODE also uses this derivation method
@Override
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
@Override
protected long deriveRowCount() {
rowCount = 0;
return rowCount;
}
}

View File

@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import com.google.common.base.Preconditions;
/**
* Derive ExchangeNode statistics.
*/
public class ExchangeStatsDerive extends BaseStatsDerive {
@Override
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
@Override
protected long deriveRowCount() {
Preconditions.checkState(!childrenStatsResult.isEmpty());
rowCount = childrenStatsResult.get(0).getRowCount();
capRowCountAtLimit();
return rowCount;
}
}

View File

@ -0,0 +1,253 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.BinaryPredicate;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.JoinOperator;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.SlotRef;
import org.apache.doris.catalog.ColumnStats;
import org.apache.doris.common.CheckedMath;
import org.apache.doris.common.UserException;
import org.apache.doris.planner.HashJoinNode;
import org.apache.doris.planner.PlanNode;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.List;
/**
* Derive HashJoinNode statistics.
*/
public class HashJoinStatsDerive extends BaseStatsDerive {
private JoinOperator joinOp;
private List<BinaryPredicate> eqJoinConjuncts = Lists.newArrayList();
@Override
public void init(PlanNode node) throws UserException {
Preconditions.checkState(node instanceof HashJoinNode);
super.init(node);
joinOp = ((HashJoinNode) node).getJoinOp();
eqJoinConjuncts.addAll(((HashJoinNode) node).getEqJoinConjuncts());
}
@Override
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
@Override
protected long deriveRowCount() {
if (joinOp.isSemiAntiJoin()) {
rowCount = getSemiJoinrowCount();
} else if (joinOp.isInnerJoin() || joinOp.isOuterJoin()) {
rowCount = getJoinrowCount();
} else {
Preconditions.checkState(false, "joinOp is not supported");
}
capRowCountAtLimit();
return rowCount;
}
/**
* Returns the estimated rowCount of a semi join node.
* For a left semi join between child(0) and child(1), we look for equality join
* conditions "L.c = R.d" (with L being from child(0) and R from child(1)) and use as
* the rowCount estimate the minimum of
* |child(0)| * Min(NDV(L.c), NDV(R.d)) / NDV(L.c)
* over all suitable join conditions. The reasoning is that:
* -each row in child(0) is returned at most once
* -the probability of a row in child(0) having a match in R is
* Min(NDV(L.c), NDV(R.d)) / NDV(L.c)
*
*<p>
* For a left anti join we estimate the rowCount as the minimum of:
* |L| * Max(NDV(L.c) - NDV(R.d), NDV(L.c)) / NDV(L.c)
* over all suitable join conditions. The reasoning is that:
* - each row in child(0) is returned at most once
* - if NDV(L.c) > NDV(R.d) then the probability of row in L having a match
* in child(1) is (NDV(L.c) - NDV(R.d)) / NDV(L.c)
* - otherwise, we conservatively use |L| to avoid underestimation
*</p>
*
*<p>
* We analogously estimate the rowCount for right semi/anti joins, and treat the
* null-aware anti join like a regular anti join
*</p>
*/
private long getSemiJoinrowCount() {
Preconditions.checkState(joinOp.isSemiJoin());
// Return -1 if the rowCount of the returned side is unknown.
long rowCount;
if (joinOp == JoinOperator.RIGHT_SEMI_JOIN
|| joinOp == JoinOperator.RIGHT_ANTI_JOIN) {
if (childrenStatsResult.get(1).getRowCount() == -1) {
return -1;
}
rowCount = childrenStatsResult.get(1).getRowCount();
} else {
if (childrenStatsResult.get(0).getRowCount() == -1) {
return -1;
}
rowCount = childrenStatsResult.get(0).getRowCount();
}
double minSelectivity = 1.0;
for (Expr eqJoinPredicate : eqJoinConjuncts) {
long lhsNdv = getNdv(eqJoinPredicate.getChild(0));
lhsNdv = Math.min(lhsNdv, childrenStatsResult.get(0).getRowCount());
long rhsNdv = getNdv(eqJoinPredicate.getChild(1));
rhsNdv = Math.min(rhsNdv, childrenStatsResult.get(1).getRowCount());
// Skip conjuncts with unknown NDV on either side.
if (lhsNdv == -1 || rhsNdv == -1) {
continue;
}
double selectivity = 1.0;
switch (joinOp) {
case LEFT_SEMI_JOIN: {
selectivity = (double) Math.min(lhsNdv, rhsNdv) / (double) (lhsNdv);
break;
}
case RIGHT_SEMI_JOIN: {
selectivity = (double) Math.min(lhsNdv, rhsNdv) / (double) (rhsNdv);
break;
}
case LEFT_ANTI_JOIN:
case NULL_AWARE_LEFT_ANTI_JOIN: {
selectivity = (double) (lhsNdv > rhsNdv ? (lhsNdv - rhsNdv) : lhsNdv) / (double) lhsNdv;
break;
}
case RIGHT_ANTI_JOIN: {
selectivity = (double) (rhsNdv > lhsNdv ? (rhsNdv - lhsNdv) : rhsNdv) / (double) rhsNdv;
break;
}
default:
Preconditions.checkState(false);
}
minSelectivity = Math.min(minSelectivity, selectivity);
}
Preconditions.checkState(rowCount != -1);
return Math.round(rowCount * minSelectivity);
}
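// Worked example of the left-semi estimate above (illustrative numbers only):
// with |child(0)| = 1,000,000, NDV(L.c) = 50,000 and NDV(R.d) = 10,000, that
// conjunct's selectivity is min(50,000, 10,000) / 50,000 = 0.2, so the node is
// estimated at 1,000,000 * 0.2 = 200,000 rows; the smallest selectivity over
// all eligible equi-join conjuncts is the one applied.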
/**
* Unwraps the SlotRef in expr and returns the NDVs of it.
* Returns -1 if the NDVs are unknown or if expr is not a SlotRef.
*/
private long getNdv(Expr expr) {
SlotRef slotRef = expr.unwrapSlotRef(false);
if (slotRef == null) {
return -1;
}
SlotDescriptor slotDesc = slotRef.getDesc();
if (slotDesc == null) {
return -1;
}
ColumnStats stats = slotDesc.getStats();
if (!stats.hasNumDistinctValues()) {
return -1;
}
return stats.getNumDistinctValues();
}
private long getJoinrowCount() {
Preconditions.checkState(joinOp.isInnerJoin() || joinOp.isOuterJoin());
Preconditions.checkState(childrenStatsResult.size() == 2);
long lhsCard = childrenStatsResult.get(0).getRowCount();
long rhsCard = childrenStatsResult.get(1).getRowCount();
if (lhsCard == -1 || rhsCard == -1) {
return lhsCard;
}
// Collect join conjuncts that are eligible to participate in rowCount estimation.
List<HashJoinNode.EqJoinConjunctScanSlots> eqJoinConjunctSlots = new ArrayList<>();
for (Expr eqJoinConjunct : eqJoinConjuncts) {
HashJoinNode.EqJoinConjunctScanSlots slots = HashJoinNode.EqJoinConjunctScanSlots.create(eqJoinConjunct);
if (slots != null) {
eqJoinConjunctSlots.add(slots);
}
}
if (eqJoinConjunctSlots.isEmpty()) {
// There are no eligible equi-join conjuncts.
return lhsCard;
}
return getGenericJoinrowCount(eqJoinConjunctSlots, lhsCard, rhsCard);
}
/**
* Returns the estimated join rowCount of a generic N:M inner or outer join based
* on the given list of equi-join conjunct slots and the join input cardinalities.
* The returned result is >= 0.
* The list of join conjuncts must be non-empty and the cardinalities must be >= 0.
*
* <p>
* Generic estimation:
* rowCount = |child(0)| * |child(1)| / max(NDV(L.c), NDV(R.d))
* - case A: NDV(L.c) <= NDV(R.d)
* every row from child(0) joins with |child(1)| / NDV(R.d) rows
* - case B: NDV(L.c) > NDV(R.d)
* every row from child(1) joins with |child(0)| / NDV(L.c) rows
* - we adjust the NDVs from both sides to account for predicates that may
* have reduced the rowCount and NDVs
*</p>
*/
private long getGenericJoinrowCount(List<HashJoinNode.EqJoinConjunctScanSlots> eqJoinConjunctSlots,
long lhsCard,
long rhsCard) {
Preconditions.checkState(joinOp.isInnerJoin() || joinOp.isOuterJoin());
Preconditions.checkState(!eqJoinConjunctSlots.isEmpty());
Preconditions.checkState(lhsCard >= 0 && rhsCard >= 0);
long result = -1;
for (HashJoinNode.EqJoinConjunctScanSlots slots : eqJoinConjunctSlots) {
// Adjust the NDVs on both sides to account for predicates. Intuitively, the NDVs
// should only decrease. We ignore adjustments that would lead to an increase.
double lhsAdjNdv = slots.lhsNdv();
if (slots.lhsNumRows() > lhsCard) {
lhsAdjNdv *= lhsCard / slots.lhsNumRows();
}
double rhsAdjNdv = slots.rhsNdv();
if (slots.rhsNumRows() > rhsCard) {
rhsAdjNdv *= rhsCard / slots.rhsNumRows();
}
// A lower limit of 1 on the max Adjusted Ndv ensures we don't estimate
// rowCount more than the max possible.
long tmpNdv = Double.doubleToLongBits(Math.max(1, Math.max(lhsAdjNdv, rhsAdjNdv)));
long joinCard = tmpNdv == rhsCard
? lhsCard
: CheckedMath.checkedMultiply(
Math.round((lhsCard / Math.max(1, Math.max(lhsAdjNdv, rhsAdjNdv)))), rhsCard);
if (result == -1) {
result = joinCard;
} else {
result = Math.min(result, joinCard);
}
}
Preconditions.checkState(result >= 0);
return result;
}
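// Worked example of the generic estimate above (illustrative numbers only):
// with |child(0)| = 100,000, |child(1)| = 20,000, NDV(L.c) = 10,000 and
// NDV(R.d) = 5,000, this conjunct yields 100,000 * 20,000 / max(10,000, 5,000)
// = 200,000 rows; the minimum over all equi-join conjuncts is returned.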
}

View File

@ -0,0 +1,39 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
/**
* Derive MysqlScanNode statistics.
*/
public class MysqlStatsDerive extends BaseStatsDerive {
// Current ODBC_SCAN_NODE also uses this derivation method
@Override
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
@Override
protected long deriveRowCount() {
// this is just to avoid mysql scan node's rowCount being -1. So that we can calculate the join cost
// normally.
// We assume that the data volume of all mysql tables is very small, so set rowCount directly to 1.
rowCount = rowCount == -1 ? 1 : rowCount;
return rowCount;
}
}

View File

@ -30,6 +30,9 @@ import com.google.common.base.Preconditions;
import java.util.HashMap;
import java.util.Map;
/**
* Derive OlapScanNode Statistics.
*/
public class OlapScanStatsDerive extends BaseStatsDerive {
// Currently, due to the structure of doris,
// the selected materialized view is not determined when calculating the statistical information of scan,
@ -50,7 +53,7 @@ public class OlapScanStatsDerive extends BaseStatsDerive {
@Override
public StatsDeriveResult deriveStats() {
/**
/*
* Compute InAccurate cardinality before mv selector and tablet pruning.
* - Accurate statistical information relies on the selector of materialized views and bucket reduction.
* - However, Those both processes occur after the reorder algorithm is completed.
@ -68,9 +71,16 @@ public class OlapScanStatsDerive extends BaseStatsDerive {
return new StatsDeriveResult(deriveRowCount(), slotIdToDataSize, slotIdToNdv);
}
/**
* Desc: Build OlapScanNode infrastructure.
*
* @param: node
* @return: void
*/
public void buildStructure(OlapScanNode node) {
slotIdToDataSize = new HashMap<>();
slotIdToNdv = new HashMap<>();
slotIdToTableIdAndColumnName = new HashMap<>();
if (node.getTupleDesc() != null
&& node.getTupleDesc().getTable() != null) {
long tableId = node.getTupleDesc().getTable().getId();
@ -90,6 +100,12 @@ public class OlapScanStatsDerive extends BaseStatsDerive {
//TODO:Implement the getStatistics interface
//now there is nothing in statistics, need to wait for collection finished
/**
* Desc: Get ndv and dataSize from statistics.
*
* @param pair TableId and ColumnName
* @return {@link Pair}
*/
public Pair<Long, Float> getNdvAndDataSizeFromStatistics(Pair<Long, String> pair) {
long ndv = -1;
float dataSize = -1;

View File

@ -0,0 +1,41 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import com.google.common.base.Preconditions;
/**
* Derive SelectNode statistics.
*/
public class SelectStatsDerive extends BaseStatsDerive {
// Currently, SORT_NODE also uses this derivation method.
@Override
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
@Override
protected long deriveRowCount() {
Preconditions.checkState(!childrenStatsResult.isEmpty());
rowCount = childrenStatsResult.get(0).getRowCount();
applyConjunctsSelectivity();
capRowCountAtLimit();
return rowCount;
}
}
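The two helpers called here live in BaseStatsDerive and are not shown in this diff; conceptually they scale the child estimate by the conjuncts' selectivity and then cap it at the node's limit. A self-contained sketch with assumed numbers:
public class SelectRowCountSketch {
    public static void main(String[] args) {
        long childRowCount = 10_000L; // assumed child estimate
        double selectivity = 0.1;     // assumed combined selectivity of the conjuncts
        long limit = 500L;            // assumed LIMIT on the node

        long rowCount = Math.round(childRowCount * selectivity); // applyConjunctsSelectivity()
        rowCount = Math.min(rowCount, limit);                    // capRowCountAtLimit()
        System.out.println("derived rowCount = " + rowCount);    // prints 500
    }
}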

View File

@ -181,4 +181,5 @@ public class StatisticsManager {
public Statistics getStatistics() {
return statistics;
}
}

View File

@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import com.google.common.base.Preconditions;
/**
* Derive TableFunctionNode statistics.
*/
public class TableFunctionStatsDerive extends BaseStatsDerive {
@Override
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
@Override
protected long deriveRowCount() {
Preconditions.checkState(!childrenStatsResult.isEmpty());
// TODO: rowCount should be the child rowCount multiplied by the number of rows produced from the list column
rowCount = childrenStatsResult.get(0).getRowCount();
return rowCount;
}
}
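The TODO above points at the intended refinement: a table function such as explode_split emits several rows per input row, so the estimate should be the child row count multiplied by the average number of elements per list value. A sketch with hypothetical numbers:
public class TableFunctionRowCountSketch {
    public static void main(String[] args) {
        long childRowCount = 1_000L;    // assumed child estimate
        double avgElementsPerRow = 3.0; // assumed average list length per row
        long rowCount = (long) Math.ceil(childRowCount * avgElementsPerRow);
        System.out.println("estimated rowCount = " + rowCount); // prints 3000
    }
}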

View File

@ -0,0 +1,276 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.planner;
import org.apache.doris.qe.SessionVariable;
import org.apache.doris.qe.StmtExecutor;
import org.apache.doris.utframe.TestWithFeService;
import org.junit.Assert;
import org.junit.jupiter.api.Test;
public class StatisticDeriveTest extends TestWithFeService {
@Override
protected void runBeforeAll() throws Exception {
// create database
createDatabase("test");
createTable(
"CREATE TABLE test.join1 (\n"
+ " `dt` int(11) COMMENT \"\",\n"
+ " `id` int(11) COMMENT \"\",\n"
+ " `value` bigint(8) COMMENT \"\"\n"
+ ") ENGINE=OLAP\n"
+ "DUPLICATE KEY(`dt`, `id`)\n"
+ "PARTITION BY RANGE(`dt`)\n"
+ "(PARTITION p1 VALUES LESS THAN (\"10\"))\n"
+ "DISTRIBUTED BY HASH(`id`) BUCKETS 10\n"
+ "PROPERTIES (\n"
+ " \"replication_num\" = \"1\"\n"
+ ");");
createTable(
"CREATE TABLE test.join2 (\n"
+ " `dt` int(11) COMMENT \"\",\n"
+ " `id` int(11) COMMENT \"\",\n"
+ " `value` varchar(8) COMMENT \"\"\n"
+ ") ENGINE=OLAP\n"
+ "DUPLICATE KEY(`dt`, `id`)\n"
+ "PARTITION BY RANGE(`dt`)\n"
+ "(PARTITION p1 VALUES LESS THAN (\"10\"))\n"
+ "DISTRIBUTED BY HASH(`id`) BUCKETS 10\n"
+ "PROPERTIES (\n"
+ " \"replication_num\" = \"1\"\n"
+ ");");
createTable("create external table test.mysql_table\n"
+ "(k1 int, k2 int)\n"
+ "ENGINE=MYSQL\n"
+ "PROPERTIES (\n"
+ "\"host\" = \"127.0.0.1\",\n"
+ "\"port\" = \"3306\",\n"
+ "\"user\" = \"root\",\n"
+ "\"password\" = \"123\",\n"
+ "\"database\" = \"db1\",\n"
+ "\"table\" = \"tbl1\"\n"
+ ");");
createTable("create external table test.odbc_oracle\n"
+ "(k1 float, k2 int)\n"
+ "ENGINE=ODBC\n"
+ "PROPERTIES (\n"
+ "\"host\" = \"127.0.0.1\",\n"
+ "\"port\" = \"3306\",\n"
+ "\"user\" = \"root\",\n"
+ "\"password\" = \"123\",\n"
+ "\"database\" = \"db1\",\n"
+ "\"table\" = \"tbl1\",\n"
+ "\"driver\" = \"Oracle Driver\",\n"
+ "\"odbc_type\" = \"oracle\"\n"
+ ");");
createTable(
"create external table test.odbc_mysql\n"
+ "(k1 int, k2 int)\n"
+ "ENGINE=ODBC\n"
+ "PROPERTIES (\n"
+ "\"host\" = \"127.0.0.1\",\n"
+ "\"port\" = \"3306\",\n"
+ "\"user\" = \"root\",\n"
+ "\"password\" = \"123\",\n"
+ "\"database\" = \"db1\",\n"
+ "\"table\" = \"tbl1\",\n"
+ "\"driver\" = \"Oracle Driver\",\n"
+ "\"odbc_type\" = \"mysql\"\n"
+ ");");
}
@Test
public void testAggStatsDerive() throws Exception {
// contains AggNode/OlapScanNode
String sql = "select dt, max(id), value from test.join1 group by dt, value;";
StmtExecutor stmtExecutor = new StmtExecutor(connectContext, sql);
SessionVariable sessionVariable = connectContext.getSessionVariable();
sessionVariable.setEnableJoinReorderBasedCost(true);
sessionVariable.setDisableJoinReorder(false);
stmtExecutor.execute();
Assert.assertNotNull(stmtExecutor.planner());
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
assertSQLPlanOrErrorMsgContains(sql, "AGGREGATE");
assertSQLPlanOrErrorMsgContains(sql, "OlapScanNode");
}
@Test
public void testAnalyticEvalStatsDerive() throws Exception {
// contains SortNode/ExchangeNode/OlapScanNode
String sql = "select dt, min(id) OVER (PARTITION BY dt ORDER BY id) from test.join1";
StmtExecutor stmtExecutor = new StmtExecutor(connectContext, sql);
SessionVariable sessionVariable = connectContext.getSessionVariable();
sessionVariable.setEnableJoinReorderBasedCost(true);
sessionVariable.setDisableJoinReorder(false);
stmtExecutor.execute();
Assert.assertNotNull(stmtExecutor.planner());
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
System.out.println(getSQLPlanOrErrorMsg("explain " + sql));
assertSQLPlanOrErrorMsgContains(sql, "ANALYTIC");
assertSQLPlanOrErrorMsgContains(sql, "SORT");
assertSQLPlanOrErrorMsgContains(sql, "EXCHANGE");
}
@Test
public void testAssertNumberRowsStatsDerive() throws Exception {
// contains CrossJoinNode/ExchangeNode/AssertNumberRowsNode/AggNode/OlapScanNode
String sql = "SELECT CASE\n"
+ "WHEN (\n"
+ "SELECT COUNT(*) / 2\n"
+ "FROM test.join1\n"
+ ") > id THEN (\n"
+ "SELECT AVG(id)\n"
+ "FROM test.join1\n"
+ ")\n"
+ "ELSE (\n"
+ "SELECT SUM(id)\n"
+ "FROM test.join1\n"
+ ")\n"
+ "END AS kk4\n"
+ "FROM test.join1;";
StmtExecutor stmtExecutor = new StmtExecutor(connectContext, sql);
SessionVariable sessionVariable = connectContext.getSessionVariable();
sessionVariable.setEnableJoinReorderBasedCost(true);
sessionVariable.setDisableJoinReorder(false);
stmtExecutor.execute();
Assert.assertNotNull(stmtExecutor.planner());
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
System.out.println(getSQLPlanOrErrorMsg("explain " + sql));
assertSQLPlanOrErrorMsgContains(sql, "CROSS JOIN");
assertSQLPlanOrErrorMsgContains(sql, "ASSERT NUMBER OF ROWS");
assertSQLPlanOrErrorMsgContains(sql, "EXCHANGE");
assertSQLPlanOrErrorMsgContains(sql, "AGGREGATE");
assertSQLPlanOrErrorMsgContains(sql, "OlapScanNode");
}
@Test
public void testEmptySetStatsDerive() throws Exception {
String sql = "select * from test.join1 where 1 = 2";
StmtExecutor stmtExecutor = new StmtExecutor(connectContext, sql);
SessionVariable sessionVariable = connectContext.getSessionVariable();
sessionVariable.setEnableJoinReorderBasedCost(true);
sessionVariable.setDisableJoinReorder(false);
stmtExecutor.execute();
Assert.assertNotNull(stmtExecutor.planner());
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
System.out.println(getSQLPlanOrErrorMsg("explain " + sql));
assertSQLPlanOrErrorMsgContains(sql, "EMPTYSET");
}
@Test
public void testRepeatStatsDerive() throws Exception {
String sql = "select dt, id, sum(value) from test.join1 group by rollup (dt, id)";
StmtExecutor stmtExecutor = new StmtExecutor(connectContext, sql);
SessionVariable sessionVariable = connectContext.getSessionVariable();
sessionVariable.setEnableJoinReorderBasedCost(true);
sessionVariable.setDisableJoinReorder(false);
stmtExecutor.execute();
Assert.assertNotNull(stmtExecutor.planner());
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
System.out.println(getSQLPlanOrErrorMsg("explain " + sql));
assertSQLPlanOrErrorMsgContains(sql, "REPEAT_NODE");
}
@Test
public void testHashJoinStatsDerive() throws Exception {
// contains HashJoinNode/ExchangeNode/OlapScanNode
String sql = "select * from test.join1 a inner join test.join2 b on a.dt = b.dt";
StmtExecutor stmtExecutor = new StmtExecutor(connectContext, sql);
SessionVariable sessionVariable = connectContext.getSessionVariable();
sessionVariable.setEnableJoinReorderBasedCost(true);
sessionVariable.setDisableJoinReorder(false);
stmtExecutor.execute();
Assert.assertNotNull(stmtExecutor.planner());
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
System.out.println(getSQLPlanOrErrorMsg("explain " + sql));
assertSQLPlanOrErrorMsgContains(sql, "HASH JOIN");
}
@Test
public void testMysqlScanStatsDerive() throws Exception {
String sql = "select * from test.mysql_table";
SessionVariable sessionVariable = connectContext.getSessionVariable();
sessionVariable.setEnableJoinReorderBasedCost(true);
sessionVariable.setDisableJoinReorder(false);
StmtExecutor stmtExecutor = new StmtExecutor(connectContext, sql);
stmtExecutor.execute();
Assert.assertNotNull(stmtExecutor.planner());
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
System.out.println(getSQLPlanOrErrorMsg("explain " + sql));
assertSQLPlanOrErrorMsgContains(sql, "SCAN MYSQL");
}
@Test
public void testOdbcScanStatsDerive() throws Exception {
String sql = "select * from test.odbc_mysql";
SessionVariable sessionVariable = connectContext.getSessionVariable();
sessionVariable.setEnableJoinReorderBasedCost(true);
sessionVariable.setDisableJoinReorder(false);
StmtExecutor stmtExecutor = new StmtExecutor(connectContext, sql);
stmtExecutor.execute();
Assert.assertNotNull(stmtExecutor.planner());
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
System.out.println(getSQLPlanOrErrorMsg("explain " + sql));
assertSQLPlanOrErrorMsgContains(sql, "SCAN ODBC");
}
@Test
public void testTableFunctionStatsDerive() throws Exception {
String sql = "select * from test.join2 lateral view explode_split(value, \",\") tmp as e1";
SessionVariable sessionVariable = connectContext.getSessionVariable();
sessionVariable.setEnableJoinReorderBasedCost(true);
sessionVariable.setDisableJoinReorder(false);
StmtExecutor stmtExecutor = new StmtExecutor(connectContext, sql);
stmtExecutor.execute();
Assert.assertNotNull(stmtExecutor.planner());
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
System.out.println(getSQLPlanOrErrorMsg("explain " + sql));
assertSQLPlanOrErrorMsgContains(sql, "TABLE FUNCTION NODE");
}
@Test
public void testUnionStatsDerive() throws Exception {
String sql = "select * from test.join1 union select * from test.join2";
SessionVariable sessionVariable = connectContext.getSessionVariable();
sessionVariable.setEnableJoinReorderBasedCost(true);
sessionVariable.setDisableJoinReorder(false);
StmtExecutor stmtExecutor = new StmtExecutor(connectContext, sql);
stmtExecutor.execute();
Assert.assertNotNull(stmtExecutor.planner());
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
System.out.println(getSQLPlanOrErrorMsg("explain " + sql));
assertSQLPlanOrErrorMsgContains(sql, "UNION");
}
}