[feature](statistics) Statistics derivation. Step 1: ScanNode implementation (#8947)

* [feature](statistics) Statistics derivation. Step 1: ScanNode implementation

Co-authored-by: jianghaochen <jianghaochen@meituan.com>
zhengshiJ
2022-04-29 10:41:12 +08:00
committed by GitHub
parent 93a41b2625
commit 9ef09b8354
23 changed files with 545 additions and 50 deletions

View File

@ -138,7 +138,14 @@ public class BrokerScanNode extends LoadScanNode {
public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
super(id, destTupleDesc, planNodeName);
super(id, destTupleDesc, planNodeName, NodeType.BROKER_SCAN_NODE);
this.fileStatusesList = fileStatusesList;
this.filesAdded = filesAdded;
}
public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded, NodeType nodeType) {
super(id, destTupleDesc, planNodeName, nodeType);
this.fileStatusesList = fileStatusesList;
this.filesAdded = filesAdded;
}

View File

@ -72,7 +72,7 @@ public class EsScanNode extends ScanNode {
boolean isFinalized = false;
public EsScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
super(id, desc, planNodeName);
super(id, desc, planNodeName, NodeType.ES_SCAN_NODE);
table = (EsTable) (desc.getTable());
esTablePartitions = table.getEsTablePartitions();
}

View File

@ -100,7 +100,7 @@ public class HiveScanNode extends BrokerScanNode {
public HiveScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded);
super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded, NodeType.HIVE_SCAN_NODE);
this.hiveTable = (HiveTable) destTupleDesc.getTable();
}

View File

@ -47,7 +47,7 @@ public class IcebergScanNode extends BrokerScanNode {
public IcebergScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName,
List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
super(id, desc, planNodeName, fileStatusesList, filesAdded);
super(id, desc, planNodeName, fileStatusesList, filesAdded, NodeType.ICEBREG_SCAN_NODE);
icebergTable = (IcebergTable) desc.getTable();
}

View File

@ -54,7 +54,11 @@ public abstract class LoadScanNode extends ScanNode {
protected LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND;
public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
super(id, desc, planNodeName);
super(id, desc, planNodeName, NodeType.LOAD_SCAN_NODE);
}
public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
super(id, desc, planNodeName, nodeType);
}
protected void initAndSetWhereExpr(Expr whereExpr, TupleDescriptor tupleDesc, Analyzer analyzer) throws UserException {

View File

@ -56,7 +56,7 @@ public class MysqlScanNode extends ScanNode {
* Constructs node to scan given data files of table 'tbl'.
*/
public MysqlScanNode(PlanNodeId id, TupleDescriptor desc, MysqlTable tbl) {
super(id, desc, "SCAN MYSQL");
super(id, desc, "SCAN MYSQL", NodeType.MYSQL_SCAN_NODE);
tblName = "`" + tbl.getMysqlTableName() + "`";
}

View File

@ -73,7 +73,7 @@ public class OdbcScanNode extends ScanNode {
* Constructs node to scan given data files of table 'tbl'.
*/
public OdbcScanNode(PlanNodeId id, TupleDescriptor desc, OdbcTable tbl) {
super(id, desc, "SCAN ODBC");
super(id, desc, "SCAN ODBC", NodeType.ODBC_SCAN_NODE);
connectString = tbl.getConnectString();
odbcType = tbl.getOdbcTableType();
tblName = OdbcTable.databaseProperName(odbcType, tbl.getOdbcTableName());

View File

@ -53,6 +53,7 @@ import org.apache.doris.common.util.Util;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.SessionVariable;
import org.apache.doris.resource.Tag;
import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.system.Backend;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TNetworkAddress;
@ -146,7 +147,7 @@ public class OlapScanNode extends ScanNode {
// Constructs node to scan given data files of table 'tbl'.
public OlapScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
super(id, desc, planNodeName);
super(id, desc, planNodeName, NodeType.OLAP_SCAN_NODE);
olapTable = (OlapTable) desc.getTable();
}
@ -346,10 +347,25 @@ public class OlapScanNode extends ScanNode {
* - So only an inaccurate cardinality can be calculated here.
*/
if (analyzer.safeIsEnableJoinReorderBasedCost()) {
mockRowCountInStatistic();
computeInaccurateCardinality();
}
}
/**
* Remove this method once statistics collection is working properly
*/
public void mockRowCountInStatistic() {
long tableId = desc.getTable().getId();
cardinality = 0;
for (long selectedPartitionId : selectedPartitionIds) {
final Partition partition = olapTable.getPartition(selectedPartitionId);
final MaterializedIndex baseIndex = partition.getBaseIndex();
cardinality += baseIndex.getRowCount();
}
Catalog.getCurrentCatalog().getStatisticsManager().getStatistics().mockTableStatsWithRowCount(tableId, cardinality);
}
@Override
public void finalize(Analyzer analyzer) throws UserException {
LOG.debug("OlapScanNode get scan range locations. Tuple: {}", desc);
@ -386,6 +402,12 @@ public class OlapScanNode extends ScanNode {
}
// when the scan node has no data, cardinality should be 0 instead of an invalid value after computeStats()
cardinality = cardinality == -1 ? 0 : cardinality;
// Update statsDeriveResult with the real statistics.
// Remove this logic after statistics collection is complete.
if (analyzer.safeIsEnableJoinReorderBasedCost()) {
statsDeriveResult.setRowCount(cardinality);
}
}
@Override
@ -397,30 +419,9 @@ public class OlapScanNode extends ScanNode {
numNodes = numNodes <= 0 ? 1 : numNodes;
}
/**
* Calculate inaccurate cardinality.
* cardinality: the value of cardinality is the sum of rowcount which belongs to selectedPartitionIds
* The cardinality here is actually inaccurate, it will be greater than the actual value.
* There are two reasons
* 1. During the actual execution, not all tablets belonging to the selected partition will be scanned.
* Some tablets may have been pruned before execution.
* 2. The base index may eventually be replaced by mv index.
* <p>
* There are three steps to calculate cardinality
* 1. Calculate how many rows were scanned
* 2. Apply conjunct
* 3. Apply limit
*/
private void computeInaccurateCardinality() {
// step1: Calculate how many rows were scanned
cardinality = 0;
for (long selectedPartitionId : selectedPartitionIds) {
final Partition partition = olapTable.getPartition(selectedPartitionId);
final MaterializedIndex baseIndex = partition.getBaseIndex();
cardinality += baseIndex.getRowCount();
}
applyConjunctsSelectivity();
capCardinalityAtLimit();
private void computeInaccurateCardinality() throws UserException {
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
cardinality = statsDeriveResult.getRowCount();
}
private Collection<Long> partitionPrune(PartitionInfo partitionInfo, PartitionNames partitionNames) throws AnalysisException {
@ -563,7 +564,7 @@ public class OlapScanNode extends ScanNode {
result.add(scanRangeLocations);
}
// FIXME(dhc): we use cardinality here to simulate ndv
if (tablets.size() == 0) {
desc.setCardinality(0);
} else {

View File

@ -36,6 +36,7 @@ import org.apache.doris.common.NotImplementedException;
import org.apache.doris.common.TreeNode;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.VectorizedUtil;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TFunctionBinaryType;
import org.apache.doris.thrift.TPlan;
@ -135,6 +136,9 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
protected List<SlotId> outputSlotIds;
protected NodeType nodeType = NodeType.DEFAULT;
protected StatsDeriveResult statsDeriveResult;
protected PlanNode(PlanNodeId id, ArrayList<TupleId> tupleIds, String planNodeName) {
this.id = id;
this.limit = -1;
@ -173,12 +177,41 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
this.planNodeName = VectorizedUtil.isVectorized() ?
"V" + planNodeName : planNodeName;
this.numInstances = 1;
this.nodeType = nodeType;
}
public enum NodeType {
DEFAULT,
AGG_NODE,
BROKER_SCAN_NODE,
HASH_JOIN_NODE,
HIVE_SCAN_NODE,
MERGE_NODE,
ES_SCAN_NODE,
ICEBREG_SCAN_NODE,
LOAD_SCAN_NODE,
MYSQL_SCAN_NODE,
ODBC_SCAN_NODE,
OLAP_SCAN_NODE,
SCHEMA_SCAN_NODE,
}
public String getPlanNodeName() {
return planNodeName;
}
public StatsDeriveResult getStatsDeriveResult() {
return statsDeriveResult;
}
public NodeType getNodeType() {
return nodeType;
}
public void setStatsDeriveResult(StatsDeriveResult statsDeriveResult) {
this.statsDeriveResult = statsDeriveResult;
}
/**
* Sets tblRefIds_, tupleIds_, and nullableTupleIds_.
* The default implementation is a no-op.

View File

@ -65,8 +65,9 @@ abstract public class ScanNode extends PlanNode {
protected String sortColumn = null;
protected Analyzer analyzer;
public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
super(id, desc.getId().asList(), planNodeName);
super.nodeType = nodeType;
this.desc = desc;
}

View File

@ -57,7 +57,7 @@ public class SchemaScanNode extends ScanNode {
* Constructs node to scan given data files of table 'tbl'.
*/
public SchemaScanNode(PlanNodeId id, TupleDescriptor desc) {
super(id, desc, "SCAN SCHEMA");
super(id, desc, "SCAN SCHEMA", NodeType.SCHEMA_SCAN_NODE);
this.tableName = desc.getTable().getName();
}

View File

@ -1725,7 +1725,6 @@ public class SingleNodePlanner {
scanNodeList.add(scanNode);
scanNode.init(analyzer);
return scanNode;
}

View File

@ -699,14 +699,6 @@ public class StmtExecutor implements ProfileWriter {
}
if (explainOptions != null) parsedStmt.setIsExplain(explainOptions);
}
if (parsedStmt instanceof InsertStmt && parsedStmt.isExplain()) {
if (ConnectContext.get() != null &&
ConnectContext.get().getExecutor() != null &&
ConnectContext.get().getExecutor().getParsedStmt() != null) {
ConnectContext.get().getExecutor().getParsedStmt().setIsExplain(new ExplainOptions(true, false));
}
}
}
plannerProfile.setQueryAnalysisFinishTime();

View File

@ -0,0 +1,161 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.SlotId;
import org.apache.doris.common.UserException;
import org.apache.doris.planner.PlanNode;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
public class BaseStatsDerive {
private static final Logger LOG = LogManager.getLogger(BaseStatsDerive.class);
// estimate of the output rowCount of this node;
// invalid: -1
protected long rowCount = -1;
protected long limit = -1;
protected List<Expr> conjuncts = Lists.newArrayList();
protected List<StatsDeriveResult> childrenStatsResult = Lists.newArrayList();
protected void init(PlanNode node) throws UserException {
limit = node.getLimit();
conjuncts.addAll(node.getConjuncts());
for (PlanNode childNode : node.getChildren()) {
StatsDeriveResult result = childNode.getStatsDeriveResult();
if (result == null) {
throw new UserException("childNode statsDeriveResult is null, childNodeType is " + childNode.getNodeType()
+ "parentNodeType is " + node.getNodeType());
}
childrenStatsResult.add(result);
}
}
public StatsDeriveResult deriveStats() {
return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
}
public boolean hasLimit() {
return limit > -1;
}
protected void applyConjunctsSelectivity() {
if (rowCount == -1) {
return;
}
applySelectivity();
}
private void applySelectivity() {
double selectivity = computeSelectivity();
Preconditions.checkState(rowCount >= 0);
long preConjunctrowCount = rowCount;
rowCount = Math.round(rowCount * selectivity);
// don't round rowCount down to zero for safety.
if (rowCount == 0 && preConjunctrowCount > 0) {
rowCount = 1;
}
}
protected double computeSelectivity() {
for (Expr expr : conjuncts) {
expr.setSelectivity();
}
return computeCombinedSelectivity(conjuncts);
}
/**
* Returns the estimated combined selectivity of all conjuncts. Uses heuristics to
* address the following estimation challenges:
* 1. The individual selectivities of conjuncts may be unknown.
* 2. Two selectivities, whether known or unknown, could be correlated. Assuming
* independence can lead to significant underestimation.
* <p>
* The first issue is addressed by using a single default selectivity that is
* representative of all conjuncts with unknown selectivities.
* The second issue is addressed by an exponential backoff when multiplying each
* additional selectivity into the final result.
*/
protected double computeCombinedSelectivity(List<Expr> conjuncts) {
// Collect all estimated selectivities.
List<Double> selectivities = new ArrayList<>();
for (Expr e : conjuncts) {
if (e.hasSelectivity()) selectivities.add(e.getSelectivity());
}
if (selectivities.size() != conjuncts.size()) {
// Some conjuncts have no estimated selectivity. Use a single default
// representative selectivity for all those conjuncts.
selectivities.add(Expr.DEFAULT_SELECTIVITY);
}
// Sort the selectivities to get a consistent estimate, regardless of the original
// conjunct order. Sort in ascending order such that the most selective conjunct
// is fully applied.
Collections.sort(selectivities);
double result = 1.0;
// selectivity = 1 * (s1)^(1/1) * (s2)^(1/2) * ... * (sn-1)^(1/(n-1)) * (sn)^(1/n)
for (int i = 0; i < selectivities.size(); ++i) {
// Exponential backoff for each selectivity multiplied into the final result.
result *= Math.pow(selectivities.get(i), 1.0 / (double) (i + 1));
}
// Bound result in [0, 1]
return Math.max(0.0, Math.min(1.0, result));
}
protected void capRowCountAtLimit() {
if (hasLimit()) {
rowCount = rowCount == -1 ? limit : Math.min(rowCount, limit);
}
}
// Currently it simply takes the maximum row count among the children, then applies conjunct selectivity and the limit.
protected long deriveRowCount() {
for (StatsDeriveResult statsDeriveResult : childrenStatsResult) {
rowCount = Math.max(rowCount, statsDeriveResult.getRowCount());
}
applyConjunctsSelectivity();
capRowCountAtLimit();
return rowCount;
}
protected HashMap<SlotId, Float> deriveColumnToDataSize() {
HashMap<SlotId, Float> columnToDataSize = new HashMap<>();
for (StatsDeriveResult child : childrenStatsResult) {
columnToDataSize.putAll(child.getColumnToDataSize());
}
return columnToDataSize;
}
protected HashMap<SlotId, Long> deriveColumnToNdv() {
HashMap<SlotId, Long> columnToNdv = new HashMap<>();
for (StatsDeriveResult child : childrenStatsResult) {
columnToNdv.putAll(child.getColumnToNdv());
}
return columnToNdv;
}
}
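
The exponential-backoff formula in computeCombinedSelectivity above can be checked in isolation. Below is a minimal standalone sketch of the same arithmetic (the class name is made up for illustration and is not part of the Doris source): with two conjuncts whose estimated selectivities are 0.1 and 0.5, the most selective one is applied in full and the second only to the power 1/2, giving about 0.1 * 0.707 ≈ 0.071 instead of the 0.05 an independence assumption would predict.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
// Standalone illustration of the backoff formula:
// result = s1^(1/1) * s2^(1/2) * ... * sn^(1/n), with selectivities sorted ascending.
public class SelectivityBackoffDemo {
    static double combinedSelectivity(List<Double> selectivities) {
        Collections.sort(selectivities);
        double result = 1.0;
        for (int i = 0; i < selectivities.size(); ++i) {
            result *= Math.pow(selectivities.get(i), 1.0 / (double) (i + 1));
        }
        return Math.max(0.0, Math.min(1.0, result));
    }
    public static void main(String[] args) {
        // Two conjuncts with assumed selectivities 0.5 and 0.1.
        List<Double> selectivities = new ArrayList<>(Arrays.asList(0.5, 0.1));
        System.out.println(combinedSelectivity(selectivities)); // ~0.0707
        System.out.println(0.1 * 0.5);                          // 0.05 under an independence assumption
    }
}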

View File

@ -0,0 +1,36 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.planner.PlanNode;
public class DeriveFactory {
public BaseStatsDerive getStatsDerive(PlanNode.NodeType nodeType) {
switch (nodeType) {
case AGG_NODE:
case HASH_JOIN_NODE:
case MERGE_NODE:
break;
case OLAP_SCAN_NODE:
return new OlapScanStatsDerive();
case DEFAULT:
}
return new BaseStatsDerive();
}
}

View File

@ -0,0 +1,112 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import com.google.common.base.Preconditions;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.SlotId;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
import org.apache.doris.planner.OlapScanNode;
import org.apache.doris.planner.PlanNode;
import java.util.HashMap;
import java.util.Map;
public class OlapScanStatsDerive extends BaseStatsDerive {
// Currently, because of how Doris is structured, the materialized view to scan has not yet been
// selected when the scan's statistics are calculated, so the base index is used for the
// calculation while the planner is being generated.
// The rowCount here is the number of rows.
private long inputRowCount = -1;
private Map<SlotId, Float> slotIdToDataSize;
private Map<SlotId, Long> slotIdToNdv;
private Map<SlotId, Pair<Long, String>> slotIdToTableIdAndColumnName;
@Override
public void init(PlanNode node) throws UserException {
Preconditions.checkState(node instanceof OlapScanNode);
super.init(node);
buildStructure((OlapScanNode)node);
}
@Override
public StatsDeriveResult deriveStats() {
/**
* Compute the inaccurate cardinality before materialized-view selection and tablet pruning.
* - Accurate statistics depend on materialized-view selection and bucket pruning.
* - However, both of those steps happen only after the reorder algorithm has finished.
* - When join reorder is enabled, the cardinality must be calculated before the reorder algorithm runs.
* - So only an inaccurate cardinality can be calculated here.
*/
rowCount = inputRowCount;
for (Map.Entry<SlotId, Pair<Long, String>> pairEntry : slotIdToTableIdAndColumnName.entrySet()) {
Pair<Long, Float> ndvAndDataSize = getNdvAndDataSizeFromStatistics(pairEntry.getValue());
long ndv = ndvAndDataSize.first;
float dataSize = ndvAndDataSize.second;
slotIdToNdv.put(pairEntry.getKey(), ndv);
slotIdToDataSize.put(pairEntry.getKey(), dataSize);
}
return new StatsDeriveResult(deriveRowCount(), slotIdToDataSize, slotIdToNdv);
}
public void buildStructure(OlapScanNode node) {
slotIdToDataSize = new HashMap<>();
slotIdToNdv = new HashMap<>();
slotIdToTableIdAndColumnName = new HashMap<>();
if (node.getTupleDesc() != null
&& node.getTupleDesc().getTable() != null) {
long tableId = node.getTupleDesc().getTable().getId();
inputRowCount = Catalog.getCurrentCatalog().getStatisticsManager()
.getStatistics().getTableStats(tableId).getRowCount();
}
for (SlotDescriptor slot : node.getTupleDesc().getSlots()) {
if (!slot.isMaterialized()) {
continue;
}
long tableId = slot.getParent().getTable().getId();
String columnName = slot.getColumn().getName();
slotIdToTableIdAndColumnName.put(slot.getId(), new Pair<>(tableId, columnName));
}
}
// TODO: Implement this against the real getStatistics interface.
// There is nothing in statistics yet; this has to wait until collection is finished.
public Pair<Long, Float> getNdvAndDataSizeFromStatistics(Pair<Long, String> pair) {
long ndv = -1;
float dataSize = -1;
/*
if (Catalog.getCurrentCatalog()
.getStatisticsManager()
.getStatistics()
.getColumnStats(pair.first) != null) {
ndv = Catalog.getCurrentCatalog()
.getStatisticsManager()
.getStatistics()
.getColumnStats(pair.first).get(pair.second).getNdv();
dataSize = Catalog.getCurrentCatalog()
.getStatisticsManager()
.getStatistics()
.getColumnStats(pair.first).get(pair.second).getDataSize();
}
*/
return new Pair<>(ndv, dataSize);
}
}

View File

@ -70,4 +70,17 @@ public class Statistics {
}
return tableStats.getNameToColumnStats();
}
// TODO: the mocked statistics need to be removed in the future
public void mockTableStatsWithRowCount(long tableId, long rowCount) {
TableStats tableStats = idToTableStats.get(tableId);
if (tableStats == null) {
tableStats = new TableStats();
idToTableStats.put(tableId, tableStats);
}
if (tableStats.getRowCount() != rowCount) {
tableStats.setRowCount(rowCount);
}
}
}

View File

@ -136,4 +136,8 @@ public class StatisticsManager {
Table table = db.getTableOrAnalysisException(tableName);
return table;
}
public Statistics getStatistics() {
return statistics;
}
}

View File

@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import com.google.common.collect.Maps;
import org.apache.doris.analysis.SlotId;
import java.util.Map;
// This structure is maintained by each operator to store the statistics derived for that operator.
public class StatsDeriveResult {
private long rowCount = -1;
// The data size of the corresponding column in the operator
// The actual key is slotId
private final Map<SlotId, Float> columnToDataSize = Maps.newHashMap();
// The ndv of the corresponding column in the operator
// The actual key is slotId
private final Map<SlotId, Long> columnToNdv = Maps.newHashMap();
public StatsDeriveResult(long rowCount, Map<SlotId, Float> columnToDataSize, Map<SlotId, Long> columnToNdv) {
this.rowCount = rowCount;
this.columnToDataSize.putAll(columnToDataSize);
this.columnToNdv.putAll(columnToNdv);
}
public void setRowCount(long rowCount) {
this.rowCount = rowCount;
}
public long getRowCount() {
return rowCount;
}
public Map<SlotId, Long> getColumnToNdv() {
return columnToNdv;
}
public Map<SlotId, Float> getColumnToDataSize() {
return columnToDataSize;
}
}
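
For orientation, the sketch below shows how a node-level result might be assembled by hand. The class name, slot id, ndv, and data-size numbers are made-up placeholders, and it assumes SlotId exposes an int-based constructor.

package org.apache.doris.statistics;
import java.util.HashMap;
import java.util.Map;
import org.apache.doris.analysis.SlotId;
// Hypothetical example class; the numbers are placeholders, not real statistics.
public class StatsDeriveResultExample {
    static StatsDeriveResult singleSlotResult() {
        Map<SlotId, Float> columnToDataSize = new HashMap<>();
        Map<SlotId, Long> columnToNdv = new HashMap<>();
        SlotId slot = new SlotId(0);       // assumes SlotId can be built from an int
        columnToDataSize.put(slot, 4.0f);  // average data size of the slot's column, in bytes
        columnToNdv.put(slot, 100L);       // distinct values in the slot's column
        return new StatsDeriveResult(1000, columnToDataSize, columnToNdv); // 1000 estimated rows
    }
}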

View File

@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.common.UserException;
import org.apache.doris.planner.PlanNode;
public class StatsRecursiveDerive {
private StatsRecursiveDerive() {}
public static StatsRecursiveDerive getStatsRecursiveDerive() {
return Inner.INSTANCE;
}
private static class Inner {
private static final StatsRecursiveDerive INSTANCE = new StatsRecursiveDerive();
}
/**
* Recursively derive statistics for this node and all of its children.
* @param node
*        Acts as both input and output: the derived statistics are stored
*        on this node (and on each of its children).
*/
public void statsRecursiveDerive(PlanNode node) throws UserException {
if (node.getStatsDeriveResult() != null) {
return;
}
for (PlanNode childNode : node.getChildren()) {
if (childNode.getStatsDeriveResult() == null) {
statsRecursiveDerive(childNode);
}
}
DeriveFactory deriveFactory = new DeriveFactory();
BaseStatsDerive deriveStats = deriveFactory.getStatsDerive(node.getNodeType());
deriveStats.init(node);
StatsDeriveResult result = deriveStats.deriveStats();
node.setStatsDeriveResult(result);
}
}
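
The intended call pattern, mirrored by OlapScanNode.computeInaccurateCardinality() earlier in this commit, is a single call on the node of interest; the helper walks the children first, so every descendant ends up with a cached StatsDeriveResult. A minimal sketch follows, assuming root is any PlanNode the planner has already initialized (the wrapper class name is made up):

package org.apache.doris.statistics;
import org.apache.doris.common.UserException;
import org.apache.doris.planner.PlanNode;
// Hypothetical wrapper showing the intended call pattern.
public class StatsDeriveUsageSketch {
    static long estimatedRowCount(PlanNode root) throws UserException {
        // Post-order derivation: children are derived before the node itself,
        // and the result is cached on each node via setStatsDeriveResult.
        StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(root);
        return root.getStatsDeriveResult().getRowCount();
    }
}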

View File

@ -94,4 +94,16 @@ public class TableStats {
public Map<String, ColumnStats> getNameToColumnStats() {
return nameToColumnStats;
}
public long getRowCount() {
return rowCount;
}
public long getDataSize() {
return dataSize;
}
public void setRowCount(long rowCount) {
this.rowCount = rowCount;
}
}

View File

@ -72,20 +72,27 @@ public class ExplainTest {
Assert.assertEquals(dropDbStmt.toSql(), dropSchemaStmt.toSql());
}
public void testExplainSelect() throws Exception {
String sql = "explain select * from test_explain.explain_t1 where dt = '1001';";
public void testExplainInsertInto() throws Exception {
String sql = "explain insert into test_explain.explain_t1 select * from test_explain.explain_t2";
String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false);
System.out.println(explainString);
Assert.assertFalse(explainString.contains("CAST"));
}
public void testExplainInsertInto() throws Exception {
public void testExplainVerboseInsertInto() throws Exception {
String sql = "explain verbose insert into test_explain.explain_t1 select * from test_explain.explain_t2";
String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true);
System.out.println(explainString);
Assert.assertTrue(explainString.contains("CAST"));
}
public void testExplainSelect() throws Exception {
String sql = "explain select * from test_explain.explain_t1 where dt = '1001';";
String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false);
System.out.println(explainString);
Assert.assertFalse(explainString.contains("CAST"));
}
public void testExplainVerboseSelect() throws Exception {
String queryStr = "explain verbose select * from test_explain.explain_t1 where dt = '1001';";
String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, queryStr, true);

View File

@ -1991,8 +1991,9 @@ public class QueryPlanTest {
public void testExplainInsertInto() throws Exception {
ExplainTest explainTest = new ExplainTest();
explainTest.before(connectContext);
explainTest.testExplainSelect();
explainTest.testExplainInsertInto();
explainTest.testExplainVerboseInsertInto();
explainTest.testExplainSelect();
explainTest.testExplainVerboseSelect();
explainTest.testExplainConcatSelect();
explainTest.testExplainVerboseConcatSelect();
@ -2088,7 +2089,7 @@ public class QueryPlanTest {
"\"storage_medium\" = \"HDD\",\n" +
"\"storage_format\" = \"V2\"\n" +
");\n");
String queryStr = "EXPLAIN INSERT INTO result_exprs\n" +
String queryStr = "EXPLAIN VERBOSE INSERT INTO result_exprs\n" +
"SELECT a.aid,\n" +
" b.bid\n" +
"FROM\n" +
@ -2098,7 +2099,7 @@ public class QueryPlanTest {
String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, queryStr);
Assert.assertFalse(explainString.contains("OUTPUT EXPRS:3 | 4"));
System.out.println(explainString);
Assert.assertTrue(explainString.contains("OUTPUT EXPRS:`a`.`aid` | 4"));
Assert.assertTrue(explainString.contains("OUTPUT EXPRS:CAST(`a`.`aid` AS INT) | 4"));
}
@Test