[feature](statistics) Statistics derivation. Step 1: ScanNode implementation (#8947)

* [feature](statistics) Statistics derivation. Step 1: ScanNode implementation

Co-authored-by: jianghaochen <jianghaochen@meituan.com>
@@ -138,7 +138,14 @@ public class BrokerScanNode extends LoadScanNode {

     public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
                           List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
-        super(id, destTupleDesc, planNodeName);
+        super(id, destTupleDesc, planNodeName, NodeType.BROKER_SCAN_NODE);
         this.fileStatusesList = fileStatusesList;
         this.filesAdded = filesAdded;
     }
+
+    public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
+                          List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded, NodeType nodeType) {
+        super(id, destTupleDesc, planNodeName, nodeType);
+        this.fileStatusesList = fileStatusesList;
+        this.filesAdded = filesAdded;
+    }
@@ -72,7 +72,7 @@ public class EsScanNode extends ScanNode {
     boolean isFinalized = false;

     public EsScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
-        super(id, desc, planNodeName);
+        super(id, desc, planNodeName, NodeType.ES_SCAN_NODE);
         table = (EsTable) (desc.getTable());
         esTablePartitions = table.getEsTablePartitions();
     }
@@ -100,7 +100,7 @@ public class HiveScanNode extends BrokerScanNode {

     public HiveScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
                         List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
-        super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded);
+        super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded, NodeType.HIVE_SCAN_NODE);
         this.hiveTable = (HiveTable) destTupleDesc.getTable();
     }
@@ -47,7 +47,7 @@ public class IcebergScanNode extends BrokerScanNode {

     public IcebergScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName,
                            List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
-        super(id, desc, planNodeName, fileStatusesList, filesAdded);
+        super(id, desc, planNodeName, fileStatusesList, filesAdded, NodeType.ICEBREG_SCAN_NODE);
         icebergTable = (IcebergTable) desc.getTable();
     }
@@ -54,7 +54,11 @@ public abstract class LoadScanNode extends ScanNode {
     protected LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND;

     public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
-        super(id, desc, planNodeName);
+        super(id, desc, planNodeName, NodeType.LOAD_SCAN_NODE);
     }
+
+    public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
+        super(id, desc, planNodeName, nodeType);
+    }

     protected void initAndSetWhereExpr(Expr whereExpr, TupleDescriptor tupleDesc, Analyzer analyzer) throws UserException {
@@ -56,7 +56,7 @@ public class MysqlScanNode extends ScanNode {
      * Constructs node to scan given data files of table 'tbl'.
      */
     public MysqlScanNode(PlanNodeId id, TupleDescriptor desc, MysqlTable tbl) {
-        super(id, desc, "SCAN MYSQL");
+        super(id, desc, "SCAN MYSQL", NodeType.MYSQL_SCAN_NODE);
         tblName = "`" + tbl.getMysqlTableName() + "`";
     }
@@ -73,7 +73,7 @@ public class OdbcScanNode extends ScanNode {
      * Constructs node to scan given data files of table 'tbl'.
      */
     public OdbcScanNode(PlanNodeId id, TupleDescriptor desc, OdbcTable tbl) {
-        super(id, desc, "SCAN ODBC");
+        super(id, desc, "SCAN ODBC", NodeType.ODBC_SCAN_NODE);
         connectString = tbl.getConnectString();
         odbcType = tbl.getOdbcTableType();
         tblName = OdbcTable.databaseProperName(odbcType, tbl.getOdbcTableName());
@@ -53,6 +53,7 @@ import org.apache.doris.common.util.Util;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.SessionVariable;
 import org.apache.doris.resource.Tag;
+import org.apache.doris.statistics.StatsRecursiveDerive;
 import org.apache.doris.system.Backend;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.TNetworkAddress;

@@ -146,7 +147,7 @@ public class OlapScanNode extends ScanNode {

     // Constructs node to scan given data files of table 'tbl'.
     public OlapScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
-        super(id, desc, planNodeName);
+        super(id, desc, planNodeName, NodeType.OLAP_SCAN_NODE);
         olapTable = (OlapTable) desc.getTable();
     }

@@ -346,10 +347,25 @@ public class OlapScanNode extends ScanNode {
          * - So only an inaccurate cardinality can be calculated here.
          */
         if (analyzer.safeIsEnableJoinReorderBasedCost()) {
+            mockRowCountInStatistic();
             computeInaccurateCardinality();
         }
     }

+    /**
+     * Remove the method after statistics collection is working properly
+     */
+    public void mockRowCountInStatistic() {
+        long tableId = desc.getTable().getId();
+        cardinality = 0;
+        for (long selectedPartitionId : selectedPartitionIds) {
+            final Partition partition = olapTable.getPartition(selectedPartitionId);
+            final MaterializedIndex baseIndex = partition.getBaseIndex();
+            cardinality += baseIndex.getRowCount();
+        }
+        Catalog.getCurrentCatalog().getStatisticsManager().getStatistics().mockTableStatsWithRowCount(tableId, cardinality);
+    }
+
     @Override
     public void finalize(Analyzer analyzer) throws UserException {
         LOG.debug("OlapScanNode get scan range locations. Tuple: {}", desc);

@@ -386,6 +402,12 @@ public class OlapScanNode extends ScanNode {
         }
         // when node scan has no data, cardinality should be 0 instead of a invalid value after computeStats()
         cardinality = cardinality == -1 ? 0 : cardinality;
+
+        // update statsDeriveResult for real statistics
+        // After statistics collection is complete, remove the logic
+        if (analyzer.safeIsEnableJoinReorderBasedCost()) {
+            statsDeriveResult.setRowCount(cardinality);
+        }
     }

     @Override

@@ -397,30 +419,9 @@ public class OlapScanNode extends ScanNode {
         numNodes = numNodes <= 0 ? 1 : numNodes;
     }

-    /**
-     * Calculate inaccurate cardinality.
-     * cardinality: the value of cardinality is the sum of rowcount which belongs to selectedPartitionIds
-     * The cardinality here is actually inaccurate, it will be greater than the actual value.
-     * There are two reasons
-     * 1. During the actual execution, not all tablets belonging to the selected partition will be scanned.
-     *    Some tablets may have been pruned before execution.
-     * 2. The base index may eventually be replaced by mv index.
-     * <p>
-     * There are three steps to calculate cardinality
-     * 1. Calculate how many rows were scanned
-     * 2. Apply conjunct
-     * 3. Apply limit
-     */
-    private void computeInaccurateCardinality() {
-        // step1: Calculate how many rows were scanned
-        cardinality = 0;
-        for (long selectedPartitionId : selectedPartitionIds) {
-            final Partition partition = olapTable.getPartition(selectedPartitionId);
-            final MaterializedIndex baseIndex = partition.getBaseIndex();
-            cardinality += baseIndex.getRowCount();
-        }
-        applyConjunctsSelectivity();
-        capCardinalityAtLimit();
+    private void computeInaccurateCardinality() throws UserException {
+        StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
+        cardinality = statsDeriveResult.getRowCount();
     }

     private Collection<Long> partitionPrune(PartitionInfo partitionInfo, PartitionNames partitionNames) throws AnalysisException {

@@ -563,7 +564,7 @@ public class OlapScanNode extends ScanNode {

             result.add(scanRangeLocations);
         }
-        // FIXME(dhc): we use cardinality here to simulate ndv
+
         if (tablets.size() == 0) {
             desc.setCardinality(0);
         } else {
@@ -36,6 +36,7 @@ import org.apache.doris.common.NotImplementedException;
 import org.apache.doris.common.TreeNode;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.VectorizedUtil;
+import org.apache.doris.statistics.StatsDeriveResult;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.TFunctionBinaryType;
 import org.apache.doris.thrift.TPlan;

@@ -135,6 +136,9 @@ abstract public class PlanNode extends TreeNode<PlanNode> {

     protected List<SlotId> outputSlotIds;

+    protected NodeType nodeType = NodeType.DEFAULT;
+    protected StatsDeriveResult statsDeriveResult;
+
     protected PlanNode(PlanNodeId id, ArrayList<TupleId> tupleIds, String planNodeName) {
         this.id = id;
         this.limit = -1;

@@ -173,12 +177,41 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
         this.planNodeName = VectorizedUtil.isVectorized() ?
                 "V" + planNodeName : planNodeName;
         this.numInstances = 1;
+        this.nodeType = nodeType;
     }

+    public enum NodeType {
+        DEFAULT,
+        AGG_NODE,
+        BROKER_SCAN_NODE,
+        HASH_JOIN_NODE,
+        HIVE_SCAN_NODE,
+        MERGE_NODE,
+        ES_SCAN_NODE,
+        ICEBREG_SCAN_NODE,
+        LOAD_SCAN_NODE,
+        MYSQL_SCAN_NODE,
+        ODBC_SCAN_NODE,
+        OLAP_SCAN_NODE,
+        SCHEMA_SCAN_NODE,
+    }
+
     public String getPlanNodeName() {
         return planNodeName;
     }

+    public StatsDeriveResult getStatsDeriveResult() {
+        return statsDeriveResult;
+    }
+
+    public NodeType getNodeType() {
+        return nodeType;
+    }
+
+    public void setStatsDeriveResult(StatsDeriveResult statsDeriveResult) {
+        this.statsDeriveResult = statsDeriveResult;
+    }
+
     /**
      * Sets tblRefIds_, tupleIds_, and nullableTupleIds_.
      * The default implementation is a no-op.
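The nodeType tag and the statsDeriveResult holder added to PlanNode above are the two hooks the rest of this change builds on. A hedged usage sketch, not part of the commit, of how they are meant to be read back by the new statistics classes introduced later in this diff (`node` stands for any already-built plan node; the call pattern mirrors what StatsRecursiveDerive does):

    // nodeType is recorded once in the constructor (see the ScanNode change below)
    // and selects the derivation strategy; the result is cached on the node itself.
    BaseStatsDerive derive = new DeriveFactory().getStatsDerive(node.getNodeType());
    derive.init(node);                               // collects limit, conjuncts and child stats
    node.setStatsDeriveResult(derive.deriveStats()); // memoized for later lookups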
@@ -65,8 +65,9 @@ abstract public class ScanNode extends PlanNode {
     protected String sortColumn = null;
     protected Analyzer analyzer;

-    public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
+    public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
         super(id, desc.getId().asList(), planNodeName);
+        super.nodeType = nodeType;
         this.desc = desc;
     }
@@ -57,7 +57,7 @@ public class SchemaScanNode extends ScanNode {
      * Constructs node to scan given data files of table 'tbl'.
      */
     public SchemaScanNode(PlanNodeId id, TupleDescriptor desc) {
-        super(id, desc, "SCAN SCHEMA");
+        super(id, desc, "SCAN SCHEMA", NodeType.SCHEMA_SCAN_NODE);
         this.tableName = desc.getTable().getName();
     }
@@ -1725,7 +1725,6 @@ public class SingleNodePlanner {
         scanNodeList.add(scanNode);

         scanNode.init(analyzer);

         return scanNode;
     }
@@ -699,14 +699,6 @@ public class StmtExecutor implements ProfileWriter {
                 }
                 if (explainOptions != null) parsedStmt.setIsExplain(explainOptions);
             }
-
-            if (parsedStmt instanceof InsertStmt && parsedStmt.isExplain()) {
-                if (ConnectContext.get() != null &&
-                        ConnectContext.get().getExecutor() != null &&
-                        ConnectContext.get().getExecutor().getParsedStmt() != null) {
-                    ConnectContext.get().getExecutor().getParsedStmt().setIsExplain(new ExplainOptions(true, false));
-                }
-            }
         }
         plannerProfile.setQueryAnalysisFinishTime();
@@ -0,0 +1,161 @@ BaseStatsDerive.java (new file, org.apache.doris.statistics)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.SlotId;
import org.apache.doris.common.UserException;
import org.apache.doris.planner.PlanNode;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

public class BaseStatsDerive {
    private static final Logger LOG = LogManager.getLogger(BaseStatsDerive.class);
    // estimate of the output rowCount of this node;
    // invalid: -1
    protected long rowCount = -1;
    protected long limit = -1;

    protected List<Expr> conjuncts = Lists.newArrayList();
    protected List<StatsDeriveResult> childrenStatsResult = Lists.newArrayList();

    protected void init(PlanNode node) throws UserException {
        limit = node.getLimit();
        conjuncts.addAll(node.getConjuncts());

        for (PlanNode childNode : node.getChildren()) {
            StatsDeriveResult result = childNode.getStatsDeriveResult();
            if (result == null) {
                throw new UserException("childNode statsDeriveResult is null, childNodeType is " + childNode.getNodeType()
                        + "parentNodeType is " + node.getNodeType());
            }
            childrenStatsResult.add(result);
        }
    }

    public StatsDeriveResult deriveStats() {
        return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
    }

    public boolean hasLimit() {
        return limit > -1;
    }

    protected void applyConjunctsSelectivity() {
        if (rowCount == -1) {
            return;
        }
        applySelectivity();
    }

    private void applySelectivity() {
        double selectivity = computeSelectivity();
        Preconditions.checkState(rowCount >= 0);
        long preConjunctrowCount = rowCount;
        rowCount = Math.round(rowCount * selectivity);
        // don't round rowCount down to zero for safety.
        if (rowCount == 0 && preConjunctrowCount > 0) {
            rowCount = 1;
        }
    }

    protected double computeSelectivity() {
        for (Expr expr : conjuncts) {
            expr.setSelectivity();
        }
        return computeCombinedSelectivity(conjuncts);
    }

    /**
     * Returns the estimated combined selectivity of all conjuncts. Uses heuristics to
     * address the following estimation challenges:
     * 1. The individual selectivities of conjuncts may be unknown.
     * 2. Two selectivities, whether known or unknown, could be correlated. Assuming
     *    independence can lead to significant underestimation.
     * <p>
     * The first issue is addressed by using a single default selectivity that is
     * representative of all conjuncts with unknown selectivities.
     * The second issue is addressed by an exponential backoff when multiplying each
     * additional selectivity into the final result.
     */
    protected double computeCombinedSelectivity(List<Expr> conjuncts) {
        // Collect all estimated selectivities.
        List<Double> selectivities = new ArrayList<>();
        for (Expr e : conjuncts) {
            if (e.hasSelectivity()) selectivities.add(e.getSelectivity());
        }
        if (selectivities.size() != conjuncts.size()) {
            // Some conjuncts have no estimated selectivity. Use a single default
            // representative selectivity for all those conjuncts.
            selectivities.add(Expr.DEFAULT_SELECTIVITY);
        }
        // Sort the selectivities to get a consistent estimate, regardless of the original
        // conjunct order. Sort in ascending order such that the most selective conjunct
        // is fully applied.
        Collections.sort(selectivities);
        double result = 1.0;
        // selectivity = 1 * (s1)^(1/1) * (s2)^(1/2) * ... * (sn-1)^(1/(n-1)) * (sn)^(1/n)
        for (int i = 0; i < selectivities.size(); ++i) {
            // Exponential backoff for each selectivity multiplied into the final result.
            result *= Math.pow(selectivities.get(i), 1.0 / (double) (i + 1));
        }
        // Bound result in [0, 1]
        return Math.max(0.0, Math.min(1.0, result));
    }

    protected void capRowCountAtLimit() {
        if (hasLimit()) {
            rowCount = rowCount == -1 ? limit : Math.min(rowCount, limit);
        }
    }

    // Currently it simply adds the number of rows of children
    protected long deriveRowCount() {
        for (StatsDeriveResult statsDeriveResult : childrenStatsResult) {
            rowCount = Math.max(rowCount, statsDeriveResult.getRowCount());
        }
        applyConjunctsSelectivity();
        capRowCountAtLimit();
        return rowCount;
    }

    protected HashMap<SlotId, Float> deriveColumnToDataSize() {
        HashMap<SlotId, Float> columnToDataSize = new HashMap<>();
        for (StatsDeriveResult child : childrenStatsResult) {
            columnToDataSize.putAll(child.getColumnToDataSize());
        }
        return columnToDataSize;
    }

    protected HashMap<SlotId, Long> deriveColumnToNdv() {
        HashMap<SlotId, Long> columnToNdv = new HashMap<>();
        for (StatsDeriveResult child : childrenStatsResult) {
            columnToNdv.putAll(child.getColumnToNdv());
        }
        return columnToNdv;
    }
}
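The exponential-backoff combination in computeCombinedSelectivity() is easier to see with numbers. A minimal, self-contained sketch (not part of the commit; the two conjunct selectivities 0.1 and 0.5 are made up): sorting ascending and backing off gives 0.1 * sqrt(0.5) ≈ 0.071, noticeably higher than the 0.05 a plain independence assumption would produce, which is the point of the heuristic.

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class SelectivityBackoffExample {
        public static void main(String[] args) {
            // Two hypothetical conjunct selectivities (placeholder values).
            List<Double> selectivities = Arrays.asList(0.5, 0.1);
            // Ascending sort so the most selective conjunct is applied in full.
            Collections.sort(selectivities);
            double result = 1.0;
            for (int i = 0; i < selectivities.size(); ++i) {
                // result = s1^(1/1) * s2^(1/2) * ... * sn^(1/n)
                result *= Math.pow(selectivities.get(i), 1.0 / (i + 1));
            }
            // Prints ~0.0707; a naive product of the two would give 0.05.
            System.out.println(Math.max(0.0, Math.min(1.0, result)));
        }
    }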
@@ -0,0 +1,36 @@ DeriveFactory.java (new file, org.apache.doris.statistics)
// (Apache License 2.0 header, identical to the one in BaseStatsDerive.java above)

package org.apache.doris.statistics;

import org.apache.doris.planner.PlanNode;

public class DeriveFactory {

    public BaseStatsDerive getStatsDerive(PlanNode.NodeType nodeType) {
        switch (nodeType) {
            case AGG_NODE:
            case HASH_JOIN_NODE:
            case MERGE_NODE:
                break;
            case OLAP_SCAN_NODE:
                return new OlapScanStatsDerive();
            case DEFAULT:
        }
        return new BaseStatsDerive();
    }
}
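A note on the dispatch above: only OLAP_SCAN_NODE currently gets a specialized derive; AGG_NODE, HASH_JOIN_NODE, MERGE_NODE, DEFAULT, and anything else fall back to the generic BaseStatsDerive. A hedged sketch of what a caller gets back (types and enum constants are the ones added in this diff):

    // Scan nodes get the specialized OlapScanStatsDerive...
    BaseStatsDerive scanDerive = new DeriveFactory().getStatsDerive(PlanNode.NodeType.OLAP_SCAN_NODE);
    // ...while joins (and everything else, for now) get the generic base class.
    BaseStatsDerive joinDerive = new DeriveFactory().getStatsDerive(PlanNode.NodeType.HASH_JOIN_NODE);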
@@ -0,0 +1,112 @@ OlapScanStatsDerive.java (new file, org.apache.doris.statistics)
// (Apache License 2.0 header, identical to the one in BaseStatsDerive.java above)

package org.apache.doris.statistics;

import com.google.common.base.Preconditions;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.SlotId;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
import org.apache.doris.planner.OlapScanNode;
import org.apache.doris.planner.PlanNode;

import java.util.HashMap;
import java.util.Map;

public class OlapScanStatsDerive extends BaseStatsDerive {
    // Currently, due to the structure of doris,
    // the selected materialized view is not determined when calculating the statistical information of scan,
    // so baseIndex is used for calculation when generating Planner.

    // The rowCount here is the number of rows.
    private long inputRowCount = -1;
    private Map<SlotId, Float> slotIdToDataSize;
    private Map<SlotId, Long> slotIdToNdv;
    private Map<SlotId, Pair<Long, String>> slotIdToTableIdAndColumnName;

    @Override
    public void init(PlanNode node) throws UserException {
        Preconditions.checkState(node instanceof OlapScanNode);
        super.init(node);
        buildStructure((OlapScanNode) node);
    }

    @Override
    public StatsDeriveResult deriveStats() {
        /**
         * Compute InAccurate cardinality before mv selector and tablet pruning.
         * - Accurate statistical information relies on the selector of materialized views and bucket reduction.
         * - However, Those both processes occur after the reorder algorithm is completed.
         * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm.
         * - So only an inaccurate cardinality can be calculated here.
         */
        rowCount = inputRowCount;
        for (Map.Entry<SlotId, Pair<Long, String>> pairEntry : slotIdToTableIdAndColumnName.entrySet()) {
            Pair<Long, Float> ndvAndDataSize = getNdvAndDataSizeFromStatistics(pairEntry.getValue());
            long ndv = ndvAndDataSize.first;
            float dataSize = ndvAndDataSize.second;
            slotIdToNdv.put(pairEntry.getKey(), ndv);
            slotIdToDataSize.put(pairEntry.getKey(), dataSize);
        }
        return new StatsDeriveResult(deriveRowCount(), slotIdToDataSize, slotIdToNdv);
    }

    public void buildStructure(OlapScanNode node) {
        slotIdToDataSize = new HashMap<>();
        slotIdToNdv = new HashMap<>();
        if (node.getTupleDesc() != null
                && node.getTupleDesc().getTable() != null) {
            long tableId = node.getTupleDesc().getTable().getId();
            inputRowCount = Catalog.getCurrentCatalog().getStatisticsManager()
                    .getStatistics().getTableStats(tableId).getRowCount();
        }
        for (SlotDescriptor slot : node.getTupleDesc().getSlots()) {
            if (!slot.isMaterialized()) {
                continue;
            }

            long tableId = slot.getParent().getTable().getId();
            String columnName = slot.getColumn().getName();
            slotIdToTableIdAndColumnName.put(slot.getId(), new Pair<>(tableId, columnName));
        }
    }

    //TODO:Implement the getStatistics interface
    //now there is nothing in statistics, need to wait for collection finished
    public Pair<Long, Float> getNdvAndDataSizeFromStatistics(Pair<Long, String> pair) {
        long ndv = -1;
        float dataSize = -1;
        /*
        if (Catalog.getCurrentCatalog()
                .getStatisticsManager()
                .getStatistics()
                .getColumnStats(pair.first) != null) {
            ndv = Catalog.getCurrentCatalog()
                    .getStatisticsManager()
                    .getStatistics()
                    .getColumnStats(pair.first).get(pair.second).getNdv();
            dataSize = Catalog.getCurrentCatalog()
                    .getStatisticsManager()
                    .getStatistics()
                    .getColumnStats(pair.first).get(pair.second).getDataSize();
        }
        */
        return new Pair<>(ndv, dataSize);
    }
}
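Until column statistics are actually collected, getNdvAndDataSizeFromStatistics() above returns -1 for both NDV and data size, so a scan's derived result effectively carries only the table-level row count, which OlapScanNode mocks from the base index via mockTableStatsWithRowCount(). A hedged sketch of what that looks like from the caller's side (`scanNode` is an assumed, already-derived OlapScanNode; the 1000-row figure is made up):

    // Row count comes from Statistics.getTableStats(tableId).getRowCount(),
    // i.e. the mocked base-index count; per-slot NDV and data size are -1 today.
    StatsDeriveResult scanStats = scanNode.getStatsDeriveResult();
    long rows = scanStats.getRowCount();   // e.g. 1000 after the mock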
@@ -70,4 +70,17 @@ public class Statistics {
         }
         return tableStats.getNameToColumnStats();
     }
+
+    // TODO: mock statistics need to be removed in the future
+    public void mockTableStatsWithRowCount(long tableId, long rowCount) {
+        TableStats tableStats = idToTableStats.get(tableId);
+        if (tableStats == null) {
+            tableStats = new TableStats();
+            idToTableStats.put(tableId, tableStats);
+        }
+
+        if (tableStats.getRowCount() != rowCount) {
+            tableStats.setRowCount(rowCount);
+        }
+    }
 }

@@ -136,4 +136,8 @@ public class StatisticsManager {
         Table table = db.getTableOrAnalysisException(tableName);
         return table;
     }
+
+    public Statistics getStatistics() {
+        return statistics;
+    }
 }
@@ -0,0 +1,56 @@ StatsDeriveResult.java (new file, org.apache.doris.statistics)
// (Apache License 2.0 header, identical to the one in BaseStatsDerive.java above)

package org.apache.doris.statistics;

import com.google.common.collect.Maps;
import org.apache.doris.analysis.SlotId;

import java.util.Map;

// This structure is maintained in each operator to store the statistical information results obtained by the operator.
public class StatsDeriveResult {
    private long rowCount = -1;
    // The data size of the corresponding column in the operator
    // The actual key is slotId
    private final Map<SlotId, Float> columnToDataSize = Maps.newHashMap();
    // The ndv of the corresponding column in the operator
    // The actual key is slotId
    private final Map<SlotId, Long> columnToNdv = Maps.newHashMap();

    public StatsDeriveResult(long rowCount, Map<SlotId, Float> columnToDataSize, Map<SlotId, Long> columnToNdv) {
        this.rowCount = rowCount;
        this.columnToDataSize.putAll(columnToDataSize);
        this.columnToNdv.putAll(columnToNdv);
    }

    public void setRowCount(long rowCount) {
        this.rowCount = rowCount;
    }

    public long getRowCount() {
        return rowCount;
    }

    public Map<SlotId, Long> getColumnToNdv() {
        return columnToNdv;
    }

    public Map<SlotId, Float> getColumnToDataSize() {
        return columnToDataSize;
    }
}
@@ -0,0 +1,56 @@ StatsRecursiveDerive.java (new file, org.apache.doris.statistics)
// (Apache License 2.0 header, identical to the one in BaseStatsDerive.java above)

package org.apache.doris.statistics;

import org.apache.doris.common.UserException;
import org.apache.doris.planner.PlanNode;

public class StatsRecursiveDerive {
    private StatsRecursiveDerive() {}

    public static StatsRecursiveDerive getStatsRecursiveDerive() {
        return Inner.INSTANCE;
    }

    private static class Inner {
        private static final StatsRecursiveDerive INSTANCE = new StatsRecursiveDerive();
    }

    /**
     * Recursively complete the derivation of statistics for this node and all its children
     * @param node
     * This parameter is an input and output parameter,
     * which will store the derivation result of statistical information in the corresponding node
     */
    public void statsRecursiveDerive(PlanNode node) throws UserException {
        if (node.getStatsDeriveResult() != null) {
            return;
        }
        for (PlanNode childNode : node.getChildren()) {
            if (childNode.getStatsDeriveResult() == null) {
                statsRecursiveDerive(childNode);
            }
        }
        DeriveFactory deriveFactory = new DeriveFactory();
        BaseStatsDerive deriveStats = deriveFactory.getStatsDerive(node.getNodeType());
        deriveStats.init(node);
        StatsDeriveResult result = deriveStats.deriveStats();
        node.setStatsDeriveResult(result);
    }
}
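This class is the entry point that ties the pieces together: the reworked OlapScanNode.computeInaccurateCardinality() in this same diff calls it on itself and then reads the cached result. A hedged sketch of that call pattern (`root` stands for an assumed, already-analyzed plan node):

    // Post-order, memoized derivation: children are derived first, and each
    // node's result is cached via setStatsDeriveResult(), so a node is derived once.
    StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(root);
    long estimatedRows = root.getStatsDeriveResult().getRowCount();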
@@ -94,4 +94,16 @@ public class TableStats {
     public Map<String, ColumnStats> getNameToColumnStats() {
         return nameToColumnStats;
     }
+
+    public long getRowCount() {
+        return rowCount;
+    }
+
+    public long getDataSize() {
+        return dataSize;
+    }
+
+    public void setRowCount(long rowCount) {
+        this.rowCount = rowCount;
+    }
 }
@@ -72,20 +72,27 @@ public class ExplainTest {
         Assert.assertEquals(dropDbStmt.toSql(), dropSchemaStmt.toSql());
     }

-    public void testExplainSelect() throws Exception {
-        String sql = "explain select * from test_explain.explain_t1 where dt = '1001';";
+    public void testExplainInsertInto() throws Exception {
+        String sql = "explain insert into test_explain.explain_t1 select * from test_explain.explain_t2";
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false);
         System.out.println(explainString);
         Assert.assertFalse(explainString.contains("CAST"));
     }

-    public void testExplainInsertInto() throws Exception {
+    public void testExplainVerboseInsertInto() throws Exception {
         String sql = "explain verbose insert into test_explain.explain_t1 select * from test_explain.explain_t2";
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true);
         System.out.println(explainString);
         Assert.assertTrue(explainString.contains("CAST"));
     }

+    public void testExplainSelect() throws Exception {
+        String sql = "explain select * from test_explain.explain_t1 where dt = '1001';";
+        String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false);
+        System.out.println(explainString);
+        Assert.assertFalse(explainString.contains("CAST"));
+    }
+
     public void testExplainVerboseSelect() throws Exception {
         String queryStr = "explain verbose select * from test_explain.explain_t1 where dt = '1001';";
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, queryStr, true);
@@ -1991,8 +1991,9 @@ public class QueryPlanTest {
     public void testExplainInsertInto() throws Exception {
         ExplainTest explainTest = new ExplainTest();
         explainTest.before(connectContext);
-        explainTest.testExplainSelect();
         explainTest.testExplainInsertInto();
+        explainTest.testExplainVerboseInsertInto();
+        explainTest.testExplainSelect();
         explainTest.testExplainVerboseSelect();
         explainTest.testExplainConcatSelect();
         explainTest.testExplainVerboseConcatSelect();

@@ -2088,7 +2089,7 @@ public class QueryPlanTest {
                 "\"storage_medium\" = \"HDD\",\n" +
                 "\"storage_format\" = \"V2\"\n" +
                 ");\n");
-        String queryStr = "EXPLAIN INSERT INTO result_exprs\n" +
+        String queryStr = "EXPLAIN VERBOSE INSERT INTO result_exprs\n" +
                 "SELECT a.aid,\n" +
                 " b.bid\n" +
                 "FROM\n" +

@@ -2098,7 +2099,7 @@ public class QueryPlanTest {
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, queryStr);
         Assert.assertFalse(explainString.contains("OUTPUT EXPRS:3 | 4"));
         System.out.println(explainString);
-        Assert.assertTrue(explainString.contains("OUTPUT EXPRS:`a`.`aid` | 4"));
+        Assert.assertTrue(explainString.contains("OUTPUT EXPRS:CAST(`a`.`aid` AS INT) | 4"));
     }

     @Test