[feature](statistics) Statistics derivation. Step 1: ScanNode implementation (#8947)

* [feature](statistics) Statistics derivation. Step 1: ScanNode implementation

Co-authored-by: jianghaochen <jianghaochen@meituan.com>
@@ -138,7 +138,14 @@ public class BrokerScanNode extends LoadScanNode {

     public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
                           List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
-        super(id, destTupleDesc, planNodeName);
+        super(id, destTupleDesc, planNodeName, NodeType.BROKER_SCAN_NODE);
         this.fileStatusesList = fileStatusesList;
         this.filesAdded = filesAdded;
     }
+
+    public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
+                          List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded, NodeType nodeType) {
+        super(id, destTupleDesc, planNodeName, nodeType);
+        this.fileStatusesList = fileStatusesList;
+        this.filesAdded = filesAdded;
+    }
@@ -72,7 +72,7 @@ public class EsScanNode extends ScanNode {
     boolean isFinalized = false;

     public EsScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
-        super(id, desc, planNodeName);
+        super(id, desc, planNodeName, NodeType.ES_SCAN_NODE);
         table = (EsTable) (desc.getTable());
         esTablePartitions = table.getEsTablePartitions();
     }
@@ -100,7 +100,7 @@ public class HiveScanNode extends BrokerScanNode {

     public HiveScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
                         List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
-        super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded);
+        super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded, NodeType.HIVE_SCAN_NODE);
         this.hiveTable = (HiveTable) destTupleDesc.getTable();
     }
@@ -47,7 +47,7 @@ public class IcebergScanNode extends BrokerScanNode {

     public IcebergScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName,
                            List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
-        super(id, desc, planNodeName, fileStatusesList, filesAdded);
+        super(id, desc, planNodeName, fileStatusesList, filesAdded, NodeType.ICEBREG_SCAN_NODE);
         icebergTable = (IcebergTable) desc.getTable();
     }
@@ -54,7 +54,11 @@ public abstract class LoadScanNode extends ScanNode {
     protected LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND;

     public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
-        super(id, desc, planNodeName);
+        super(id, desc, planNodeName, NodeType.LOAD_SCAN_NODE);
     }
+
+    public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
+        super(id, desc, planNodeName, nodeType);
+    }

     protected void initAndSetWhereExpr(Expr whereExpr, TupleDescriptor tupleDesc, Analyzer analyzer) throws UserException {
@@ -56,7 +56,7 @@ public class MysqlScanNode extends ScanNode {
      * Constructs node to scan given data files of table 'tbl'.
      */
     public MysqlScanNode(PlanNodeId id, TupleDescriptor desc, MysqlTable tbl) {
-        super(id, desc, "SCAN MYSQL");
+        super(id, desc, "SCAN MYSQL", NodeType.MYSQL_SCAN_NODE);
         tblName = "`" + tbl.getMysqlTableName() + "`";
     }
@@ -73,7 +73,7 @@ public class OdbcScanNode extends ScanNode {
      * Constructs node to scan given data files of table 'tbl'.
      */
     public OdbcScanNode(PlanNodeId id, TupleDescriptor desc, OdbcTable tbl) {
-        super(id, desc, "SCAN ODBC");
+        super(id, desc, "SCAN ODBC", NodeType.ODBC_SCAN_NODE);
         connectString = tbl.getConnectString();
         odbcType = tbl.getOdbcTableType();
         tblName = OdbcTable.databaseProperName(odbcType, tbl.getOdbcTableName());
@@ -53,6 +53,7 @@ import org.apache.doris.common.util.Util;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.SessionVariable;
 import org.apache.doris.resource.Tag;
+import org.apache.doris.statistics.StatsRecursiveDerive;
 import org.apache.doris.system.Backend;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.TNetworkAddress;

@@ -146,7 +147,7 @@ public class OlapScanNode extends ScanNode {

     // Constructs node to scan given data files of table 'tbl'.
     public OlapScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
-        super(id, desc, planNodeName);
+        super(id, desc, planNodeName, NodeType.OLAP_SCAN_NODE);
         olapTable = (OlapTable) desc.getTable();
     }

@@ -346,10 +347,25 @@ public class OlapScanNode extends ScanNode {
          * - So only an inaccurate cardinality can be calculated here.
          */
         if (analyzer.safeIsEnableJoinReorderBasedCost()) {
+            mockRowCountInStatistic();
             computeInaccurateCardinality();
         }
     }

+    /**
+     * Remove the method after statistics collection is working properly
+     */
+    public void mockRowCountInStatistic() {
+        long tableId = desc.getTable().getId();
+        cardinality = 0;
+        for (long selectedPartitionId : selectedPartitionIds) {
+            final Partition partition = olapTable.getPartition(selectedPartitionId);
+            final MaterializedIndex baseIndex = partition.getBaseIndex();
+            cardinality += baseIndex.getRowCount();
+        }
+        Catalog.getCurrentCatalog().getStatisticsManager().getStatistics().mockTableStatsWithRowCount(tableId, cardinality);
+    }
+
     @Override
     public void finalize(Analyzer analyzer) throws UserException {
         LOG.debug("OlapScanNode get scan range locations. Tuple: {}", desc);

@@ -386,6 +402,12 @@ public class OlapScanNode extends ScanNode {
         }
         // when node scan has no data, cardinality should be 0 instead of a invalid value after computeStats()
         cardinality = cardinality == -1 ? 0 : cardinality;
+
+        // update statsDeriveResult for real statistics
+        // After statistics collection is complete, remove the logic
+        if (analyzer.safeIsEnableJoinReorderBasedCost()) {
+            statsDeriveResult.setRowCount(cardinality);
+        }
     }

     @Override

@@ -397,30 +419,9 @@ public class OlapScanNode extends ScanNode {
         numNodes = numNodes <= 0 ? 1 : numNodes;
     }

-    /**
-     * Calculate inaccurate cardinality.
-     * cardinality: the value of cardinality is the sum of rowcount which belongs to selectedPartitionIds
-     * The cardinality here is actually inaccurate, it will be greater than the actual value.
-     * There are two reasons
-     * 1. During the actual execution, not all tablets belonging to the selected partition will be scanned.
-     *    Some tablets may have been pruned before execution.
-     * 2. The base index may eventually be replaced by mv index.
-     * <p>
-     * There are three steps to calculate cardinality
-     * 1. Calculate how many rows were scanned
-     * 2. Apply conjunct
-     * 3. Apply limit
-     */
-    private void computeInaccurateCardinality() {
-        // step1: Calculate how many rows were scanned
-        cardinality = 0;
-        for (long selectedPartitionId : selectedPartitionIds) {
-            final Partition partition = olapTable.getPartition(selectedPartitionId);
-            final MaterializedIndex baseIndex = partition.getBaseIndex();
-            cardinality += baseIndex.getRowCount();
-        }
-        applyConjunctsSelectivity();
-        capCardinalityAtLimit();
+    private void computeInaccurateCardinality() throws UserException {
+        StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
+        cardinality = statsDeriveResult.getRowCount();
     }

     private Collection<Long> partitionPrune(PartitionInfo partitionInfo, PartitionNames partitionNames) throws AnalysisException {

@@ -563,7 +564,7 @@ public class OlapScanNode extends ScanNode {

             result.add(scanRangeLocations);
         }
-        // FIXME(dhc): we use cardinality here to simulate ndv
+
         if (tablets.size() == 0) {
             desc.setCardinality(0);
         } else {
@@ -36,6 +36,7 @@ import org.apache.doris.common.NotImplementedException;
 import org.apache.doris.common.TreeNode;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.VectorizedUtil;
+import org.apache.doris.statistics.StatsDeriveResult;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.TFunctionBinaryType;
 import org.apache.doris.thrift.TPlan;

@@ -135,6 +136,9 @@ abstract public class PlanNode extends TreeNode<PlanNode> {

     protected List<SlotId> outputSlotIds;

+    protected NodeType nodeType = NodeType.DEFAULT;
+    protected StatsDeriveResult statsDeriveResult;
+
     protected PlanNode(PlanNodeId id, ArrayList<TupleId> tupleIds, String planNodeName) {
         this.id = id;
         this.limit = -1;

@@ -173,12 +177,41 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
         this.planNodeName = VectorizedUtil.isVectorized() ?
                 "V" + planNodeName : planNodeName;
         this.numInstances = 1;
+        this.nodeType = nodeType;
     }

+    public enum NodeType {
+        DEFAULT,
+        AGG_NODE,
+        BROKER_SCAN_NODE,
+        HASH_JOIN_NODE,
+        HIVE_SCAN_NODE,
+        MERGE_NODE,
+        ES_SCAN_NODE,
+        ICEBREG_SCAN_NODE,
+        LOAD_SCAN_NODE,
+        MYSQL_SCAN_NODE,
+        ODBC_SCAN_NODE,
+        OLAP_SCAN_NODE,
+        SCHEMA_SCAN_NODE,
+    }
+
     public String getPlanNodeName() {
         return planNodeName;
     }

+    public StatsDeriveResult getStatsDeriveResult() {
+        return statsDeriveResult;
+    }
+
+    public NodeType getNodeType() {
+        return nodeType;
+    }
+
+    public void setStatsDeriveResult(StatsDeriveResult statsDeriveResult) {
+        this.statsDeriveResult = statsDeriveResult;
+    }
+
     /**
      * Sets tblRefIds_, tupleIds_, and nullableTupleIds_.
      * The default implementation is a no-op.
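The nodeType tag and the statsDeriveResult holder added to PlanNode above are the two hooks the rest of this change builds on. A hedged usage sketch, not part of the commit, of how they are meant to be read back by the new statistics classes introduced later in this diff (`node` stands for any already-built plan node; the call pattern mirrors what StatsRecursiveDerive does):

    // nodeType is recorded once in the constructor (see the ScanNode change below)
    // and selects the derivation strategy; the result is cached on the node itself.
    BaseStatsDerive derive = new DeriveFactory().getStatsDerive(node.getNodeType());
    derive.init(node);                               // collects limit, conjuncts and child stats
    node.setStatsDeriveResult(derive.deriveStats()); // memoized for later lookups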
@@ -65,8 +65,9 @@ abstract public class ScanNode extends PlanNode {
     protected String sortColumn = null;
     protected Analyzer analyzer;

-    public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
+    public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
         super(id, desc.getId().asList(), planNodeName);
+        super.nodeType = nodeType;
         this.desc = desc;
     }
@@ -57,7 +57,7 @@ public class SchemaScanNode extends ScanNode {
      * Constructs node to scan given data files of table 'tbl'.
      */
     public SchemaScanNode(PlanNodeId id, TupleDescriptor desc) {
-        super(id, desc, "SCAN SCHEMA");
+        super(id, desc, "SCAN SCHEMA", NodeType.SCHEMA_SCAN_NODE);
         this.tableName = desc.getTable().getName();
     }
@@ -1725,7 +1725,6 @@ public class SingleNodePlanner {
         scanNodeList.add(scanNode);

         scanNode.init(analyzer);

         return scanNode;
     }
@@ -699,14 +699,6 @@ public class StmtExecutor implements ProfileWriter {
                 }
                 if (explainOptions != null) parsedStmt.setIsExplain(explainOptions);
             }
-
-            if (parsedStmt instanceof InsertStmt && parsedStmt.isExplain()) {
-                if (ConnectContext.get() != null &&
-                        ConnectContext.get().getExecutor() != null &&
-                        ConnectContext.get().getExecutor().getParsedStmt() != null) {
-                    ConnectContext.get().getExecutor().getParsedStmt().setIsExplain(new ExplainOptions(true, false));
-                }
-            }
         }
         plannerProfile.setQueryAnalysisFinishTime();
@@ -0,0 +1,161 @@ BaseStatsDerive.java (new file, org.apache.doris.statistics)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.SlotId;
import org.apache.doris.common.UserException;
import org.apache.doris.planner.PlanNode;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

public class BaseStatsDerive {
    private static final Logger LOG = LogManager.getLogger(BaseStatsDerive.class);
    // estimate of the output rowCount of this node;
    // invalid: -1
    protected long rowCount = -1;
    protected long limit = -1;

    protected List<Expr> conjuncts = Lists.newArrayList();
    protected List<StatsDeriveResult> childrenStatsResult = Lists.newArrayList();

    protected void init(PlanNode node) throws UserException {
        limit = node.getLimit();
        conjuncts.addAll(node.getConjuncts());

        for (PlanNode childNode : node.getChildren()) {
            StatsDeriveResult result = childNode.getStatsDeriveResult();
            if (result == null) {
                throw new UserException("childNode statsDeriveResult is null, childNodeType is " + childNode.getNodeType()
                        + "parentNodeType is " + node.getNodeType());
            }
            childrenStatsResult.add(result);
        }
    }

    public StatsDeriveResult deriveStats() {
        return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
    }

    public boolean hasLimit() {
        return limit > -1;
    }

    protected void applyConjunctsSelectivity() {
        if (rowCount == -1) {
            return;
        }
        applySelectivity();
    }

    private void applySelectivity() {
        double selectivity = computeSelectivity();
        Preconditions.checkState(rowCount >= 0);
        long preConjunctrowCount = rowCount;
        rowCount = Math.round(rowCount * selectivity);
        // don't round rowCount down to zero for safety.
        if (rowCount == 0 && preConjunctrowCount > 0) {
            rowCount = 1;
        }
    }

    protected double computeSelectivity() {
        for (Expr expr : conjuncts) {
            expr.setSelectivity();
        }
        return computeCombinedSelectivity(conjuncts);
    }

    /**
     * Returns the estimated combined selectivity of all conjuncts. Uses heuristics to
     * address the following estimation challenges:
     * 1. The individual selectivities of conjuncts may be unknown.
     * 2. Two selectivities, whether known or unknown, could be correlated. Assuming
     *    independence can lead to significant underestimation.
     * <p>
     * The first issue is addressed by using a single default selectivity that is
     * representative of all conjuncts with unknown selectivities.
     * The second issue is addressed by an exponential backoff when multiplying each
     * additional selectivity into the final result.
     */
    protected double computeCombinedSelectivity(List<Expr> conjuncts) {
        // Collect all estimated selectivities.
        List<Double> selectivities = new ArrayList<>();
        for (Expr e : conjuncts) {
            if (e.hasSelectivity()) selectivities.add(e.getSelectivity());
        }
        if (selectivities.size() != conjuncts.size()) {
            // Some conjuncts have no estimated selectivity. Use a single default
            // representative selectivity for all those conjuncts.
            selectivities.add(Expr.DEFAULT_SELECTIVITY);
        }
        // Sort the selectivities to get a consistent estimate, regardless of the original
        // conjunct order. Sort in ascending order such that the most selective conjunct
        // is fully applied.
        Collections.sort(selectivities);
        double result = 1.0;
        // selectivity = 1 * (s1)^(1/1) * (s2)^(1/2) * ... * (sn-1)^(1/(n-1)) * (sn)^(1/n)
        for (int i = 0; i < selectivities.size(); ++i) {
            // Exponential backoff for each selectivity multiplied into the final result.
            result *= Math.pow(selectivities.get(i), 1.0 / (double) (i + 1));
        }
        // Bound result in [0, 1]
        return Math.max(0.0, Math.min(1.0, result));
    }

    protected void capRowCountAtLimit() {
        if (hasLimit()) {
            rowCount = rowCount == -1 ? limit : Math.min(rowCount, limit);
        }
    }

    // Currently it simply adds the number of rows of children
    protected long deriveRowCount() {
        for (StatsDeriveResult statsDeriveResult : childrenStatsResult) {
            rowCount = Math.max(rowCount, statsDeriveResult.getRowCount());
        }
        applyConjunctsSelectivity();
        capRowCountAtLimit();
        return rowCount;
    }

    protected HashMap<SlotId, Float> deriveColumnToDataSize() {
        HashMap<SlotId, Float> columnToDataSize = new HashMap<>();
        for (StatsDeriveResult child : childrenStatsResult) {
            columnToDataSize.putAll(child.getColumnToDataSize());
        }
        return columnToDataSize;
    }

    protected HashMap<SlotId, Long> deriveColumnToNdv() {
        HashMap<SlotId, Long> columnToNdv = new HashMap<>();
        for (StatsDeriveResult child : childrenStatsResult) {
            columnToNdv.putAll(child.getColumnToNdv());
        }
        return columnToNdv;
    }
}
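The exponential-backoff combination in computeCombinedSelectivity() is easier to see with numbers. A minimal, self-contained sketch (not part of the commit; the two conjunct selectivities 0.1 and 0.5 are made up): sorting ascending and backing off gives 0.1 * sqrt(0.5) ≈ 0.071, noticeably higher than the 0.05 a plain independence assumption would produce, which is the point of the heuristic.

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class SelectivityBackoffExample {
        public static void main(String[] args) {
            // Two hypothetical conjunct selectivities (placeholder values).
            List<Double> selectivities = Arrays.asList(0.5, 0.1);
            // Ascending sort so the most selective conjunct is applied in full.
            Collections.sort(selectivities);
            double result = 1.0;
            for (int i = 0; i < selectivities.size(); ++i) {
                // result = s1^(1/1) * s2^(1/2) * ... * sn^(1/n)
                result *= Math.pow(selectivities.get(i), 1.0 / (i + 1));
            }
            // Prints ~0.0707; a naive product of the two would give 0.05.
            System.out.println(Math.max(0.0, Math.min(1.0, result)));
        }
    }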
@@ -0,0 +1,36 @@ DeriveFactory.java (new file, org.apache.doris.statistics)
// (Apache License 2.0 header, identical to the one in BaseStatsDerive.java above)

package org.apache.doris.statistics;

import org.apache.doris.planner.PlanNode;

public class DeriveFactory {

    public BaseStatsDerive getStatsDerive(PlanNode.NodeType nodeType) {
        switch (nodeType) {
            case AGG_NODE:
            case HASH_JOIN_NODE:
            case MERGE_NODE:
                break;
            case OLAP_SCAN_NODE:
                return new OlapScanStatsDerive();
            case DEFAULT:
        }
        return new BaseStatsDerive();
    }
}
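A note on the dispatch above: only OLAP_SCAN_NODE currently gets a specialized derive; AGG_NODE, HASH_JOIN_NODE, MERGE_NODE, DEFAULT, and anything else fall back to the generic BaseStatsDerive. A hedged sketch of what a caller gets back (types and enum constants are the ones added in this diff):

    // Scan nodes get the specialized OlapScanStatsDerive...
    BaseStatsDerive scanDerive = new DeriveFactory().getStatsDerive(PlanNode.NodeType.OLAP_SCAN_NODE);
    // ...while joins (and everything else, for now) get the generic base class.
    BaseStatsDerive joinDerive = new DeriveFactory().getStatsDerive(PlanNode.NodeType.HASH_JOIN_NODE);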
@@ -0,0 +1,112 @@ OlapScanStatsDerive.java (new file, org.apache.doris.statistics)
// (Apache License 2.0 header, identical to the one in BaseStatsDerive.java above)

package org.apache.doris.statistics;

import com.google.common.base.Preconditions;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.SlotId;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
import org.apache.doris.planner.OlapScanNode;
import org.apache.doris.planner.PlanNode;

import java.util.HashMap;
import java.util.Map;

public class OlapScanStatsDerive extends BaseStatsDerive {
    // Currently, due to the structure of doris,
    // the selected materialized view is not determined when calculating the statistical information of scan,
    // so baseIndex is used for calculation when generating Planner.

    // The rowCount here is the number of rows.
    private long inputRowCount = -1;
    private Map<SlotId, Float> slotIdToDataSize;
    private Map<SlotId, Long> slotIdToNdv;
    private Map<SlotId, Pair<Long, String>> slotIdToTableIdAndColumnName;

    @Override
    public void init(PlanNode node) throws UserException {
        Preconditions.checkState(node instanceof OlapScanNode);
        super.init(node);
        buildStructure((OlapScanNode) node);
    }

    @Override
    public StatsDeriveResult deriveStats() {
        /**
         * Compute InAccurate cardinality before mv selector and tablet pruning.
         * - Accurate statistical information relies on the selector of materialized views and bucket reduction.
         * - However, Those both processes occur after the reorder algorithm is completed.
         * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm.
         * - So only an inaccurate cardinality can be calculated here.
         */
        rowCount = inputRowCount;
        for (Map.Entry<SlotId, Pair<Long, String>> pairEntry : slotIdToTableIdAndColumnName.entrySet()) {
            Pair<Long, Float> ndvAndDataSize = getNdvAndDataSizeFromStatistics(pairEntry.getValue());
            long ndv = ndvAndDataSize.first;
            float dataSize = ndvAndDataSize.second;
            slotIdToNdv.put(pairEntry.getKey(), ndv);
            slotIdToDataSize.put(pairEntry.getKey(), dataSize);
        }
        return new StatsDeriveResult(deriveRowCount(), slotIdToDataSize, slotIdToNdv);
    }

    public void buildStructure(OlapScanNode node) {
        slotIdToDataSize = new HashMap<>();
        slotIdToNdv = new HashMap<>();
        if (node.getTupleDesc() != null
                && node.getTupleDesc().getTable() != null) {
            long tableId = node.getTupleDesc().getTable().getId();
            inputRowCount = Catalog.getCurrentCatalog().getStatisticsManager()
                    .getStatistics().getTableStats(tableId).getRowCount();
        }
        for (SlotDescriptor slot : node.getTupleDesc().getSlots()) {
            if (!slot.isMaterialized()) {
                continue;
            }

            long tableId = slot.getParent().getTable().getId();
            String columnName = slot.getColumn().getName();
            slotIdToTableIdAndColumnName.put(slot.getId(), new Pair<>(tableId, columnName));
        }
    }

    //TODO:Implement the getStatistics interface
    //now there is nothing in statistics, need to wait for collection finished
    public Pair<Long, Float> getNdvAndDataSizeFromStatistics(Pair<Long, String> pair) {
        long ndv = -1;
        float dataSize = -1;
        /*
        if (Catalog.getCurrentCatalog()
                .getStatisticsManager()
                .getStatistics()
                .getColumnStats(pair.first) != null) {
            ndv = Catalog.getCurrentCatalog()
                    .getStatisticsManager()
                    .getStatistics()
                    .getColumnStats(pair.first).get(pair.second).getNdv();
            dataSize = Catalog.getCurrentCatalog()
                    .getStatisticsManager()
                    .getStatistics()
                    .getColumnStats(pair.first).get(pair.second).getDataSize();
        }
        */
        return new Pair<>(ndv, dataSize);
    }
}
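Until column statistics are actually collected, getNdvAndDataSizeFromStatistics() above returns -1 for both NDV and data size, so a scan's derived result effectively carries only the table-level row count, which OlapScanNode mocks from the base index via mockTableStatsWithRowCount(). A hedged sketch of what that looks like from the caller's side (`scanNode` is an assumed, already-derived OlapScanNode; the 1000-row figure is made up):

    // Row count comes from Statistics.getTableStats(tableId).getRowCount(),
    // i.e. the mocked base-index count; per-slot NDV and data size are -1 today.
    StatsDeriveResult scanStats = scanNode.getStatsDeriveResult();
    long rows = scanStats.getRowCount();   // e.g. 1000 after the mock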
@@ -70,4 +70,17 @@ public class Statistics {
         }
         return tableStats.getNameToColumnStats();
     }
+
+    // TODO: mock statistics need to be removed in the future
+    public void mockTableStatsWithRowCount(long tableId, long rowCount) {
+        TableStats tableStats = idToTableStats.get(tableId);
+        if (tableStats == null) {
+            tableStats = new TableStats();
+            idToTableStats.put(tableId, tableStats);
+        }
+
+        if (tableStats.getRowCount() != rowCount) {
+            tableStats.setRowCount(rowCount);
+        }
+    }
 }

@@ -136,4 +136,8 @@ public class StatisticsManager {
         Table table = db.getTableOrAnalysisException(tableName);
         return table;
     }
+
+    public Statistics getStatistics() {
+        return statistics;
+    }
 }
@@ -0,0 +1,56 @@ StatsDeriveResult.java (new file, org.apache.doris.statistics)
// (Apache License 2.0 header, identical to the one in BaseStatsDerive.java above)

package org.apache.doris.statistics;

import com.google.common.collect.Maps;
import org.apache.doris.analysis.SlotId;

import java.util.Map;

// This structure is maintained in each operator to store the statistical information results obtained by the operator.
public class StatsDeriveResult {
    private long rowCount = -1;
    // The data size of the corresponding column in the operator
    // The actual key is slotId
    private final Map<SlotId, Float> columnToDataSize = Maps.newHashMap();
    // The ndv of the corresponding column in the operator
    // The actual key is slotId
    private final Map<SlotId, Long> columnToNdv = Maps.newHashMap();

    public StatsDeriveResult(long rowCount, Map<SlotId, Float> columnToDataSize, Map<SlotId, Long> columnToNdv) {
        this.rowCount = rowCount;
        this.columnToDataSize.putAll(columnToDataSize);
        this.columnToNdv.putAll(columnToNdv);
    }

    public void setRowCount(long rowCount) {
        this.rowCount = rowCount;
    }

    public long getRowCount() {
        return rowCount;
    }

    public Map<SlotId, Long> getColumnToNdv() {
        return columnToNdv;
    }

    public Map<SlotId, Float> getColumnToDataSize() {
        return columnToDataSize;
    }
}
@@ -0,0 +1,56 @@ StatsRecursiveDerive.java (new file, org.apache.doris.statistics)
// (Apache License 2.0 header, identical to the one in BaseStatsDerive.java above)

package org.apache.doris.statistics;

import org.apache.doris.common.UserException;
import org.apache.doris.planner.PlanNode;

public class StatsRecursiveDerive {
    private StatsRecursiveDerive() {}

    public static StatsRecursiveDerive getStatsRecursiveDerive() {
        return Inner.INSTANCE;
    }

    private static class Inner {
        private static final StatsRecursiveDerive INSTANCE = new StatsRecursiveDerive();
    }

    /**
     * Recursively complete the derivation of statistics for this node and all its children
     * @param node
     * This parameter is an input and output parameter,
     * which will store the derivation result of statistical information in the corresponding node
     */
    public void statsRecursiveDerive(PlanNode node) throws UserException {
        if (node.getStatsDeriveResult() != null) {
            return;
        }
        for (PlanNode childNode : node.getChildren()) {
            if (childNode.getStatsDeriveResult() == null) {
                statsRecursiveDerive(childNode);
            }
        }
        DeriveFactory deriveFactory = new DeriveFactory();
        BaseStatsDerive deriveStats = deriveFactory.getStatsDerive(node.getNodeType());
        deriveStats.init(node);
        StatsDeriveResult result = deriveStats.deriveStats();
        node.setStatsDeriveResult(result);
    }
}
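This class is the entry point that ties the pieces together: the reworked OlapScanNode.computeInaccurateCardinality() in this same diff calls it on itself and then reads the cached result. A hedged sketch of that call pattern (`root` stands for an assumed, already-analyzed plan node):

    // Post-order, memoized derivation: children are derived first, and each
    // node's result is cached via setStatsDeriveResult(), so a node is derived once.
    StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(root);
    long estimatedRows = root.getStatsDeriveResult().getRowCount();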
@@ -94,4 +94,16 @@ public class TableStats {
     public Map<String, ColumnStats> getNameToColumnStats() {
         return nameToColumnStats;
     }
+
+    public long getRowCount() {
+        return rowCount;
+    }
+
+    public long getDataSize() {
+        return dataSize;
+    }
+
+    public void setRowCount(long rowCount) {
+        this.rowCount = rowCount;
+    }
 }
@@ -72,20 +72,27 @@ public class ExplainTest {
         Assert.assertEquals(dropDbStmt.toSql(), dropSchemaStmt.toSql());
     }

-    public void testExplainSelect() throws Exception {
-        String sql = "explain select * from test_explain.explain_t1 where dt = '1001';";
+    public void testExplainInsertInto() throws Exception {
+        String sql = "explain insert into test_explain.explain_t1 select * from test_explain.explain_t2";
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false);
         System.out.println(explainString);
         Assert.assertFalse(explainString.contains("CAST"));
     }

-    public void testExplainInsertInto() throws Exception {
+    public void testExplainVerboseInsertInto() throws Exception {
         String sql = "explain verbose insert into test_explain.explain_t1 select * from test_explain.explain_t2";
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true);
         System.out.println(explainString);
         Assert.assertTrue(explainString.contains("CAST"));
     }

+    public void testExplainSelect() throws Exception {
+        String sql = "explain select * from test_explain.explain_t1 where dt = '1001';";
+        String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false);
+        System.out.println(explainString);
+        Assert.assertFalse(explainString.contains("CAST"));
+    }
+
     public void testExplainVerboseSelect() throws Exception {
         String queryStr = "explain verbose select * from test_explain.explain_t1 where dt = '1001';";
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, queryStr, true);
@@ -1991,8 +1991,9 @@ public class QueryPlanTest {
     public void testExplainInsertInto() throws Exception {
         ExplainTest explainTest = new ExplainTest();
         explainTest.before(connectContext);
-        explainTest.testExplainSelect();
         explainTest.testExplainInsertInto();
+        explainTest.testExplainVerboseInsertInto();
+        explainTest.testExplainSelect();
         explainTest.testExplainVerboseSelect();
         explainTest.testExplainConcatSelect();
         explainTest.testExplainVerboseConcatSelect();

@@ -2088,7 +2089,7 @@ public class QueryPlanTest {
                 "\"storage_medium\" = \"HDD\",\n" +
                 "\"storage_format\" = \"V2\"\n" +
                 ");\n");
-        String queryStr = "EXPLAIN INSERT INTO result_exprs\n" +
+        String queryStr = "EXPLAIN VERBOSE INSERT INTO result_exprs\n" +
                 "SELECT a.aid,\n" +
                 " b.bid\n" +
                 "FROM\n" +

@@ -2098,7 +2099,7 @@ public class QueryPlanTest {
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, queryStr);
         Assert.assertFalse(explainString.contains("OUTPUT EXPRS:3 | 4"));
         System.out.println(explainString);
-        Assert.assertTrue(explainString.contains("OUTPUT EXPRS:`a`.`aid` | 4"));
+        Assert.assertTrue(explainString.contains("OUTPUT EXPRS:CAST(`a`.`aid` AS INT) | 4"));
     }

     @Test