[feature](Nereids): cost and stats; (#10199)

feature: cost and stats;

+ Add cost model data struct.
+ Add operator visitor for calculating cost.
+ Add plan context.
This commit is contained in:
jakevin
2022-06-22 22:46:22 +08:00
committed by GitHub
parent 8204b41d88
commit 2bd8b0713a
11 changed files with 399 additions and 9 deletions

View File

@ -0,0 +1,62 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids;
import org.apache.doris.nereids.operators.Operator;
import org.apache.doris.nereids.operators.plans.physical.PhysicalAggregation;
import org.apache.doris.nereids.operators.plans.physical.PhysicalFilter;
import org.apache.doris.nereids.operators.plans.physical.PhysicalHashJoin;
import org.apache.doris.nereids.operators.plans.physical.PhysicalOlapScan;
import org.apache.doris.nereids.operators.plans.physical.PhysicalProject;
import org.apache.doris.nereids.operators.plans.physical.PhysicalSort;
/**
* Base class for the processing of logical and physical operator.
*
* @param <R> Return type of each visit method.
* @param <C> Context type.
*/
public abstract class OperatorVisitor<R, C> {
/* Operator */
public abstract R visitOperator(Operator operator, C context);
public R visitPhysicalAggregation(PhysicalAggregation physicalAggregation, C context) {
return null;
}
public R visitPhysicalOlapScan(PhysicalOlapScan physicalOlapScan, C context) {
return null;
}
public R visitPhysicalSort(PhysicalSort physicalSort, C context) {
return null;
}
public R visitPhysicalHashJoin(PhysicalHashJoin physicalHashJoin, C context) {
return null;
}
public R visitPhysicalProject(PhysicalProject physicalProject, C context) {
return null;
}
public R visitPhysicalFilter(PhysicalFilter physicalFilter, C context) {
return null;
}
}

View File

@ -0,0 +1,106 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids;
import org.apache.doris.common.Id;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.statistics.StatsDeriveResult;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.util.List;
/**
* Context for plan.
* Abstraction for group expressions and stand-alone expressions/DAGs.
* A ExpressionHandle is attached to either {@link Plan} or {@link GroupExpression}.
* Inspired by GPORCA-CExpressionHandle.
*/
public class PlanContext {
// array of children's derived stats
private final List<StatsDeriveResult> childrenStats = Lists.newArrayList();
// statistics of attached plan/gexpr
private StatsDeriveResult statistics;
// attached plan
private Plan plan;
// attached group expression
private GroupExpression groupExpression;
public PlanContext(Plan plan) {
this.plan = plan;
}
public PlanContext(GroupExpression groupExpression) {
this.groupExpression = groupExpression;
}
public Plan getPlan() {
return plan;
}
public GroupExpression getGroupExpression() {
return groupExpression;
}
public List<StatsDeriveResult> getChildrenStats() {
return childrenStats;
}
public StatsDeriveResult getStatistics() {
return statistics;
}
public void setStatistics(StatsDeriveResult stats) {
this.statistics = stats;
}
public StatsDeriveResult getStatisticsWithCheck() {
Preconditions.checkNotNull(statistics);
return statistics;
}
public LogicalProperties childLogicalPropertyAt(int index) {
return plan.child(index).getLogicalProperties();
}
public List<Slot> getChildOutputSlots(int index) {
return childLogicalPropertyAt(index).getOutput();
}
public List<Id> getChildOutputIds(int index) {
List<Id> ids = Lists.newArrayList();
childLogicalPropertyAt(index).getOutput().forEach(slot -> {
ids.add(slot.getId());
});
return ids;
}
/**
* Get child statistics.
*/
public StatsDeriveResult getChildStatistics(int index) {
StatsDeriveResult statistics = childrenStats.get(index);
Preconditions.checkNotNull(statistics);
return statistics;
}
}

View File

@ -0,0 +1,102 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.cost;
import org.apache.doris.common.Id;
import org.apache.doris.nereids.OperatorVisitor;
import org.apache.doris.nereids.PlanContext;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.operators.Operator;
import org.apache.doris.nereids.operators.plans.physical.PhysicalAggregation;
import org.apache.doris.nereids.operators.plans.physical.PhysicalHashJoin;
import org.apache.doris.nereids.operators.plans.physical.PhysicalOlapScan;
import org.apache.doris.nereids.operators.plans.physical.PhysicalProject;
import org.apache.doris.statistics.StatsDeriveResult;
import com.google.common.base.Preconditions;
import java.util.List;
/**
* Calculate the cost of a plan.
* Inspired by Presto.
*/
public class CostCalculator {
/**
* Constructor.
*/
public double calculateCost(GroupExpression groupExpression) {
PlanContext planContext = new PlanContext(groupExpression);
CostEstimator costCalculator = new CostEstimator();
CostEstimate costEstimate = groupExpression.getOperator().accept(costCalculator, planContext);
return costFormula(costEstimate);
}
private double costFormula(CostEstimate costEstimate) {
double cpuCostWeight = 1;
double memoryCostWeight = 1;
double networkCostWeight = 1;
return costEstimate.getCpuCost() * cpuCostWeight + costEstimate.getMemoryCost() * memoryCostWeight
+ costEstimate.getNetworkCost() * networkCostWeight;
}
private static class CostEstimator extends OperatorVisitor<CostEstimate, PlanContext> {
@Override
public CostEstimate visitOperator(Operator operator, PlanContext context) {
return CostEstimate.zero();
}
@Override
public CostEstimate visitPhysicalAggregation(PhysicalAggregation physicalAggregation, PlanContext context) {
StatsDeriveResult statistics = context.getStatisticsWithCheck();
return CostEstimate.ofCpu(statistics.computeSize());
}
@Override
public CostEstimate visitPhysicalOlapScan(PhysicalOlapScan physicalOlapScan, PlanContext context) {
StatsDeriveResult statistics = context.getStatisticsWithCheck();
return CostEstimate.ofCpu(statistics.computeSize());
}
@Override
public CostEstimate visitPhysicalHashJoin(PhysicalHashJoin physicalHashJoin, PlanContext context) {
Preconditions.checkState(context.getGroupExpression().arity() == 2);
Preconditions.checkState(context.getChildrenStats().size() == 2);
StatsDeriveResult leftStatistics = context.getChildStatistics(0);
StatsDeriveResult rightStatistics = context.getChildStatistics(1);
// TODO: handle some case
List<Id> leftIds = context.getChildOutputIds(0);
List<Id> rightIds = context.getChildOutputIds(1);
// handle cross join, onClause is empty .....
return new CostEstimate(
leftStatistics.computeColumnSize(leftIds) + rightStatistics.computeColumnSize(rightIds),
rightStatistics.computeColumnSize(rightIds), 0);
}
@Override
public CostEstimate visitPhysicalProject(PhysicalProject physicalProject, PlanContext context) {
StatsDeriveResult statistics = context.getStatisticsWithCheck();
return CostEstimate.ofCpu(statistics.computeSize());
}
}
}

View File

@ -0,0 +1,74 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.cost;
import com.google.common.base.Preconditions;
/**
* Use for estimating the cost of plan.
*/
public final class CostEstimate {
private static final CostEstimate INFINITE =
new CostEstimate(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY);
private static final CostEstimate ZERO = new CostEstimate(0, 0, 0);
private final double cpuCost;
private final double memoryCost;
private final double networkCost;
/**
* Constructor of CostEstimate.
*/
public CostEstimate(double cpuCost, double memoryCost, double networkCost) {
Preconditions.checkArgument(!(cpuCost < 0), "cpuCost cannot be negative: %s", cpuCost);
Preconditions.checkArgument(!(memoryCost < 0), "memoryCost cannot be negative: %s", memoryCost);
Preconditions.checkArgument(!(networkCost < 0), "networkCost cannot be negative: %s", networkCost);
this.cpuCost = cpuCost;
this.memoryCost = memoryCost;
this.networkCost = networkCost;
}
public static CostEstimate infinite() {
return INFINITE;
}
public static CostEstimate zero() {
return ZERO;
}
public double getCpuCost() {
return cpuCost;
}
public double getMemoryCost() {
return memoryCost;
}
public double getNetworkCost() {
return networkCost;
}
public static CostEstimate ofCpu(double cpuCost) {
return new CostEstimate(cpuCost, 0, 0);
}
public static CostEstimate ofMemory(double memoryCost) {
return new CostEstimate(0, memoryCost, 0);
}
}

View File

@ -17,6 +17,7 @@
package org.apache.doris.nereids.operators;
import org.apache.doris.nereids.OperatorVisitor;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanOperatorVisitor;
@ -48,14 +49,22 @@ public abstract class AbstractOperator implements Operator {
* Child operator should overwrite this method.
* for example:
* <code>
* visitor.visitPhysicalOlapScanPlan(
* (PhysicalPlan<? extends PhysicalPlan, PhysicalOlapScan>) plan, context);
* visitor.visitPhysicalOlapScanPlan(
* (PhysicalPlan<? extends PhysicalPlan, PhysicalOlapScan>) plan, context);
* </code>
*/
public <R, C> R accept(PlanOperatorVisitor<R, C> visitor, Plan plan, C context) {
return null;
}
public <R, C> R accept(OperatorVisitor<R, C> visitor, C context) {
return visitor.visitOperator(this, context);
}
public <R, C> R accept(OperatorVisitor<R, C> visitor, Operator operator, C context) {
return null;
}
public long getLimited() {
return limited;
}

View File

@ -17,6 +17,7 @@
package org.apache.doris.nereids.operators;
import org.apache.doris.nereids.OperatorVisitor;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.trees.TreeNode;
import org.apache.doris.nereids.trees.plans.Plan;
@ -32,4 +33,7 @@ public interface Operator {
<R, C> R accept(PlanOperatorVisitor<R, C> visitor, Plan plan, C context);
<R, C> R accept(OperatorVisitor<R, C> visitor, Operator operator, C context);
<R, C> R accept(OperatorVisitor<R, C> visitor, C context);
}

View File

@ -84,6 +84,10 @@ public enum JoinType {
}
}
public final boolean isCrossJoin() {
return this == CROSS_JOIN;
}
public final boolean isInnerOrOuterOrCrossJoin() {
return this == INNER_JOIN || this == CROSS_JOIN || this == FULL_OUTER_JOIN;
}

View File

@ -44,7 +44,7 @@ public class ExprId extends Id<ExprId> {
}
/**
* Should be only called by {@link org.apache.doris.nereids.trees.expressions.NamedExpressionUtil}.
* Should be only called by {@link org.apache.doris.nereids.trees.expressions.NamedExpressionUtil}.
*/
public static IdGenerator<ExprId> createGenerator() {
return new IdGenerator<ExprId>() {

View File

@ -23,10 +23,9 @@ import org.apache.doris.nereids.trees.NodeType;
* Abstract class for all slot in expression.
*/
public abstract class Slot extends NamedExpression implements LeafExpression {
private ExprId id;
private int id;
public Slot(NodeType type, int id, Expression... children) {
public Slot(NodeType type, ExprId id, Expression... children) {
super(type, children);
this.id = id;
}
@ -40,7 +39,7 @@ public abstract class Slot extends NamedExpression implements LeafExpression {
return this;
}
public int getId() {
public ExprId getId() {
return id;
}
}

View File

@ -277,7 +277,7 @@ public class PhysicalPlanTranslator extends PlanOperatorVisitor<PlanFragment, Pl
tupleDescriptor.setTable(table);
for (Slot slot : slotList) {
SlotReference slotReference = (SlotReference) slot;
SlotDescriptor slotDescriptor = context.addSlotDesc(tupleDescriptor, slot.getId());
SlotDescriptor slotDescriptor = context.addSlotDesc(tupleDescriptor, slot.getId().asInt());
slotDescriptor.setColumn(slotReference.getColumn());
slotDescriptor.setType(slotReference.getDataType().toCatalogDataType());
slotDescriptor.setIsMaterialized(true);

View File

@ -21,9 +21,13 @@ import org.apache.doris.common.Id;
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
// This structure is maintained in each operator to store the statistical information results obtained by the operator.
/**
* This structure is maintained in each operator to store the statistical information results obtained by the operator.
*/
public class StatsDeriveResult {
private long rowCount = -1;
// The data size of the corresponding column in the operator
@ -39,6 +43,32 @@ public class StatsDeriveResult {
this.columnToNdv.putAll(columnToNdv);
}
public float computeSize() {
return Math.max(1, columnToDataSize.values().stream().reduce((float) 0, Float::sum)) * rowCount;
}
/**
* Compute the data size of all input columns.
*
* @param slotIds all input columns.
* @return sum data size.
*/
public float computeColumnSize(List<Id> slotIds) {
float count = 0;
boolean exist = false;
for (Entry<Id, Float> entry : columnToDataSize.entrySet()) {
if (slotIds.contains(entry.getKey())) {
count += entry.getValue();
exist = true;
}
}
if (!exist) {
return 1;
}
return count * rowCount;
}
public void setRowCount(long rowCount) {
this.rowCount = rowCount;
}