[feature](Nereids): cost and stats; (#10199)
feature: cost and stats; + Add cost model data struct. + Add operator visitor for calculating cost. + Add plan context.
This commit is contained in:
@ -0,0 +1,62 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.nereids;
|
||||
|
||||
import org.apache.doris.nereids.operators.Operator;
|
||||
import org.apache.doris.nereids.operators.plans.physical.PhysicalAggregation;
|
||||
import org.apache.doris.nereids.operators.plans.physical.PhysicalFilter;
|
||||
import org.apache.doris.nereids.operators.plans.physical.PhysicalHashJoin;
|
||||
import org.apache.doris.nereids.operators.plans.physical.PhysicalOlapScan;
|
||||
import org.apache.doris.nereids.operators.plans.physical.PhysicalProject;
|
||||
import org.apache.doris.nereids.operators.plans.physical.PhysicalSort;
|
||||
|
||||
/**
|
||||
* Base class for the processing of logical and physical operator.
|
||||
*
|
||||
* @param <R> Return type of each visit method.
|
||||
* @param <C> Context type.
|
||||
*/
|
||||
public abstract class OperatorVisitor<R, C> {
|
||||
/* Operator */
|
||||
|
||||
public abstract R visitOperator(Operator operator, C context);
|
||||
|
||||
public R visitPhysicalAggregation(PhysicalAggregation physicalAggregation, C context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public R visitPhysicalOlapScan(PhysicalOlapScan physicalOlapScan, C context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public R visitPhysicalSort(PhysicalSort physicalSort, C context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public R visitPhysicalHashJoin(PhysicalHashJoin physicalHashJoin, C context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public R visitPhysicalProject(PhysicalProject physicalProject, C context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public R visitPhysicalFilter(PhysicalFilter physicalFilter, C context) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,106 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
|
||||
package org.apache.doris.nereids;
|
||||
|
||||
import org.apache.doris.common.Id;
|
||||
import org.apache.doris.nereids.memo.GroupExpression;
|
||||
import org.apache.doris.nereids.properties.LogicalProperties;
|
||||
import org.apache.doris.nereids.trees.expressions.Slot;
|
||||
import org.apache.doris.nereids.trees.plans.Plan;
|
||||
import org.apache.doris.statistics.StatsDeriveResult;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Context for plan.
|
||||
* Abstraction for group expressions and stand-alone expressions/DAGs.
|
||||
* A ExpressionHandle is attached to either {@link Plan} or {@link GroupExpression}.
|
||||
* Inspired by GPORCA-CExpressionHandle.
|
||||
*/
|
||||
public class PlanContext {
|
||||
// array of children's derived stats
|
||||
private final List<StatsDeriveResult> childrenStats = Lists.newArrayList();
|
||||
// statistics of attached plan/gexpr
|
||||
private StatsDeriveResult statistics;
|
||||
// attached plan
|
||||
private Plan plan;
|
||||
// attached group expression
|
||||
private GroupExpression groupExpression;
|
||||
|
||||
public PlanContext(Plan plan) {
|
||||
this.plan = plan;
|
||||
}
|
||||
|
||||
public PlanContext(GroupExpression groupExpression) {
|
||||
this.groupExpression = groupExpression;
|
||||
}
|
||||
|
||||
public Plan getPlan() {
|
||||
return plan;
|
||||
}
|
||||
|
||||
public GroupExpression getGroupExpression() {
|
||||
return groupExpression;
|
||||
}
|
||||
|
||||
public List<StatsDeriveResult> getChildrenStats() {
|
||||
return childrenStats;
|
||||
}
|
||||
|
||||
public StatsDeriveResult getStatistics() {
|
||||
return statistics;
|
||||
}
|
||||
|
||||
public void setStatistics(StatsDeriveResult stats) {
|
||||
this.statistics = stats;
|
||||
}
|
||||
|
||||
public StatsDeriveResult getStatisticsWithCheck() {
|
||||
Preconditions.checkNotNull(statistics);
|
||||
return statistics;
|
||||
}
|
||||
|
||||
public LogicalProperties childLogicalPropertyAt(int index) {
|
||||
return plan.child(index).getLogicalProperties();
|
||||
}
|
||||
|
||||
public List<Slot> getChildOutputSlots(int index) {
|
||||
return childLogicalPropertyAt(index).getOutput();
|
||||
}
|
||||
|
||||
public List<Id> getChildOutputIds(int index) {
|
||||
List<Id> ids = Lists.newArrayList();
|
||||
childLogicalPropertyAt(index).getOutput().forEach(slot -> {
|
||||
ids.add(slot.getId());
|
||||
});
|
||||
return ids;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get child statistics.
|
||||
*/
|
||||
public StatsDeriveResult getChildStatistics(int index) {
|
||||
StatsDeriveResult statistics = childrenStats.get(index);
|
||||
Preconditions.checkNotNull(statistics);
|
||||
return statistics;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,102 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.nereids.cost;
|
||||
|
||||
import org.apache.doris.common.Id;
|
||||
import org.apache.doris.nereids.OperatorVisitor;
|
||||
import org.apache.doris.nereids.PlanContext;
|
||||
import org.apache.doris.nereids.memo.GroupExpression;
|
||||
import org.apache.doris.nereids.operators.Operator;
|
||||
import org.apache.doris.nereids.operators.plans.physical.PhysicalAggregation;
|
||||
import org.apache.doris.nereids.operators.plans.physical.PhysicalHashJoin;
|
||||
import org.apache.doris.nereids.operators.plans.physical.PhysicalOlapScan;
|
||||
import org.apache.doris.nereids.operators.plans.physical.PhysicalProject;
|
||||
import org.apache.doris.statistics.StatsDeriveResult;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Calculate the cost of a plan.
|
||||
* Inspired by Presto.
|
||||
*/
|
||||
public class CostCalculator {
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
public double calculateCost(GroupExpression groupExpression) {
|
||||
PlanContext planContext = new PlanContext(groupExpression);
|
||||
CostEstimator costCalculator = new CostEstimator();
|
||||
CostEstimate costEstimate = groupExpression.getOperator().accept(costCalculator, planContext);
|
||||
return costFormula(costEstimate);
|
||||
}
|
||||
|
||||
private double costFormula(CostEstimate costEstimate) {
|
||||
double cpuCostWeight = 1;
|
||||
double memoryCostWeight = 1;
|
||||
double networkCostWeight = 1;
|
||||
return costEstimate.getCpuCost() * cpuCostWeight + costEstimate.getMemoryCost() * memoryCostWeight
|
||||
+ costEstimate.getNetworkCost() * networkCostWeight;
|
||||
}
|
||||
|
||||
private static class CostEstimator extends OperatorVisitor<CostEstimate, PlanContext> {
|
||||
@Override
|
||||
public CostEstimate visitOperator(Operator operator, PlanContext context) {
|
||||
return CostEstimate.zero();
|
||||
}
|
||||
|
||||
@Override
|
||||
public CostEstimate visitPhysicalAggregation(PhysicalAggregation physicalAggregation, PlanContext context) {
|
||||
StatsDeriveResult statistics = context.getStatisticsWithCheck();
|
||||
return CostEstimate.ofCpu(statistics.computeSize());
|
||||
}
|
||||
|
||||
@Override
|
||||
public CostEstimate visitPhysicalOlapScan(PhysicalOlapScan physicalOlapScan, PlanContext context) {
|
||||
StatsDeriveResult statistics = context.getStatisticsWithCheck();
|
||||
return CostEstimate.ofCpu(statistics.computeSize());
|
||||
}
|
||||
|
||||
@Override
|
||||
public CostEstimate visitPhysicalHashJoin(PhysicalHashJoin physicalHashJoin, PlanContext context) {
|
||||
Preconditions.checkState(context.getGroupExpression().arity() == 2);
|
||||
Preconditions.checkState(context.getChildrenStats().size() == 2);
|
||||
|
||||
StatsDeriveResult leftStatistics = context.getChildStatistics(0);
|
||||
StatsDeriveResult rightStatistics = context.getChildStatistics(1);
|
||||
|
||||
// TODO: handle some case
|
||||
|
||||
List<Id> leftIds = context.getChildOutputIds(0);
|
||||
List<Id> rightIds = context.getChildOutputIds(1);
|
||||
|
||||
// handle cross join, onClause is empty .....
|
||||
|
||||
return new CostEstimate(
|
||||
leftStatistics.computeColumnSize(leftIds) + rightStatistics.computeColumnSize(rightIds),
|
||||
rightStatistics.computeColumnSize(rightIds), 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CostEstimate visitPhysicalProject(PhysicalProject physicalProject, PlanContext context) {
|
||||
StatsDeriveResult statistics = context.getStatisticsWithCheck();
|
||||
return CostEstimate.ofCpu(statistics.computeSize());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,74 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.nereids.cost;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
* Use for estimating the cost of plan.
|
||||
*/
|
||||
public final class CostEstimate {
|
||||
private static final CostEstimate INFINITE =
|
||||
new CostEstimate(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY);
|
||||
private static final CostEstimate ZERO = new CostEstimate(0, 0, 0);
|
||||
|
||||
private final double cpuCost;
|
||||
private final double memoryCost;
|
||||
private final double networkCost;
|
||||
|
||||
/**
|
||||
* Constructor of CostEstimate.
|
||||
*/
|
||||
public CostEstimate(double cpuCost, double memoryCost, double networkCost) {
|
||||
Preconditions.checkArgument(!(cpuCost < 0), "cpuCost cannot be negative: %s", cpuCost);
|
||||
Preconditions.checkArgument(!(memoryCost < 0), "memoryCost cannot be negative: %s", memoryCost);
|
||||
Preconditions.checkArgument(!(networkCost < 0), "networkCost cannot be negative: %s", networkCost);
|
||||
this.cpuCost = cpuCost;
|
||||
this.memoryCost = memoryCost;
|
||||
this.networkCost = networkCost;
|
||||
}
|
||||
|
||||
public static CostEstimate infinite() {
|
||||
return INFINITE;
|
||||
}
|
||||
|
||||
public static CostEstimate zero() {
|
||||
return ZERO;
|
||||
}
|
||||
|
||||
public double getCpuCost() {
|
||||
return cpuCost;
|
||||
}
|
||||
|
||||
public double getMemoryCost() {
|
||||
return memoryCost;
|
||||
}
|
||||
|
||||
public double getNetworkCost() {
|
||||
return networkCost;
|
||||
}
|
||||
|
||||
public static CostEstimate ofCpu(double cpuCost) {
|
||||
return new CostEstimate(cpuCost, 0, 0);
|
||||
}
|
||||
|
||||
public static CostEstimate ofMemory(double memoryCost) {
|
||||
return new CostEstimate(0, memoryCost, 0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
package org.apache.doris.nereids.operators;
|
||||
|
||||
import org.apache.doris.nereids.OperatorVisitor;
|
||||
import org.apache.doris.nereids.trees.plans.Plan;
|
||||
import org.apache.doris.nereids.trees.plans.PlanOperatorVisitor;
|
||||
|
||||
@ -48,14 +49,22 @@ public abstract class AbstractOperator implements Operator {
|
||||
* Child operator should overwrite this method.
|
||||
* for example:
|
||||
* <code>
|
||||
* visitor.visitPhysicalOlapScanPlan(
|
||||
* (PhysicalPlan<? extends PhysicalPlan, PhysicalOlapScan>) plan, context);
|
||||
* visitor.visitPhysicalOlapScanPlan(
|
||||
* (PhysicalPlan<? extends PhysicalPlan, PhysicalOlapScan>) plan, context);
|
||||
* </code>
|
||||
*/
|
||||
public <R, C> R accept(PlanOperatorVisitor<R, C> visitor, Plan plan, C context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public <R, C> R accept(OperatorVisitor<R, C> visitor, C context) {
|
||||
return visitor.visitOperator(this, context);
|
||||
}
|
||||
|
||||
public <R, C> R accept(OperatorVisitor<R, C> visitor, Operator operator, C context) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public long getLimited() {
|
||||
return limited;
|
||||
}
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
package org.apache.doris.nereids.operators;
|
||||
|
||||
import org.apache.doris.nereids.OperatorVisitor;
|
||||
import org.apache.doris.nereids.memo.GroupExpression;
|
||||
import org.apache.doris.nereids.trees.TreeNode;
|
||||
import org.apache.doris.nereids.trees.plans.Plan;
|
||||
@ -32,4 +33,7 @@ public interface Operator {
|
||||
|
||||
<R, C> R accept(PlanOperatorVisitor<R, C> visitor, Plan plan, C context);
|
||||
|
||||
<R, C> R accept(OperatorVisitor<R, C> visitor, Operator operator, C context);
|
||||
|
||||
<R, C> R accept(OperatorVisitor<R, C> visitor, C context);
|
||||
}
|
||||
|
||||
@ -84,6 +84,10 @@ public enum JoinType {
|
||||
}
|
||||
}
|
||||
|
||||
public final boolean isCrossJoin() {
|
||||
return this == CROSS_JOIN;
|
||||
}
|
||||
|
||||
public final boolean isInnerOrOuterOrCrossJoin() {
|
||||
return this == INNER_JOIN || this == CROSS_JOIN || this == FULL_OUTER_JOIN;
|
||||
}
|
||||
|
||||
@ -44,7 +44,7 @@ public class ExprId extends Id<ExprId> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Should be only called by {@link org.apache.doris.nereids.trees.expressions.NamedExpressionUtil}.
|
||||
* Should be only called by {@link org.apache.doris.nereids.trees.expressions.NamedExpressionUtil}.
|
||||
*/
|
||||
public static IdGenerator<ExprId> createGenerator() {
|
||||
return new IdGenerator<ExprId>() {
|
||||
|
||||
@ -23,10 +23,9 @@ import org.apache.doris.nereids.trees.NodeType;
|
||||
* Abstract class for all slot in expression.
|
||||
*/
|
||||
public abstract class Slot extends NamedExpression implements LeafExpression {
|
||||
private ExprId id;
|
||||
|
||||
private int id;
|
||||
|
||||
public Slot(NodeType type, int id, Expression... children) {
|
||||
public Slot(NodeType type, ExprId id, Expression... children) {
|
||||
super(type, children);
|
||||
this.id = id;
|
||||
}
|
||||
@ -40,7 +39,7 @@ public abstract class Slot extends NamedExpression implements LeafExpression {
|
||||
return this;
|
||||
}
|
||||
|
||||
public int getId() {
|
||||
public ExprId getId() {
|
||||
return id;
|
||||
}
|
||||
}
|
||||
|
||||
@ -277,7 +277,7 @@ public class PhysicalPlanTranslator extends PlanOperatorVisitor<PlanFragment, Pl
|
||||
tupleDescriptor.setTable(table);
|
||||
for (Slot slot : slotList) {
|
||||
SlotReference slotReference = (SlotReference) slot;
|
||||
SlotDescriptor slotDescriptor = context.addSlotDesc(tupleDescriptor, slot.getId());
|
||||
SlotDescriptor slotDescriptor = context.addSlotDesc(tupleDescriptor, slot.getId().asInt());
|
||||
slotDescriptor.setColumn(slotReference.getColumn());
|
||||
slotDescriptor.setType(slotReference.getDataType().toCatalogDataType());
|
||||
slotDescriptor.setIsMaterialized(true);
|
||||
|
||||
@ -21,9 +21,13 @@ import org.apache.doris.common.Id;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
// This structure is maintained in each operator to store the statistical information results obtained by the operator.
|
||||
/**
|
||||
* This structure is maintained in each operator to store the statistical information results obtained by the operator.
|
||||
*/
|
||||
public class StatsDeriveResult {
|
||||
private long rowCount = -1;
|
||||
// The data size of the corresponding column in the operator
|
||||
@ -39,6 +43,32 @@ public class StatsDeriveResult {
|
||||
this.columnToNdv.putAll(columnToNdv);
|
||||
}
|
||||
|
||||
public float computeSize() {
|
||||
return Math.max(1, columnToDataSize.values().stream().reduce((float) 0, Float::sum)) * rowCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the data size of all input columns.
|
||||
*
|
||||
* @param slotIds all input columns.
|
||||
* @return sum data size.
|
||||
*/
|
||||
public float computeColumnSize(List<Id> slotIds) {
|
||||
float count = 0;
|
||||
boolean exist = false;
|
||||
|
||||
for (Entry<Id, Float> entry : columnToDataSize.entrySet()) {
|
||||
if (slotIds.contains(entry.getKey())) {
|
||||
count += entry.getValue();
|
||||
exist = true;
|
||||
}
|
||||
}
|
||||
if (!exist) {
|
||||
return 1;
|
||||
}
|
||||
return count * rowCount;
|
||||
}
|
||||
|
||||
public void setRowCount(long rowCount) {
|
||||
this.rowCount = rowCount;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user