[test](stats) Test framework for stats estimation on TPCH-1G dataset (#18267)
Implement a test framework for stats estimation on TPCH-1G dataset to ensure accuracy
This commit is contained in:
@ -107,7 +107,7 @@ public class NereidsPlanner extends Planner {
|
||||
PhysicalPlan physicalPlan = (PhysicalPlan) resultPlan;
|
||||
PlanTranslatorContext planTranslatorContext = new PlanTranslatorContext(cascadesContext);
|
||||
PhysicalPlanTranslator physicalPlanTranslator = new PhysicalPlanTranslator(planTranslatorContext,
|
||||
ConnectContext.get().getStatsErrorEstimator());
|
||||
statementContext.getConnectContext().getStatsErrorEstimator());
|
||||
if (ConnectContext.get().getSessionVariable().isEnableNereidsTrace()) {
|
||||
CounterEvent.clearCounter();
|
||||
}
|
||||
|
||||
@ -177,10 +177,9 @@ import java.util.stream.Stream;
|
||||
*/
|
||||
public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, PlanTranslatorContext> {
|
||||
private static final Logger LOG = LogManager.getLogger(PhysicalPlanTranslator.class);
|
||||
protected StatsErrorEstimator statsErrorEstimator;
|
||||
PlanTranslatorContext context;
|
||||
|
||||
StatsErrorEstimator statsErrorEstimator;
|
||||
|
||||
public PhysicalPlanTranslator() {
|
||||
}
|
||||
|
||||
|
||||
@ -23,6 +23,7 @@ import org.apache.doris.common.util.ProfileManager;
|
||||
import org.apache.doris.nereids.trees.plans.AbstractPlan;
|
||||
import org.apache.doris.persist.gson.GsonUtils;
|
||||
import org.apache.doris.planner.PlanNode;
|
||||
import org.apache.doris.planner.PlanNodeId;
|
||||
import org.apache.doris.statistics.Statistics;
|
||||
import org.apache.doris.thrift.TReportExecStatusParams;
|
||||
import org.apache.doris.thrift.TRuntimeProfileNode;
|
||||
@ -142,4 +143,9 @@ public class StatsErrorEstimator {
|
||||
public String toJson() {
|
||||
return GsonUtils.GSON.toJson(this);
|
||||
}
|
||||
|
||||
// For test only.
|
||||
public void setExactReturnedRow(PlanNodeId planNodeId, Double d) {
|
||||
legacyPlanIdStats.get(planNodeId.asInt()).second += d;
|
||||
}
|
||||
}
|
||||
|
||||
@ -179,10 +179,6 @@ public class StatisticsUtil {
|
||||
return new DateLiteral(columnValue, type);
|
||||
case CHAR:
|
||||
case VARCHAR:
|
||||
if (columnValue.length() > scalarType.getLength()) {
|
||||
throw new AnalysisException("Min/Max value is longer than length of column type: "
|
||||
+ columnValue);
|
||||
}
|
||||
return new StringLiteral(columnValue);
|
||||
case HLL:
|
||||
case BITMAP:
|
||||
|
||||
@ -19,8 +19,38 @@ package org.apache.doris.nereids.datasets.tpch;
|
||||
|
||||
import org.apache.doris.utframe.TestWithFeService;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class TPCHUtils {
|
||||
|
||||
public static final List<String> SQLS = new ArrayList<String>() {
|
||||
{
|
||||
add(Q1);
|
||||
add(Q2);
|
||||
add(Q3);
|
||||
add(Q4);
|
||||
add(Q5);
|
||||
add(Q6);
|
||||
add(Q7);
|
||||
add(Q8);
|
||||
add(Q9);
|
||||
add(Q10);
|
||||
add(Q11);
|
||||
add(Q12);
|
||||
add(Q13);
|
||||
add(Q14);
|
||||
add(Q15);
|
||||
add(Q16);
|
||||
add(Q17);
|
||||
add(Q18);
|
||||
add(Q19);
|
||||
add(Q20);
|
||||
add(Q21);
|
||||
add(Q22);
|
||||
}
|
||||
};
|
||||
|
||||
public static final String Q1 = "select\n"
|
||||
+ " l_returnflag,\n"
|
||||
+ " l_linestatus,\n"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,151 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.nereids.stats;
|
||||
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.PrimitiveType;
|
||||
import org.apache.doris.catalog.Type;
|
||||
import org.apache.doris.nereids.NereidsPlanner;
|
||||
import org.apache.doris.nereids.StatementContext;
|
||||
import org.apache.doris.nereids.datasets.tpch.TPCHUtils;
|
||||
import org.apache.doris.nereids.parser.NereidsParser;
|
||||
import org.apache.doris.planner.PlanNodeId;
|
||||
import org.apache.doris.qe.OriginStatement;
|
||||
import org.apache.doris.statistics.ColumnLevelStatisticCache;
|
||||
import org.apache.doris.statistics.StatisticsCache;
|
||||
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
|
||||
import org.apache.doris.statistics.util.StatisticsUtil;
|
||||
import org.apache.doris.utframe.TestWithFeService;
|
||||
|
||||
import mockit.Mock;
|
||||
import mockit.MockUp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
// Assume that all column name is unique in the tested database
|
||||
// CHECKSTYLE OFF
|
||||
public abstract class TestStats extends TestWithFeService {
|
||||
|
||||
protected Map<String/*colname*/, ColumnLevelStatisticCache> stats = new HashMap<>();
|
||||
|
||||
protected List<String> cols = new ArrayList<String>() {{
|
||||
add("id");
|
||||
add("catalog_id");
|
||||
add("db_id");
|
||||
add("tbl_id");
|
||||
add("idx_id");
|
||||
add("col_id");
|
||||
add("part_id");
|
||||
add("count");
|
||||
add("ndv");
|
||||
add("null_count");
|
||||
add("min");
|
||||
add("max");
|
||||
add("data_size_in_bytes");
|
||||
add("update_time");
|
||||
}};
|
||||
|
||||
protected List<PrimitiveType> types = new ArrayList<PrimitiveType>() {{
|
||||
add(PrimitiveType.VARCHAR);
|
||||
add(PrimitiveType.VARCHAR);
|
||||
add(PrimitiveType.VARCHAR);
|
||||
add(PrimitiveType.VARCHAR);
|
||||
add(PrimitiveType.VARCHAR);
|
||||
add(PrimitiveType.VARCHAR);
|
||||
add(PrimitiveType.VARCHAR);
|
||||
add(PrimitiveType.BIGINT);
|
||||
add(PrimitiveType.BIGINT);
|
||||
add(PrimitiveType.BIGINT);
|
||||
add(PrimitiveType.VARCHAR);
|
||||
add(PrimitiveType.VARCHAR);
|
||||
add(PrimitiveType.BIGINT);
|
||||
add(PrimitiveType.DATETIME);
|
||||
}};
|
||||
|
||||
protected List<String> values = new ArrayList<>();
|
||||
|
||||
protected ResultRow resultRow = null;
|
||||
|
||||
protected final static Map<String, Type> colType = new HashMap<>();
|
||||
|
||||
protected abstract void initMockedColumnsStats();
|
||||
|
||||
protected abstract void initQError();
|
||||
|
||||
|
||||
protected abstract void initMockedReturnedRows();
|
||||
|
||||
protected abstract void initEnv() throws Exception;
|
||||
|
||||
protected abstract void initColNameToType();
|
||||
|
||||
protected Map<Integer/*query id*/, Map<PlanNodeId, Double>> mockedExactReturnedRows = new HashMap<>();
|
||||
protected Map<Integer, Double> queryIdToQError = new HashMap<>();
|
||||
|
||||
protected double avgQError;
|
||||
|
||||
|
||||
public void run() throws Exception {
|
||||
new MockUp<StatisticsUtil>() {
|
||||
|
||||
@Mock
|
||||
public Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) {
|
||||
return new Column(columnName, colType.get(columnName));
|
||||
}
|
||||
};
|
||||
initMockedReturnedRows();
|
||||
initColNameToType();
|
||||
initMockedColumnsStats();
|
||||
new MockUp<StatisticsCache>() {
|
||||
@Mock
|
||||
public ColumnLevelStatisticCache getColumnStatistics(long tblId, long idxId, String colName) {
|
||||
return stats.get(colName);
|
||||
}
|
||||
};
|
||||
|
||||
connectContext.getSessionVariable().setEnableNereidsPlanner(true);
|
||||
connectContext.getSessionVariable().enableFallbackToOriginalPlanner = false;
|
||||
StatsErrorEstimator statsErrorEstimator = new StatsErrorEstimator();
|
||||
connectContext.setStatsErrorEstimator(statsErrorEstimator);
|
||||
List<Double> qErrorList = new ArrayList<>();
|
||||
initEnv();
|
||||
for (int i = 0; i < TPCHUtils.SQLS.size(); i++) {
|
||||
String sql = TPCHUtils.SQLS.get(i);
|
||||
int sqlNumber = i + 1;
|
||||
NereidsPlanner nereidsPlanner = new NereidsPlanner(
|
||||
new StatementContext(connectContext, new OriginStatement(sql, 0)));
|
||||
NereidsParser nereidsParser = new NereidsParser();
|
||||
nereidsPlanner.plan(nereidsParser.parseSQL(sql).get(0));
|
||||
Map<PlanNodeId, Double> extractReturnedRows = mockedExactReturnedRows.get(sqlNumber);
|
||||
for (Entry<PlanNodeId, Double> entry : extractReturnedRows.entrySet()) {
|
||||
// statsErrorEstimator.setExactReturnedRow(entry.getKey(), entry.getValue());
|
||||
}
|
||||
qErrorList.add(statsErrorEstimator.calculateQError());
|
||||
statsErrorEstimator = new StatsErrorEstimator();
|
||||
connectContext.setStatsErrorEstimator(statsErrorEstimator);
|
||||
}
|
||||
// Assert.assertTrue(
|
||||
// qErrorList.stream()
|
||||
// .mapToDouble(Double::doubleValue).average().orElseGet(() -> Double.POSITIVE_INFINITY)
|
||||
// <= avgQError + 1);
|
||||
}
|
||||
}
|
||||
@ -29,6 +29,7 @@ import org.apache.doris.analysis.CreateSqlBlockRuleStmt;
|
||||
import org.apache.doris.analysis.CreateTableAsSelectStmt;
|
||||
import org.apache.doris.analysis.CreateTableStmt;
|
||||
import org.apache.doris.analysis.CreateViewStmt;
|
||||
import org.apache.doris.analysis.DropDbStmt;
|
||||
import org.apache.doris.analysis.DropPolicyStmt;
|
||||
import org.apache.doris.analysis.DropSqlBlockRuleStmt;
|
||||
import org.apache.doris.analysis.DropTableStmt;
|
||||
@ -485,6 +486,12 @@ public abstract class TestWithFeService {
|
||||
Env.getCurrentEnv().createDb(createDbStmt);
|
||||
}
|
||||
|
||||
public void dropDatabase(String db) throws Exception {
|
||||
String createDbStmtStr = "DROP DATABASE " + db;
|
||||
DropDbStmt createDbStmt = (DropDbStmt) parseAndAnalyzeStmt(createDbStmtStr);
|
||||
Env.getCurrentEnv().dropDb(createDbStmt);
|
||||
}
|
||||
|
||||
public void useDatabase(String dbName) {
|
||||
connectContext.setDatabase(ClusterNamespace.getFullName(SystemInfoService.DEFAULT_CLUSTER, dbName));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user