[test](stats) Test framework for stats estimation on TPCH-1G dataset (#18267)

Implement a test framework for stats estimation on the TPCH-1G dataset to verify the accuracy of the estimates.
This commit is contained in:
AKIRA
2023-04-03 12:01:57 +09:00
committed by GitHub
parent 2bce4db81a
commit ce4dc681be
8 changed files with 2290 additions and 7 deletions

View File

@ -107,7 +107,7 @@ public class NereidsPlanner extends Planner {
PhysicalPlan physicalPlan = (PhysicalPlan) resultPlan;
PlanTranslatorContext planTranslatorContext = new PlanTranslatorContext(cascadesContext);
PhysicalPlanTranslator physicalPlanTranslator = new PhysicalPlanTranslator(planTranslatorContext,
ConnectContext.get().getStatsErrorEstimator());
statementContext.getConnectContext().getStatsErrorEstimator());
if (ConnectContext.get().getSessionVariable().isEnableNereidsTrace()) {
CounterEvent.clearCounter();
}

View File

@ -177,10 +177,9 @@ import java.util.stream.Stream;
*/
public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, PlanTranslatorContext> {
private static final Logger LOG = LogManager.getLogger(PhysicalPlanTranslator.class);
protected StatsErrorEstimator statsErrorEstimator;
PlanTranslatorContext context;
StatsErrorEstimator statsErrorEstimator;
// No-arg constructor; its body is empty, so it performs no initialization of its own.
// NOTE(review): presumably kept so that subclasses or tests can create a translator and
// supply `context` / `statsErrorEstimator` separately -- confirm against the callers.
public PhysicalPlanTranslator() {
}

View File

@ -23,6 +23,7 @@ import org.apache.doris.common.util.ProfileManager;
import org.apache.doris.nereids.trees.plans.AbstractPlan;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.planner.PlanNode;
import org.apache.doris.planner.PlanNodeId;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.thrift.TReportExecStatusParams;
import org.apache.doris.thrift.TRuntimeProfileNode;
@ -142,4 +143,9 @@ public class StatsErrorEstimator {
/**
 * Serializes this estimator with the shared {@code GsonUtils.GSON} instance.
 *
 * @return the JSON text produced for this object
 */
public String toJson() {
    final String json = GsonUtils.GSON.toJson(this);
    return json;
}
// For test only.
// Adds `d` to the `second` slot of the legacyPlanIdStats entry looked up by the integer
// form of the given plan node id.
// NOTE(review): despite the "set" prefix this accumulates (+=) instead of overwriting;
// presumably the slot starts at 0 so the first call behaves like a set -- confirm.
// NOTE(review): the looked-up entry is dereferenced without a check; if no entry was
// recorded for this id the call is expected to fail -- verify callers only pass known ids.
public void setExactReturnedRow(PlanNodeId planNodeId, Double d) {
legacyPlanIdStats.get(planNodeId.asInt()).second += d;
}
}

View File

@ -179,10 +179,6 @@ public class StatisticsUtil {
return new DateLiteral(columnValue, type);
case CHAR:
case VARCHAR:
if (columnValue.length() > scalarType.getLength()) {
throw new AnalysisException("Min/Max value is longer than length of column type: "
+ columnValue);
}
return new StringLiteral(columnValue);
case HLL:
case BITMAP:

View File

@ -19,8 +19,38 @@ package org.apache.doris.nereids.datasets.tpch;
import org.apache.doris.utframe.TestWithFeService;
import java.util.ArrayList;
import java.util.List;
public class TPCHUtils {
// Ordered list of the TPC-H query texts: index i holds query number i + 1 (Q1 at index 0,
// Q22 at index 21).
// NOTE(review): the anonymous-subclass ("double brace") initializer is what allows Q1..Q22
// to be named here although they are declared further down in the class; a plain
// initializer expression at this position would be rejected as an illegal forward
// reference unless the names were qualified or this field were moved below them.
// NOTE(review): this presumably relies on every Qn being a compile-time String constant
// (as Q1 appears to be), so the values are already available when this list is built --
// confirm before changing any Qn to a non-constant expression.
public static final List<String> SQLS = new ArrayList<String>() {
{
add(Q1);
add(Q2);
add(Q3);
add(Q4);
add(Q5);
add(Q6);
add(Q7);
add(Q8);
add(Q9);
add(Q10);
add(Q11);
add(Q12);
add(Q13);
add(Q14);
add(Q15);
add(Q16);
add(Q17);
add(Q18);
add(Q19);
add(Q20);
add(Q21);
add(Q22);
}
};
public static final String Q1 = "select\n"
+ " l_returnflag,\n"
+ " l_linestatus,\n"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,151 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.stats;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.nereids.NereidsPlanner;
import org.apache.doris.nereids.StatementContext;
import org.apache.doris.nereids.datasets.tpch.TPCHUtils;
import org.apache.doris.nereids.parser.NereidsParser;
import org.apache.doris.planner.PlanNodeId;
import org.apache.doris.qe.OriginStatement;
import org.apache.doris.statistics.ColumnLevelStatisticCache;
import org.apache.doris.statistics.StatisticsCache;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.statistics.util.StatisticsUtil;
import org.apache.doris.utframe.TestWithFeService;
import mockit.Mock;
import mockit.MockUp;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
// Assumes that all column names are unique in the tested database.
// CHECKSTYLE OFF
/**
 * Base harness that plans every query in {@link TPCHUtils#SQLS} with the Nereids planner
 * while column statistics are served from in-memory mocks, and collects the q-error that
 * {@code StatsErrorEstimator} reports for each query.
 *
 * <p>Subclasses provide the data set specifics through the abstract {@code init*} hooks
 * and then invoke {@link #run()}.
 *
 * <p>NOTE(review): {@link #initQError()} is declared but never invoked by {@link #run()},
 * and the final average-q-error assertion is still disabled, so {@link #avgQError} and
 * {@link #queryIdToQError} are currently not consulted by this class.
 */
public abstract class TestStats extends TestWithFeService {

    /** Column name to type lookup used by the mocked {@code StatisticsUtil.findColumn}. */
    protected static final Map<String, Type> colType = new HashMap<>();

    /** Mocked column statistics, keyed by column name. */
    protected Map<String/*colname*/, ColumnLevelStatisticCache> stats = new HashMap<>();

    // Column names and their types, kept as two parallel lists (same length, same order).
    // NOTE(review): presumably these mirror the schema of the internal column statistics
    // table -- confirm against the subclasses that build ResultRow instances from them.
    protected List<String> cols = new ArrayList<>();

    protected List<PrimitiveType> types = new ArrayList<>();

    protected List<String> values = new ArrayList<>();

    protected ResultRow resultRow = null;

    /** Exact row counts per plan node, keyed by 1-based query number. */
    protected Map<Integer/*query id*/, Map<PlanNodeId, Double>> mockedExactReturnedRows = new HashMap<>();

    protected Map<Integer, Double> queryIdToQError = new HashMap<>();

    protected double avgQError;

    // Plain instance initializer instead of "double brace" initialization: the lists stay
    // ordinary mutable ArrayLists and no anonymous subclass capturing this test instance
    // is created.
    {
        cols.add("id");
        cols.add("catalog_id");
        cols.add("db_id");
        cols.add("tbl_id");
        cols.add("idx_id");
        cols.add("col_id");
        cols.add("part_id");
        cols.add("count");
        cols.add("ndv");
        cols.add("null_count");
        cols.add("min");
        cols.add("max");
        cols.add("data_size_in_bytes");
        cols.add("update_time");

        types.add(PrimitiveType.VARCHAR);
        types.add(PrimitiveType.VARCHAR);
        types.add(PrimitiveType.VARCHAR);
        types.add(PrimitiveType.VARCHAR);
        types.add(PrimitiveType.VARCHAR);
        types.add(PrimitiveType.VARCHAR);
        types.add(PrimitiveType.VARCHAR);
        types.add(PrimitiveType.BIGINT);
        types.add(PrimitiveType.BIGINT);
        types.add(PrimitiveType.BIGINT);
        types.add(PrimitiveType.VARCHAR);
        types.add(PrimitiveType.VARCHAR);
        types.add(PrimitiveType.BIGINT);
        types.add(PrimitiveType.DATETIME);
    }

    /** Fills {@link #stats} with the mocked statistics of every column used by the queries. */
    protected abstract void initMockedColumnsStats();

    /** Hook for the expected q-error values; see the class-level note, it is not called by {@link #run()}. */
    protected abstract void initQError();

    /** Fills {@link #mockedExactReturnedRows}. */
    protected abstract void initMockedReturnedRows();

    /**
     * Prepares whatever the queries need before planning starts.
     *
     * @throws Exception if the environment cannot be set up
     */
    protected abstract void initEnv() throws Exception;

    /** Fills {@link #colType}. */
    protected abstract void initColNameToType();

    /**
     * Plans every TPC-H query once and records the q-error reported for each of them.
     *
     * @throws Exception if environment setup, parsing or planning fails
     */
    public void run() throws Exception {
        // Resolve columns from the in-memory type table.
        new MockUp<StatisticsUtil>() {
            @Mock
            public Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) {
                return new Column(columnName, colType.get(columnName));
            }
        };
        initMockedReturnedRows();
        initColNameToType();
        initMockedColumnsStats();
        // Serve column statistics from the mocked map; table and index ids are ignored,
        // which is why column names must be unique across the tested database.
        new MockUp<StatisticsCache>() {
            @Mock
            public ColumnLevelStatisticCache getColumnStatistics(long tblId, long idxId, String colName) {
                return stats.get(colName);
            }
        };
        connectContext.getSessionVariable().setEnableNereidsPlanner(true);
        connectContext.getSessionVariable().enableFallbackToOriginalPlanner = false;
        StatsErrorEstimator statsErrorEstimator = new StatsErrorEstimator();
        connectContext.setStatsErrorEstimator(statsErrorEstimator);
        List<Double> qErrorList = new ArrayList<>();
        initEnv();
        for (int i = 0; i < TPCHUtils.SQLS.size(); i++) {
            String sql = TPCHUtils.SQLS.get(i);
            // Queries are numbered from 1 in mockedExactReturnedRows.
            int sqlNumber = i + 1;
            NereidsPlanner nereidsPlanner = new NereidsPlanner(
                    new StatementContext(connectContext, new OriginStatement(sql, 0)));
            NereidsParser nereidsParser = new NereidsParser();
            nereidsPlanner.plan(nereidsParser.parseSQL(sql).get(0));
            Map<PlanNodeId, Double> exactReturnedRows = mockedExactReturnedRows.get(sqlNumber);
            // A query without mocked row counts is skipped instead of failing with a
            // NullPointerException on the missing map entry.
            if (exactReturnedRows != null) {
                for (Entry<PlanNodeId, Double> entry : exactReturnedRows.entrySet()) {
                    // TODO: feeding the exact row counts into the estimator is still disabled.
                    // statsErrorEstimator.setExactReturnedRow(entry.getKey(), entry.getValue());
                }
            }
            qErrorList.add(statsErrorEstimator.calculateQError());
            // Start the next query with a fresh estimator so results do not leak across queries.
            statsErrorEstimator = new StatsErrorEstimator();
            connectContext.setStatsErrorEstimator(statsErrorEstimator);
        }
        // TODO: the accuracy check below is still disabled.
        // Assert.assertTrue(
        //         qErrorList.stream()
        //                 .mapToDouble(Double::doubleValue).average().orElseGet(() -> Double.POSITIVE_INFINITY)
        //                 <= avgQError + 1);
    }
}

View File

@ -29,6 +29,7 @@ import org.apache.doris.analysis.CreateSqlBlockRuleStmt;
import org.apache.doris.analysis.CreateTableAsSelectStmt;
import org.apache.doris.analysis.CreateTableStmt;
import org.apache.doris.analysis.CreateViewStmt;
import org.apache.doris.analysis.DropDbStmt;
import org.apache.doris.analysis.DropPolicyStmt;
import org.apache.doris.analysis.DropSqlBlockRuleStmt;
import org.apache.doris.analysis.DropTableStmt;
@ -485,6 +486,12 @@ public abstract class TestWithFeService {
Env.getCurrentEnv().createDb(createDbStmt);
}
/**
 * Builds a {@code DROP DATABASE} statement for the given name, analyzes it and asks the
 * current {@code Env} to drop the database.
 *
 * @param db name of the database to drop
 * @throws Exception if the statement cannot be parsed/analyzed or the drop fails
 */
public void dropDatabase(String db) throws Exception {
    final String dropSql = "DROP DATABASE " + db;
    final DropDbStmt dropStmt = (DropDbStmt) parseAndAnalyzeStmt(dropSql);
    Env.getCurrentEnv().dropDb(dropStmt);
}
/**
 * Makes the given database the current one of the test connection, after qualifying its
 * name with the default cluster.
 *
 * @param dbName unqualified database name
 */
public void useDatabase(String dbName) {
    final String qualifiedName = ClusterNamespace.getFullName(SystemInfoService.DEFAULT_CLUSTER, dbName);
    connectContext.setDatabase(qualifiedName);
}