[enhancement](histogram) add histogram syntax and persist histogram statistics (#15490)
Histogram statistics are more expensive to collect, so they are collected and persisted separately from column statistics. This PR does the following work:

1. Adds the histogram syntax and the keyword `TABLE`
2. Adds a task for collecting histogram statistics
3. Persists histogram statistics
4. Replaces fastjson with gson
5. Adds unit tests...

Relevant syntax examples (following databases such as MySQL, the keyword `TABLE` is added; illustrative sketches of the generated SQL and the persisted format follow below):

```SQL
-- collect column statistics
ANALYZE TABLE statistics_test;

-- collect histogram statistics
ANALYZE TABLE statistics_test UPDATE HISTOGRAM ON col1,col2;
```

Based on #15317
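For illustration, this is roughly the statement that `HistogramTask` renders from `ANALYZE_HISTOGRAM_SQL_TEMPLATE` in the diff below — a minimal sketch only: the IDs, the database/table names, and the internal database name `__internal_schema` are made-up placeholder values, not output from a real run (it assumes the defaults `sampleRate = 0.2` and `maxBucketNum = 128` from `AnalysisTaskInfo`):

```SQL
-- Hypothetical rendering of ANALYZE_HISTOGRAM_SQL_TEMPLATE; all ids and names
-- below are placeholders for illustration only.
INSERT INTO __internal_schema.histogram_statistics
SELECT
    CONCAT(10086, '-', -1, '-', 'col1') AS id,
    0 AS catalog_id,
    10010 AS db_id,
    10086 AS tbl_id,
    -1 AS idx_id,
    'col1' AS col_id,
    0.2 AS sample_rate,
    `HISTOGRAM`(`col1`, 1, 128) AS buckets,
    NOW() AS create_time
FROM `test_db`.`statistics_test` TABLESAMPLE (20 PERCENT);
```

The `buckets` column stores the histogram serialized to JSON by `Histogram.serializeToJson`; the shape below is taken from the unit-test fixture in this diff:

```json
{
  "max_bucket_num": 128,
  "bucket_num": 5,
  "sample_rate": 1.0,
  "buckets": [
    {"lower": "2022-09-21 17:30:29", "upper": "2022-09-21 22:30:29", "count": 9, "pre_sum": 0, "ndv": 1},
    {"lower": "2022-09-22 17:30:29", "upper": "2022-09-22 22:30:29", "count": 10, "pre_sum": 9, "ndv": 1}
  ]
}
```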
@@ -615,7 +615,8 @@ terminal String
    KW_WRITE,
    KW_YEAR,
    KW_MTMV,
    KW_TYPECAST;
    KW_TYPECAST,
    KW_HISTOGRAM;

terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT;
terminal BITAND, BITOR, BITXOR, BITNOT;
@@ -2648,10 +2649,14 @@ show_create_routine_load_stmt ::=

// analyze statement
analyze_stmt ::=
    KW_ANALYZE table_name:tbl opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties
    KW_ANALYZE KW_TABLE table_name:tbl opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties
    {:
        RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties);
    :}
    | KW_ANALYZE KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM KW_ON ident_list:cols opt_properties:properties
    {:
        RESULT = new AnalyzeStmt(tbl, cols, properties);
    :}
    ;

// Grant statement
@@ -62,7 +62,6 @@ public class AlterColumnStatsStmt extends DdlStmt {
            .add(ColumnStatistic.NUM_NULLS)
            .add(ColumnStatistic.MIN_VALUE)
            .add(ColumnStatistic.MAX_VALUE)
            .add(ColumnStatistic.HISTOGRAM)
            .add(StatsType.DATA_SIZE)
            .build();
@@ -64,6 +64,8 @@ public class AnalyzeStmt extends DdlStmt {
    // time to wait for collect statistics
    public static final String CBO_STATISTICS_TASK_TIMEOUT_SEC = "cbo_statistics_task_timeout_sec";

    public boolean isHistogram = false;

    private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>()
            .add(CBO_STATISTICS_TASK_TIMEOUT_SEC)
            .build();

@@ -76,7 +78,7 @@ public class AnalyzeStmt extends DdlStmt {
    private TableIf table;

    private final PartitionNames optPartitionNames;
    private PartitionNames optPartitionNames;
    private List<String> optColumnNames;
    private Map<String, String> optProperties;

@@ -85,6 +87,16 @@ public class AnalyzeStmt extends DdlStmt {
    private final List<String> partitionNames = Lists.newArrayList();

    public AnalyzeStmt(TableName tableName,
            List<String> optColumnNames,
            Map<String, String> optProperties) {
        this.tableName = tableName;
        this.optColumnNames = optColumnNames;
        wholeTbl = CollectionUtils.isEmpty(optColumnNames);
        isHistogram = true;
        this.optProperties = optProperties;
    }

    public AnalyzeStmt(TableName tableName,
            List<String> optColumnNames,
            PartitionNames optPartitionNames,
@@ -2612,6 +2612,8 @@ public class FunctionSet<T> {
                    "", "", "", "", "", true, false, true, true));
            addBuiltin(AggregateFunction.createBuiltin(HISTOGRAM, Lists.newArrayList(t, Type.DOUBLE, Type.INT), Type.VARCHAR, t,
                    "", "", "", "", "", true, false, true, true));
            addBuiltin(AggregateFunction.createBuiltin(HISTOGRAM, Lists.newArrayList(t, Type.DOUBLE, Type.INT), Type.VARCHAR, t,
                    "", "", "", "", "", true, false, true, true));
        }

        // Avg
@@ -79,6 +79,7 @@ public class InternalSchemaInitializer extends Thread {

    private void createTbl() throws UserException {
        Env.getCurrentEnv().getInternalCatalog().createTable(buildStatisticsTblStmt());
        Env.getCurrentEnv().getInternalCatalog().createTable(buildHistogramTblStmt());
        Env.getCurrentEnv().getInternalCatalog().createTable(buildAnalysisJobTblStmt());
    }

@@ -115,7 +116,6 @@ public class InternalSchemaInitializer extends Thread {
        columnDefs.add(new ColumnDef("null_count", TypeDef.create(PrimitiveType.BIGINT)));
        columnDefs.add(new ColumnDef("min", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH)));
        columnDefs.add(new ColumnDef("max", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH)));
        columnDefs.add(new ColumnDef("histogram", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH)));
        columnDefs.add(new ColumnDef("data_size_in_bytes", TypeDef.create(PrimitiveType.BIGINT)));
        columnDefs.add(new ColumnDef("update_time", TypeDef.create(PrimitiveType.DATETIME)));
        String engineName = "olap";

@@ -138,6 +138,39 @@ public class InternalSchemaInitializer extends Thread {
        return createTableStmt;
    }

    @VisibleForTesting
    public CreateTableStmt buildHistogramTblStmt() throws UserException {
        TableName tableName = new TableName("",
                FeConstants.INTERNAL_DB_NAME, StatisticConstants.HISTOGRAM_TBL_NAME);
        List<ColumnDef> columnDefs = new ArrayList<>();
        columnDefs.add(new ColumnDef("id", TypeDef.createVarchar(StatisticConstants.ID_LEN)));
        columnDefs.add(new ColumnDef("catalog_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)));
        columnDefs.add(new ColumnDef("db_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)));
        columnDefs.add(new ColumnDef("tbl_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)));
        columnDefs.add(new ColumnDef("idx_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)));
        columnDefs.add(new ColumnDef("col_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)));
        columnDefs.add(new ColumnDef("sample_rate", TypeDef.create(PrimitiveType.DOUBLE)));
        columnDefs.add(new ColumnDef("buckets", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH)));
        columnDefs.add(new ColumnDef("update_time", TypeDef.create(PrimitiveType.DATETIME)));
        String engineName = "olap";
        KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS,
                Lists.newArrayList("id"));
        DistributionDesc distributionDesc = new HashDistributionDesc(
                StatisticConstants.STATISTIC_TABLE_BUCKET_COUNT,
                Lists.newArrayList("id"));
        Map<String, String> properties = new HashMap<String, String>() {
            {
                put("replication_num", String.valueOf(Config.statistic_internal_table_replica_num));
            }
        };
        CreateTableStmt createTableStmt = new CreateTableStmt(true, false,
                tableName, columnDefs, engineName, keysDesc, null, distributionDesc,
                properties, null, "Doris internal statistics table, don't modify it", null);
        StatisticsUtil.analyze(createTableStmt);
        // createTableStmt.setClusterName(SystemInfoService.DEFAULT_CLUSTER);
        return createTableStmt;
    }

    @VisibleForTesting
    public CreateTableStmt buildAnalysisJobTblStmt() throws UserException {
        TableName tableName = new TableName("",
@@ -51,6 +51,7 @@ import org.apache.doris.statistics.AnalysisTaskInfo;
import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;
import org.apache.doris.statistics.AnalysisTaskScheduler;
import org.apache.doris.statistics.BaseAnalysisTask;
import org.apache.doris.statistics.HistogramTask;
import org.apache.doris.statistics.MVAnalysisTask;
import org.apache.doris.statistics.OlapAnalysisTask;
import org.apache.doris.system.Backend;

@@ -1005,6 +1006,9 @@ public class OlapTable extends Table {

    @Override
    public BaseAnalysisTask createAnalysisTask(AnalysisTaskScheduler scheduler, AnalysisTaskInfo info) {
        if (info.analysisType.equals(AnalysisType.HISTOGRAM)) {
            return new HistogramTask(scheduler, info);
        }
        if (info.analysisType.equals(AnalysisType.COLUMN)) {
            return new OlapAnalysisTask(scheduler, info);
        }
@@ -290,6 +290,7 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
        return computeTopN(topN);
    }

    @Override
    public StatsDeriveResult visitPhysicalLocalQuickSort(PhysicalLocalQuickSort<? extends Plan> sort, Void context) {
        return groupExpression.childStatistics(0);
    }

@@ -447,7 +448,6 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
                stats.dataSize < 0 ? stats.dataSize : stats.dataSize * groupingSetNum,
                stats.minValue,
                stats.maxValue,
                stats.histogram,
                stats.selectivity,
                stats.minExpr,
                stats.maxExpr,

@@ -538,7 +538,6 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
                leftStats.dataSize + rightStats.dataSize,
                Math.min(leftStats.minValue, rightStats.minValue),
                Math.max(leftStats.maxValue, rightStats.maxValue),
                null,
                1.0 / (leftStats.ndv + rightStats.ndv),
                leftStats.minExpr,
                leftStats.maxExpr,
@@ -71,7 +71,7 @@ public class Numbers extends TableValuedFunction {

            Map<Id, ColumnStatistic> columnToStatistics = Maps.newHashMap();
            ColumnStatistic columnStat = new ColumnStatistic(rowNum, rowNum, 8, 0, 8, 0, rowNum - 1,
                    null, 1.0 / rowNum, new IntLiteral(0, Type.BIGINT), new IntLiteral(rowNum - 1, Type.BIGINT), false);
                    1.0 / rowNum, new IntLiteral(0, Type.BIGINT), new IntLiteral(rowNum - 1, Type.BIGINT), false);
            columnToStatistics.put(slots.get(0).getExprId(), columnStat);
            return new StatsDeriveResult(rowNum, columnToStatistics);
        } catch (Exception t) {
@@ -43,6 +43,8 @@ import java.util.concurrent.ConcurrentMap;

public class AnalysisManager {

    public final AnalysisTaskScheduler taskScheduler;

    private static final Logger LOG = LogManager.getLogger(AnalysisManager.class);

    private static final String UPDATE_JOB_STATE_SQL_TEMPLATE = "UPDATE "

@@ -51,8 +53,6 @@ public class AnalysisManager {

    private final ConcurrentMap<Long, Map<Long, AnalysisTaskInfo>> analysisJobIdToTaskMap;

    public final AnalysisTaskScheduler taskScheduler;

    private StatisticsCache statisticsCache;

    private final AnalysisTaskExecutor taskExecutor;

@@ -76,10 +76,11 @@ public class AnalysisManager {
        if (colNames != null) {
            for (String colName : colNames) {
                long taskId = Env.getCurrentEnv().getNextId();
                AnalysisType analType = analyzeStmt.isHistogram ? AnalysisType.HISTOGRAM : AnalysisType.COLUMN;
                AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(jobId)
                        .setTaskId(taskId).setCatalogName(catalogName).setDbName(db)
                        .setTblName(tbl.getTbl()).setColName(colName).setJobType(JobType.MANUAL)
                        .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.COLUMN)
                        .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(analType)
                        .setState(AnalysisState.PENDING)
                        .setScheduleType(ScheduleType.ONCE).build();
                try {
@@ -34,7 +34,8 @@ public class AnalysisTaskInfo {

    public enum AnalysisType {
        COLUMN,
        INDEX
        INDEX,
        HISTOGRAM
    }

    public enum JobType {

@@ -69,6 +70,10 @@ public class AnalysisTaskInfo {

    public final AnalysisType analysisType;

    // TODO: define constants or get them from configuration properties
    public final double sampleRate = 0.2;
    public final int maxBucketNum = 128;

    public String message;

    // finished or failed
@@ -26,9 +26,13 @@ import org.apache.doris.qe.StmtExecutor;
import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;

import com.google.common.annotations.VisibleForTesting;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

public abstract class BaseAnalysisTask {

    public static final Logger LOG = LogManager.getLogger(BaseAnalysisTask.class);

    protected static final String INSERT_PART_STATISTICS = "INSERT INTO "
            + "${internalDB}.${columnStatTbl}"
            + " SELECT "

@@ -119,7 +123,8 @@ public abstract class BaseAnalysisTask {
                    info, AnalysisState.FAILED,
                    String.format("Table with name %s not exists", info.tblName), System.currentTimeMillis());
        }
        if (info.analysisType != null && info.analysisType.equals(AnalysisType.COLUMN)) {
        if (info.analysisType != null && (info.analysisType.equals(AnalysisType.COLUMN)
                || info.analysisType.equals(AnalysisType.HISTOGRAM))) {
            col = tbl.getColumn(info.colName);
            if (col == null) {
                Env.getCurrentEnv().getAnalysisManager().updateTaskStatus(
@@ -18,6 +18,15 @@
package org.apache.doris.statistics;

import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.collect.Lists;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;

import java.util.List;

public class Bucket {
    public LiteralExpr lower;

@@ -65,4 +74,20 @@ public class Bucket {
    public void setNdv(int ndv) {
        this.ndv = ndv;
    }

    public static List<Bucket> deserializeFromjson(Type datatype, JsonArray jsonArray)
            throws AnalysisException {
        List<Bucket> buckets = Lists.newArrayList();
        for (int i = 0; i < jsonArray.size(); i++) {
            JsonObject bucketJson = jsonArray.get(i).getAsJsonObject();
            Bucket bucket = new Bucket();
            bucket.lower = StatisticsUtil.readableValue(datatype, bucketJson.get("lower").getAsString());
            bucket.upper = StatisticsUtil.readableValue(datatype, bucketJson.get("upper").getAsString());
            bucket.count = bucketJson.get("count").getAsInt();
            bucket.preSum = bucketJson.get("pre_sum").getAsInt();
            bucket.ndv = bucketJson.get("ndv").getAsInt();
            buckets.add(bucket);
        }
        return buckets;
    }
}
@@ -19,12 +19,7 @@ package org.apache.doris.statistics;

import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.Type;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.statistics.util.StatisticsUtil;

@@ -42,13 +37,12 @@ public class ColumnStatistic {
    public static final StatsType NUM_NULLS = StatsType.NUM_NULLS;
    public static final StatsType MIN_VALUE = StatsType.MIN_VALUE;
    public static final StatsType MAX_VALUE = StatsType.MAX_VALUE;
    public static final StatsType HISTOGRAM = StatsType.HISTOGRAM;

    private static final Logger LOG = LogManager.getLogger(ColumnStatistic.class);

    public static ColumnStatistic DEFAULT = new ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1)
            .setNumNulls(1).setCount(1).setMaxValue(Double.MAX_VALUE).setMinValue(Double.MIN_VALUE)
            .setHistogram(Histogram.defaultHistogram()).setSelectivity(1.0).setIsUnknown(true)
            .setSelectivity(1.0).setIsUnknown(true)
            .build();

    public static final Set<Type> MAX_MIN_UNSUPPORTED_TYPE = new HashSet<>();

@@ -68,7 +62,6 @@ public class ColumnStatistic {
    public final double avgSizeByte;
    public final double minValue;
    public final double maxValue;
    public final Histogram histogram;
    public final boolean isUnKnown;
    /*
    selectivity of Column T1.A:

@@ -90,8 +83,7 @@ public class ColumnStatistic {

    public ColumnStatistic(double count, double ndv, double avgSizeByte,
            double numNulls, double dataSize, double minValue, double maxValue,
            Histogram histogram, double selectivity, LiteralExpr minExpr,
            LiteralExpr maxExpr, boolean isUnKnown) {
            double selectivity, LiteralExpr minExpr, LiteralExpr maxExpr, boolean isUnKnown) {
        this.count = count;
        this.ndv = ndv;
        this.avgSizeByte = avgSizeByte;

@@ -99,7 +91,6 @@ public class ColumnStatistic {
        this.dataSize = dataSize;
        this.minValue = minValue;
        this.maxValue = maxValue;
        this.histogram = histogram;
        this.selectivity = selectivity;
        this.minExpr = minExpr;
        this.maxExpr = maxExpr;

@@ -127,7 +118,7 @@ public class ColumnStatistic {
            long dbID = Long.parseLong(resultRow.getColumnValue("db_id"));
            long tblId = Long.parseLong(resultRow.getColumnValue("tbl_id"));
            String colName = resultRow.getColumnValue("col_id");
            Column col = findColumn(catalogId, dbID, tblId, idxId, colName);
            Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName);
            if (col == null) {
                LOG.warn("Failed to deserialize column statistics, ctlId: {} dbId: {}"
                        + "tblId: {} column: {} not exists",

@@ -136,10 +127,8 @@ public class ColumnStatistic {
            }
            String min = resultRow.getColumnValue("min");
            String max = resultRow.getColumnValue("max");
            String histogram = resultRow.getColumnValue("histogram");
            columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
            columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
            columnStatisticBuilder.setHistogram(Histogram.deserializeFromJson(col.getType(), histogram));
            columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
            columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
            columnStatisticBuilder.setSelectivity(1.0);

@@ -158,7 +147,7 @@ public class ColumnStatistic {
    public ColumnStatistic copy() {
        return new ColumnStatisticBuilder().setCount(count).setNdv(ndv).setAvgSizeByte(avgSizeByte)
                .setNumNulls(numNulls).setDataSize(dataSize).setMinValue(minValue)
                .setMaxValue(maxValue).setHistogram(histogram).setMinExpr(minExpr).setMaxExpr(maxExpr)
                .setMaxValue(maxValue).setMinExpr(minExpr).setMaxExpr(maxExpr)
                .setSelectivity(selectivity).setIsUnknown(isUnKnown).build();
    }

@@ -182,7 +171,6 @@ public class ColumnStatistic {
                .setDataSize(Math.ceil(dataSize * ratio))
                .setMinValue(minValue)
                .setMaxValue(maxValue)
                .setHistogram(histogram)
                .setMinExpr(minExpr)
                .setMaxExpr(maxExpr)
                .setSelectivity(newSelectivity)

@@ -194,28 +182,6 @@ public class ColumnStatistic {
        return Math.max(this.minValue, other.minValue) <= Math.min(this.maxValue, other.maxValue);
    }

    public static Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) {
        CatalogIf<DatabaseIf<TableIf>> catalogIf = Env.getCurrentEnv().getCatalogMgr().getCatalog(catalogId);
        if (catalogIf == null) {
            return null;
        }
        DatabaseIf<TableIf> db = catalogIf.getDb(dbId).orElse(null);
        if (db == null) {
            return null;
        }
        TableIf tblIf = db.getTable(tblId).orElse(null);
        if (tblIf == null) {
            return null;
        }
        if (idxId != -1) {
            if (tblIf instanceof OlapTable) {
                OlapTable olapTable = (OlapTable) tblIf;
                return olapTable.getIndexIdToMeta().get(idxId).getColumnByName(columnName);
            }
        }
        return tblIf.getColumn(columnName);
    }

    public ColumnStatistic updateBySelectivity(double selectivity, double rowCount) {
        if (isUnKnown) {
            return DEFAULT;
@@ -27,7 +27,6 @@ public class ColumnStatisticBuilder {
    private double dataSize;
    private double minValue;
    private double maxValue;
    private Histogram histogram;
    private double selectivity = 1.0;
    private LiteralExpr minExpr;
    private LiteralExpr maxExpr;

@@ -45,7 +44,6 @@ public class ColumnStatisticBuilder {
        this.dataSize = columnStatistic.dataSize;
        this.minValue = columnStatistic.minValue;
        this.maxValue = columnStatistic.maxValue;
        this.histogram = columnStatistic.histogram;
        this.selectivity = columnStatistic.selectivity;
        this.minExpr = columnStatistic.minExpr;
        this.maxExpr = columnStatistic.maxExpr;

@@ -87,11 +85,6 @@ public class ColumnStatisticBuilder {
        return this;
    }

    public ColumnStatisticBuilder setHistogram(Histogram histogram) {
        this.histogram = histogram;
        return this;
    }

    public ColumnStatisticBuilder setSelectivity(double selectivity) {
        this.selectivity = selectivity;
        return this;

@@ -140,10 +133,6 @@ public class ColumnStatisticBuilder {
        return maxValue;
    }

    public Histogram getHistogram() {
        return histogram;
    }

    public double getSelectivity() {
        return selectivity;
    }

@@ -162,6 +151,6 @@ public class ColumnStatisticBuilder {

    public ColumnStatistic build() {
        return new ColumnStatistic(count, ndv, avgSizeByte, numNulls,
                dataSize, minValue, maxValue, histogram, selectivity, minExpr, maxExpr, isUnknown);
                dataSize, minValue, maxValue, selectivity, minExpr, maxExpr, isUnknown);
    }
}
@@ -17,14 +17,15 @@

package org.apache.doris.statistics;

import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Type;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import com.google.common.collect.Lists;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.parquet.Strings;

@@ -34,71 +35,71 @@ import java.util.List;
public class Histogram {
    private static final Logger LOG = LogManager.getLogger(Histogram.class);

    private Type dataType;
    public final Type dataType;

    private int maxBucketSize;
    private int bucketSize;
    private float sampleRate;
    public final int maxBucketNum;

    private List<Bucket> buckets;
    public final int bucketNum;

    public Histogram(Type dataType) {
    public final double sampleRate;

    public final List<Bucket> buckets;

    public Histogram(Type dataType, int maxBucketNum, int bucketNum,
            double sampleRate, List<Bucket> buckets) {
        this.dataType = dataType;
    }

    public Type getDataType() {
        return dataType;
    }

    public void setDataType(Type dataType) {
        this.dataType = dataType;
    }

    public int getMaxBucketSize() {
        return maxBucketSize;
    }

    public void setMaxBucketSize(int maxBucketSize) {
        this.maxBucketSize = maxBucketSize;
    }

    public int getBucketSize() {
        return bucketSize;
    }

    public void setBucketSize(int bucketSize) {
        this.bucketSize = bucketSize;
    }

    public float getSampleRate() {
        return sampleRate;
    }

    public void setSampleRate(float sampleRate) {
        if (sampleRate < 0f || sampleRate > 1f) {
            this.sampleRate = 1f;
        } else {
            this.sampleRate = sampleRate;
        }
    }

    public void setBuckets(List<Bucket> buckets) {
        this.maxBucketNum = maxBucketNum;
        this.bucketNum = bucketNum;
        this.sampleRate = sampleRate;
        this.buckets = buckets;
    }

    public List<Bucket> getBuckets() {
        return buckets;
    }
    public static Histogram DEFAULT = new HistogramBuilder()
            .setDataType(Type.INVALID).setMaxBucketNum(1)
            .setBucketNum(0).setSampleRate(1.0).setBuckets(Lists.newArrayList()).build();

    public static Histogram defaultHistogram() {
        Type type = Type.fromPrimitiveType(PrimitiveType.INVALID_TYPE);
        List<Bucket> buckets = Lists.newArrayList();
        Histogram histogram = new Histogram(type);
        histogram.setMaxBucketSize(0);
        histogram.setBucketSize(0);
        histogram.setSampleRate(1.0f);
        histogram.setBuckets(buckets);
        return histogram;
    // TODO: use thrift
    public static Histogram fromResultRow(ResultRow resultRow) {
        try {
            HistogramBuilder histogramBuilder = new HistogramBuilder();

            long catalogId = Long.parseLong(resultRow.getColumnValue("catalog_id"));
            long idxId = Long.parseLong(resultRow.getColumnValue("idx_id"));
            long dbId = Long.parseLong(resultRow.getColumnValue("db_id"));
            long tblId = Long.parseLong(resultRow.getColumnValue("tbl_id"));

            String colName = resultRow.getColumnValue("col_id");
            Column col = StatisticsUtil.findColumn(catalogId, dbId, tblId, idxId, colName);
            if (col == null) {
                LOG.warn("Failed to deserialize histogram statistics, ctlId: {} dbId: {}"
                        + "tblId: {} column: {} not exists",
                        catalogId, dbId, tblId, colName);
                return Histogram.DEFAULT;
            }

            double sampleRate = Double.parseDouble(resultRow.getColumnValue("sample_rate"));
            histogramBuilder.setSampleRate(sampleRate);
            histogramBuilder.setDataType(col.getType());

            String json = resultRow.getColumnValue("buckets");
            JsonObject jsonObj = JsonParser.parseString(json).getAsJsonObject();

            int maxBucketNum = jsonObj.get("max_bucket_num").getAsInt();
            histogramBuilder.setMaxBucketNum(maxBucketNum);

            int bucketNum = jsonObj.get("bucket_num").getAsInt();
            histogramBuilder.setBucketNum(bucketNum);

            JsonArray jsonArray = jsonObj.getAsJsonArray("buckets");
            List<Bucket> buckets = Bucket.deserializeFromjson(col.getType(), jsonArray);
            histogramBuilder.setBuckets(buckets);

            return histogramBuilder.build();
        } catch (Exception e) {
            e.printStackTrace();
            LOG.warn("Failed to deserialize histogram statistics.", e);
            return Histogram.DEFAULT;
        }
    }

    /**

@@ -111,37 +112,28 @@ public class Histogram {
        }

        try {
            Histogram histogram = new Histogram(datatype);
            JSONObject histogramJson = JSON.parseObject(json);
            HistogramBuilder histogramBuilder = new HistogramBuilder();

            List<Bucket> buckets = Lists.newArrayList();
            JSONArray jsonArray = histogramJson.getJSONArray("buckets");
            histogramBuilder.setDataType(datatype);

            for (int i = 0; i < jsonArray.size(); i++) {
                JSONObject bucketJson = jsonArray.getJSONObject(i);
                Bucket bucket = new Bucket();
                bucket.lower = StatisticsUtil.readableValue(datatype, bucketJson.get("lower").toString());
                bucket.upper = StatisticsUtil.readableValue(datatype, bucketJson.get("upper").toString());
                bucket.count = bucketJson.getIntValue("count");
                bucket.preSum = bucketJson.getIntValue("pre_sum");
                bucket.ndv = bucketJson.getIntValue("ndv");
                buckets.add(bucket);
            }
            JsonObject histogramJson = JsonParser.parseString(json).getAsJsonObject();
            JsonArray jsonArray = histogramJson.getAsJsonArray("buckets");
            List<Bucket> buckets = Bucket.deserializeFromjson(datatype, jsonArray);

            histogram.setBuckets(buckets);
            histogramBuilder.setBuckets(buckets);

            int maxBucketSize = histogramJson.getIntValue("max_bucket_size");
            histogram.setMaxBucketSize(maxBucketSize);
            int maxBucketSize = histogramJson.get("max_bucket_num").getAsInt();
            histogramBuilder.setMaxBucketNum(maxBucketSize);

            int bucketSize = histogramJson.getIntValue("bucket_size");
            histogram.setBucketSize(bucketSize);
            int bucketSize = histogramJson.get("bucket_num").getAsInt();
            histogramBuilder.setBucketNum(bucketSize);

            float sampleRate = histogramJson.getFloatValue("sample_rate");
            histogram.setSampleRate(sampleRate);
            float sampleRate = histogramJson.get("sample_rate").getAsFloat();
            histogramBuilder.setSampleRate(sampleRate);

            return histogram;
            return histogramBuilder.build();
        } catch (Throwable e) {
            LOG.warn("deserialize from json error, input json string: {}", json, e);
            LOG.error("deserialize from json error.", e);
        }

        return null;

@@ -155,26 +147,25 @@ public class Histogram {
            return "";
        }

        JSONObject histogramJson = new JSONObject();
        histogramJson.put("max_bucket_size", histogram.maxBucketSize);
        histogramJson.put("bucket_size", histogram.bucketSize);
        histogramJson.put("sample_rate", histogram.sampleRate);
        JsonObject histogramJson = new JsonObject();

        JSONArray bucketsJsonArray = new JSONArray();
        histogramJson.put("buckets", bucketsJsonArray);
        histogramJson.addProperty("max_bucket_num", histogram.maxBucketNum);
        histogramJson.addProperty("bucket_num", histogram.bucketNum);
        histogramJson.addProperty("sample_rate", histogram.sampleRate);

        if (histogram.buckets != null) {
            for (Bucket bucket : histogram.buckets) {
                JSONObject bucketJson = new JSONObject();
                bucketJson.put("count", bucket.count);
                bucketJson.put("pre_sum", bucket.preSum);
                bucketJson.put("ndv", bucket.ndv);
                bucketJson.put("upper", bucket.upper.getStringValue());
                bucketJson.put("lower", bucket.lower.getStringValue());
                bucketsJsonArray.add(bucketJson);
            }
        JsonArray bucketsJsonArray = new JsonArray();
        histogramJson.add("buckets", bucketsJsonArray);

        for (Bucket bucket : histogram.buckets) {
            JsonObject bucketJson = new JsonObject();
            bucketJson.addProperty("count", bucket.count);
            bucketJson.addProperty("pre_sum", bucket.preSum);
            bucketJson.addProperty("ndv", bucket.ndv);
            bucketJson.addProperty("upper", bucket.upper.getStringValue());
            bucketJson.addProperty("lower", bucket.lower.getStringValue());
            bucketsJsonArray.add(bucketJson);
        }

        return histogramJson.toJSONString();
        return histogramJson.toString();
    }
}
@@ -0,0 +1,100 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Type;

import java.util.Comparator;
import java.util.List;

public class HistogramBuilder {
    private Type dataType;

    private int maxBucketNum;

    private int bucketNum;

    private double sampleRate;

    private List<Bucket> buckets;

    public HistogramBuilder() {
    }

    public HistogramBuilder(Histogram histogram) {
        this.dataType = histogram.dataType;
        this.maxBucketNum = histogram.maxBucketNum;
        this.bucketNum = histogram.bucketNum;
        this.sampleRate = histogram.sampleRate;
        this.buckets = histogram.buckets;
    }

    public HistogramBuilder setDataType(Type dataType) {
        this.dataType = dataType;
        return this;
    }

    public HistogramBuilder setMaxBucketNum(int maxBucketNum) {
        this.maxBucketNum = maxBucketNum;
        return this;
    }

    public HistogramBuilder setBucketNum(int bucketNum) {
        this.bucketNum = bucketNum;
        return this;
    }

    public HistogramBuilder setSampleRate(double sampleRate) {
        if (sampleRate < 0 || sampleRate > 1.0) {
            this.sampleRate = 1.0;
        } else {
            this.sampleRate = sampleRate;
        }
        return this;
    }

    public HistogramBuilder setBuckets(List<Bucket> buckets) {
        buckets.sort(Comparator.comparing(Bucket::getLower));
        this.buckets = buckets;
        return this;
    }

    public Type getDataType() {
        return dataType;
    }

    public int getMaxBucketNum() {
        return maxBucketNum;
    }

    public int getBucketNum() {
        return bucketNum;
    }

    public double getSampleRate() {
        return sampleRate;
    }

    public List<Bucket> getBuckets() {
        return buckets;
    }

    public Histogram build() {
        return new Histogram(dataType, maxBucketNum, bucketNum, sampleRate, buckets);
    }
}
@@ -0,0 +1,90 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.catalog.Env;
import org.apache.doris.common.FeConstants;
import org.apache.doris.qe.AutoCloseConnectContext;
import org.apache.doris.qe.StmtExecutor;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.text.StringSubstitutor;

import java.util.HashMap;
import java.util.Map;

/**
 * Each task analyzes one column.
 */
public class HistogramTask extends BaseAnalysisTask {

    /** To avoid too much data, use the following efficient sampling method */
    private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE = "INSERT INTO "
            + "${internalDB}.${histogramStatTbl} "
            + "SELECT "
            + "    CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
            + "    ${catalogId} AS catalog_id, "
            + "    ${dbId} AS db_id, "
            + "    ${tblId} AS tbl_id, "
            + "    ${idxId} AS idx_id, "
            + "    '${colId}' AS col_id, "
            + "    ${sampleRate} AS sample_rate, "
            + "    `HISTOGRAM`(`${colName}`, 1, ${maxBucketNum}) AS buckets, "
            + "    NOW() AS create_time "
            + "FROM "
            + "    `${dbName}`.`${tblName}` TABLESAMPLE (${percentValue} PERCENT)";

    @VisibleForTesting
    public HistogramTask() {
        super();
    }

    public HistogramTask(AnalysisTaskScheduler analysisTaskScheduler, AnalysisTaskInfo info) {
        super(analysisTaskScheduler, info);
    }

    @Override
    public void execute() throws Exception {
        Map<String, String> params = new HashMap<>();
        params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
        params.put("histogramStatTbl", StatisticConstants.HISTOGRAM_TBL_NAME);
        params.put("catalogId", String.valueOf(catalog.getId()));
        params.put("dbId", String.valueOf(db.getId()));
        params.put("tblId", String.valueOf(tbl.getId()));
        params.put("idxId", "-1");
        params.put("colId", String.valueOf(info.colName));
        params.put("dbName", info.dbName);
        params.put("tblName", String.valueOf(info.tblName));
        params.put("colName", String.valueOf(info.colName));
        params.put("sampleRate", String.valueOf(info.sampleRate));
        params.put("maxBucketNum", String.valueOf(info.maxBucketNum));
        params.put("percentValue", String.valueOf((int) (info.sampleRate * 100)));

        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
        String histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE);
        LOG.info("SQL to collect the histogram:\n {}", histogramSql);

        try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
            this.stmtExecutor = new StmtExecutor(r.connectContext, histogramSql);
            this.stmtExecutor.execute();
        }

        Env.getCurrentEnv().getStatisticsCache().refreshSync(tbl.getId(), -1, col.getName());
    }
}
@@ -58,12 +58,12 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
    private static final String ANALYZE_PARTITION_SQL_TEMPLATE = "INSERT INTO "
            + "${internalDB}.${columnStatTbl}"
            + " values ('${id}','${catalogId}', '${dbId}', '${tblId}', '-1', '${colId}', '${partId}', "
            + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', NULL, ${dataSize}, '${update_time}')";
            + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', ${dataSize}, '${update_time}')";

    private static final String ANALYZE_TABLE_SQL_TEMPLATE = "INSERT INTO "
            + "${internalDB}.${columnStatTbl}"
            + " values ('${id}','${catalogId}', '${dbId}', '${tblId}', NULL, '${colId}', NULL, "
            + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', NULL, ${dataSize}, '${update_time}')";
            + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', ${dataSize}, '${update_time}')";

    @Override
    protected void getColumnStatsByMeta() throws Exception {

@@ -52,7 +52,7 @@ public class IcebergAnalysisTask extends HMSAnalysisTask {
    private static final String INSERT_TABLE_SQL_TEMPLATE = "INSERT INTO "
            + "${internalDB}.${columnStatTbl}"
            + " values ('${id}','${catalogId}', '${dbId}', '${tblId}', '-1', '${colId}', NULL, "
            + "${numRows}, 0, ${nulls}, '0', '0', NULL, ${dataSize}, '${update_time}')";
            + "${numRows}, 0, ${nulls}, '0', '0', ${dataSize}, '${update_time}')";

    @Override
@@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

public class Statistic {

    public Histogram histogram;

    public ColumnStatistic columnStatistic;

    public Statistic() {
    }

    public Statistic(Histogram histogram, ColumnStatistic columnStatistic) {
        this.histogram = histogram;
        this.columnStatistic = columnStatistic;
    }

    public Histogram getHistogram() {
        if (histogram != null) {
            return histogram;
        }
        return Histogram.DEFAULT;
    }

    public void setHistogram(Histogram histogram) {
        this.histogram = histogram;
    }

    public ColumnStatistic getColumnStatistic() {
        if (columnStatistic != null) {
            return columnStatistic;
        }
        return ColumnStatistic.DEFAULT;
    }

    public void setColumnStatistic(ColumnStatistic columnStatistic) {
        this.columnStatistic = columnStatistic;
    }
}
@@ -22,6 +22,8 @@ import java.util.concurrent.TimeUnit;
public class StatisticConstants {
    public static final String STATISTIC_TBL_NAME = "column_statistics";

    public static final String HISTOGRAM_TBL_NAME = "histogram_statistics";

    public static final String ANALYSIS_JOB_TABLE = "analysis_jobs";

    public static final int MAX_NAME_LEN = 64;
@@ -31,7 +31,7 @@ public class StatisticsCache {

    private static final Logger LOG = LogManager.getLogger(StatisticsCache.class);

    private final AsyncLoadingCache<StatisticsCacheKey, ColumnStatistic> cache = Caffeine.newBuilder()
    private final AsyncLoadingCache<StatisticsCacheKey, Statistic> cache = Caffeine.newBuilder()
            .maximumSize(StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE)
            .expireAfterAccess(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_VALID_DURATION_IN_HOURS))
            .refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL))

@@ -42,29 +42,49 @@ public class StatisticsCache {
    }

    public ColumnStatistic getColumnStatistics(long tblId, long idxId, String colName) {
        if (ConnectContext.get().getSessionVariable().internalSession) {
        ConnectContext ctx = ConnectContext.get();
        if (ctx != null && ctx.getSessionVariable().internalSession) {
            return ColumnStatistic.DEFAULT;
        }
        StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colName);
        try {
            CompletableFuture<ColumnStatistic> f = cache.get(k);
            if (f.isDone()) {
                return f.get();
            CompletableFuture<Statistic> f = cache.get(k);
            if (f.isDone() && f.get() != null) {
                return f.get().getColumnStatistic();
            }
        } catch (Exception e) {
            LOG.warn("Unexpected exception while returning ColumnStatistic", e);
            return ColumnStatistic.DEFAULT;
        }
        return ColumnStatistic.DEFAULT;
    }

    public Histogram getHistogram(long tblId, String colName) {
        return getHistogram(tblId, -1, colName);
    }

    public Histogram getHistogram(long tblId, long idxId, String colName) {
        ConnectContext ctx = ConnectContext.get();
        if (ctx != null && ctx.getSessionVariable().internalSession) {
            return Histogram.DEFAULT;
        }
        StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colName);
        try {
            CompletableFuture<Statistic> f = cache.get(k);
            if (f.isDone() && f.get() != null) {
                return f.get().getHistogram();
            }
        } catch (Exception e) {
            LOG.warn("Unexpected exception while returning Histogram", e);
        }
        return Histogram.DEFAULT;
    }

    // TODO: finish this method.
    public void eraseExpiredCache(long tblId, long idxId, String colName) {
        cache.synchronous().invalidate(new StatisticsCacheKey(tblId, idxId, colName));
    }

    public void updateCache(long tblId, long idxId, String colName, ColumnStatistic statistic) {
    public void updateCache(long tblId, long idxId, String colName, Statistic statistic) {
        cache.synchronous().put(new StatisticsCacheKey(tblId, idxId, colName), statistic);
    }
@@ -35,7 +35,7 @@ import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.Executor;

public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKey, ColumnStatistic> {
public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKey, Statistic> {

    private static final Logger LOG = LogManager.getLogger(StatisticsCacheLoader.class);

@@ -43,13 +43,17 @@ public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKe
            + "." + StatisticConstants.STATISTIC_TBL_NAME + " WHERE "
            + "id = CONCAT('${tblId}', '-', ${idxId}, '-', '${colId}')";

    private static final String QUERY_HISTOGRAM_STATISTICS = "SELECT * FROM " + FeConstants.INTERNAL_DB_NAME
            + "." + StatisticConstants.HISTOGRAM_TBL_NAME + " WHERE "
            + "id = CONCAT('${tblId}', '-', ${idxId}, '-', '${colId}')";

    private static int CUR_RUNNING_LOAD = 0;

    private static final Object LOCK = new Object();

    // TODO: Maybe we should trigger a analyze job when the required ColumnStatistic doesn't exists.
    @Override
    public @NonNull CompletableFuture<ColumnStatistic> asyncLoad(@NonNull StatisticsCacheKey key,
    public @NonNull CompletableFuture<Statistic> asyncLoad(@NonNull StatisticsCacheKey key,
            @NonNull Executor executor) {
        synchronized (LOCK) {
            if (CUR_RUNNING_LOAD > StatisticConstants.LOAD_TASK_LIMITS) {

@@ -61,31 +65,53 @@
            }
            CUR_RUNNING_LOAD++;
            return CompletableFuture.supplyAsync(() -> {
                Statistic statistic = new Statistic();

                try {
                    Map<String, String> params = new HashMap<>();
                    params.put("tblId", String.valueOf(key.tableId));
                    params.put("idxId", String.valueOf(key.idxId));
                    params.put("colId", String.valueOf(key.colName));
                    List<ResultRow> resultBatches =

                    List<ColumnStatistic> columnStatistics;
                    List<ResultRow> columnResult =
                            StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
                                    .replace(QUERY_COLUMN_STATISTICS));
                    List<ColumnStatistic> columnStatistics = null;
                    try {
                        columnStatistics = StatisticsUtil.deserializeToColumnStatistics(resultBatches);
                        columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResult);
                    } catch (Exception e) {
                        LOG.warn("Failed to deserialize column statistics", e);
                        throw new CompletionException(e);
                    }
                    if (CollectionUtils.isEmpty(columnStatistics)) {
                        return ColumnStatistic.DEFAULT;
                        statistic.setColumnStatistic(ColumnStatistic.DEFAULT);
                    } else {
                        statistic.setColumnStatistic(columnStatistics.get(0));
                    }

                    List<Histogram> histogramStatistics;
                    List<ResultRow> histogramResult =
                            StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
                                    .replace(QUERY_HISTOGRAM_STATISTICS));
                    try {
                        histogramStatistics = StatisticsUtil.deserializeToHistogramStatistics(histogramResult);
                    } catch (Exception e) {
                        LOG.warn("Failed to deserialize histogram statistics", e);
                        throw new CompletionException(e);
                    }
                    if (CollectionUtils.isEmpty(histogramStatistics)) {
                        statistic.setHistogram(Histogram.DEFAULT);
                    } else {
                        statistic.setHistogram(histogramStatistics.get(0));
                    }
                    return columnStatistics.get(0);
                } finally {
                    synchronized (LOCK) {
                        CUR_RUNNING_LOAD--;
                        LOCK.notify();
                    }
                }

                return statistic;
            });
        }
    }
@@ -74,8 +74,7 @@ public class StatisticsRepository {

    private static final String INSERT_INTO_COLUMN_STATISTICS = "INSERT INTO "
            + FULL_QUALIFIED_COLUMN_STATISTICS_NAME + " VALUES('${id}', ${catalogId}, ${dbId}, ${tblId}, '${idxId}',"
            + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, '${min}', '${max}', "
            + "'${histogram}', ${dataSize}, NOW())";
            + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, '${min}', '${max}', ${dataSize}, NOW())";

    public static ColumnStatistic queryColumnStatisticsByName(long tableId, String colName) {
        ResultRow resultRow = queryColumnStatisticById(tableId, colName);

@@ -159,7 +158,6 @@ public class StatisticsRepository {
        String nullCount = alterColumnStatsStmt.getValue(StatsType.NUM_NULLS);
        String min = alterColumnStatsStmt.getValue(StatsType.MIN_VALUE);
        String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE);
        String histogram = alterColumnStatsStmt.getValue(StatsType.HISTOGRAM);
        String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE);
        ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
        String colName = alterColumnStatsStmt.getColumnName();

@@ -181,12 +179,10 @@ public class StatisticsRepository {
            builder.setMaxExpr(StatisticsUtil.readableValue(column.getType(), max));
            builder.setMaxValue(StatisticsUtil.convertToDouble(column.getType(), max));
        }
        if (histogram != null) {
            builder.setHistogram(Histogram.deserializeFromJson(column.getType(), histogram));
        }
        if (dataSize != null) {
            builder.setDataSize(Double.parseDouble(dataSize));
        }

        ColumnStatistic columnStatistic = builder.build();
        Map<String, String> params = new HashMap<>();
        params.put("id", constructId(objects.table.getId(), -1, colName));

@@ -201,9 +197,17 @@ public class StatisticsRepository {
        params.put("nullCount", String.valueOf(columnStatistic.numNulls));
        params.put("min", min == null ? "NULL" : min);
        params.put("max", max == null ? "NULL" : max);
        params.put("histogram", (columnStatistic.histogram == null) ? "NULL" : histogram);
        params.put("dataSize", String.valueOf(columnStatistic.dataSize));
        StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, params);
        Env.getCurrentEnv().getStatisticsCache().updateCache(objects.table.getId(), -1, colName, builder.build());

        Histogram histogram = Env.getCurrentEnv().getStatisticsCache()
                .getHistogram(objects.table.getId(), -1, colName);

        Statistic statistic = new Statistic();
        statistic.setHistogram(histogram);
        statistic.setColumnStatistic(builder.build());

        Env.getCurrentEnv().getStatisticsCache()
                .updateCache(objects.table.getId(), -1, colName, statistic);
    }
}
@@ -29,8 +29,10 @@ import org.apache.doris.analysis.StatementBase;
import org.apache.doris.analysis.StringLiteral;
import org.apache.doris.analysis.TableName;
import org.apache.doris.analysis.UserIdentity;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.TableIf;

@@ -47,6 +49,7 @@ import org.apache.doris.qe.SessionVariable;
import org.apache.doris.qe.StmtExecutor;
import org.apache.doris.statistics.AnalysisTaskInfo;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.Histogram;
import org.apache.doris.statistics.StatisticConstants;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.system.SystemInfoService;

@@ -107,6 +110,11 @@ public class StatisticsUtil {
        return resultBatches.stream().map(ColumnStatistic::fromResultRow).collect(Collectors.toList());
    }

    public static List<Histogram> deserializeToHistogramStatistics(List<ResultRow> resultBatches)
            throws Exception {
        return resultBatches.stream().map(Histogram::fromResultRow).collect(Collectors.toList());
    }

    public static AutoCloseConnectContext buildConnectContext() {
        ConnectContext connectContext = new ConnectContext();
        SessionVariable sessionVariable = connectContext.getSessionVariable();

@@ -253,4 +261,26 @@ public class StatisticsUtil {
        }
        return new DBObjects(catalogIf, databaseIf, tableIf);
    }

    public static Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) {
        CatalogIf<DatabaseIf<TableIf>> catalogIf = Env.getCurrentEnv().getCatalogMgr().getCatalog(catalogId);
        if (catalogIf == null) {
            return null;
        }
        DatabaseIf<TableIf> db = catalogIf.getDb(dbId).orElse(null);
        if (db == null) {
            return null;
        }
        TableIf tblIf = db.getTable(tblId).orElse(null);
        if (tblIf == null) {
            return null;
        }
        if (idxId != -1) {
            if (tblIf instanceof OlapTable) {
                OlapTable olapTable = (OlapTable) tblIf;
                return olapTable.getIndexIdToMeta().get(idxId).getColumnByName(columnName);
            }
        }
        return tblIf.getColumn(columnName);
    }
}
@@ -478,6 +478,7 @@ import org.apache.doris.qe.SqlModeHelper;
        keywordMap.put("write", new Integer(SqlParserSymbols.KW_WRITE));
        keywordMap.put("year", new Integer(SqlParserSymbols.KW_YEAR));
        keywordMap.put("mtmv", new Integer(SqlParserSymbols.KW_MTMV));
        keywordMap.put("histogram", new Integer(SqlParserSymbols.KW_HISTOGRAM));
    }

    // map from token id to token description
@@ -191,7 +191,7 @@ public class HyperGraphBuilder {
        int count = rowCounts.get(Integer.parseInt(scanPlan.getTable().getName()));
        for (Slot slot : scanPlan.getOutput()) {
            slotIdToColumnStats.put(slot.getExprId(),
                    new ColumnStatistic(count, count, 0, 0, 0, 0, 0, null, 0, null, null, true));
                    new ColumnStatistic(count, count, 0, 0, 0, 0, 0, 0, null, null, true));
        }
        StatsDeriveResult stats = new StatsDeriveResult(count, slotIdToColumnStats);
        group.setStatistics(stats);
@@ -82,7 +82,7 @@ public class AnalysisJobTest extends TestWithFeService {
                return connectContext;
            }
        };
        String sql = "ANALYZE t1";
        String sql = "ANALYZE TABLE t1";
        Assertions.assertNotNull(getSqlStmtExecutor(sql));
    }
@ -32,6 +32,7 @@ import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.Executor;
|
||||
@ -63,14 +64,12 @@ public class CacheTest extends TestWithFeService {

    @Test
    public void testLoad() throws Exception {
        new MockUp<ColumnStatistic>() {
        new MockUp<StatisticsUtil>() {

            @Mock
            public Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) {
                return new Column("abc", PrimitiveType.BIGINT);
            }
        };
        new MockUp<StatisticsUtil>() {

            @Mock
            public List<ResultRow> execStatisticQuery(String sql) {

@ -91,7 +90,6 @@ public class CacheTest extends TestWithFeService {
                colNames.add("col_id");
                colNames.add("min");
                colNames.add("max");
                colNames.add("histogram");
                List<PrimitiveType> primitiveTypes = new ArrayList<>();
                primitiveTypes.add(PrimitiveType.BIGINT);
                primitiveTypes.add(PrimitiveType.BIGINT);

@ -103,6 +101,7 @@ public class CacheTest extends TestWithFeService {
                primitiveTypes.add(PrimitiveType.VARCHAR);
                primitiveTypes.add(PrimitiveType.VARCHAR);
                primitiveTypes.add(PrimitiveType.VARCHAR);
                primitiveTypes.add(PrimitiveType.VARCHAR);
                List<String> values = new ArrayList<>();
                values.add("1");
                values.add("2");

@ -115,7 +114,6 @@ public class CacheTest extends TestWithFeService {
                values.add("8");
                values.add("9");
                values.add("10");
                values.add("");
                ResultRow resultRow = new ResultRow(colNames, primitiveTypes, values);
                return Arrays.asList(resultRow);
            }

@ -123,10 +121,78 @@ public class CacheTest extends TestWithFeService {
        StatisticsCache statisticsCache = new StatisticsCache();
        ColumnStatistic columnStatistic = statisticsCache.getColumnStatistics(0, "col");
        Assertions.assertEquals(ColumnStatistic.DEFAULT, columnStatistic);
        Thread.sleep(100);
        Thread.sleep(1000);
        columnStatistic = statisticsCache.getColumnStatistics(0, "col");
        Assertions.assertEquals(1, columnStatistic.count);
        Assertions.assertEquals(2, columnStatistic.ndv);
        Assertions.assertEquals(10, columnStatistic.maxValue);
    }

    @Test
    public void testLoadHistogram() throws Exception {
        new MockUp<StatisticsUtil>() {

            @Mock
            public Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) {
                return new Column("abc", PrimitiveType.DATETIME);
            }

            @Mock
            public List<ResultRow> execStatisticQuery(String sql) {
                try {
                    Thread.sleep(50);
                } catch (InterruptedException e) {
                    // ignore
                }
                List<String> colNames = new ArrayList<>();
                colNames.add("catalog_id");
                colNames.add("db_id");
                colNames.add("idx_id");
                colNames.add("tbl_id");
                colNames.add("col_id");
                colNames.add("sample_rate");
                colNames.add("buckets");
                List<PrimitiveType> primitiveTypes = new ArrayList<>();
                primitiveTypes.add(PrimitiveType.VARCHAR);
                primitiveTypes.add(PrimitiveType.VARCHAR);
                primitiveTypes.add(PrimitiveType.VARCHAR);
                primitiveTypes.add(PrimitiveType.VARCHAR);
                primitiveTypes.add(PrimitiveType.VARCHAR);
                primitiveTypes.add(PrimitiveType.VARCHAR);
                primitiveTypes.add(PrimitiveType.VARCHAR);
                List<String> values = new ArrayList<>();
                values.add("1");
                values.add("2");
                values.add("3");
                values.add("-1");
                values.add("4");
                values.add("0.2");
                String buckets = "{\"max_bucket_num\":128,\"bucket_num\":5,\"sample_rate\":1.0,\"buckets\":"
                        + "[{\"lower\":\"2022-09-21 17:30:29\",\"upper\":\"2022-09-21 22:30:29\","
                        + "\"count\":9,\"pre_sum\":0,\"ndv\":1},"
                        + "{\"lower\":\"2022-09-22 17:30:29\",\"upper\":\"2022-09-22 22:30:29\","
                        + "\"count\":10,\"pre_sum\":9,\"ndv\":1},"
                        + "{\"lower\":\"2022-09-23 17:30:29\",\"upper\":\"2022-09-23 22:30:29\","
                        + "\"count\":9,\"pre_sum\":19,\"ndv\":1},"
                        + "{\"lower\":\"2022-09-24 17:30:29\",\"upper\":\"2022-09-24 22:30:29\","
                        + "\"count\":9,\"pre_sum\":28,\"ndv\":1},"
                        + "{\"lower\":\"2022-09-25 17:30:29\",\"upper\":\"2022-09-25 22:30:29\","
                        + "\"count\":9,\"pre_sum\":37,\"ndv\":1}]}";
                values.add(buckets);
                ResultRow resultRow = new ResultRow(colNames, primitiveTypes, values);
                return Collections.singletonList(resultRow);
            }
        };

        StatisticsCache statisticsCache = new StatisticsCache();
        Histogram histogram = statisticsCache.getHistogram(0, "col");
        Assertions.assertEquals(Histogram.DEFAULT, histogram);
        Thread.sleep(1000);
        histogram = statisticsCache.getHistogram(0, "col");
        Assertions.assertEquals("DATETIME", histogram.dataType.toString());
        Assertions.assertEquals(128, histogram.maxBucketNum);
        Assertions.assertEquals(5, histogram.bucketNum);
        Assertions.assertEquals(0.2, histogram.sampleRate);
        Assertions.assertEquals(5, histogram.buckets.size());
    }
}

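The `buckets` payload in the fixture above is plain JSON, and this PR adopts Gson in place of fastjson. Below is a minimal, self-contained sketch (not Doris code; the DTO names are invented) of how such a payload can be parsed with Gson's `@SerializedName` mapping:

```java
import com.google.gson.Gson;
import com.google.gson.annotations.SerializedName;
import java.util.List;

public class BucketsJsonDemo {
    // Invented DTOs mirroring the JSON keys used in the test fixture above.
    static class BucketDto {
        String lower;
        String upper;
        long count;
        @SerializedName("pre_sum")
        long preSum;
        long ndv;
    }

    static class HistogramDto {
        @SerializedName("max_bucket_num")
        int maxBucketNum;
        @SerializedName("bucket_num")
        int bucketNum;
        @SerializedName("sample_rate")
        double sampleRate;
        List<BucketDto> buckets;
    }

    public static void main(String[] args) {
        String json = "{\"max_bucket_num\":128,\"bucket_num\":1,\"sample_rate\":1.0,"
                + "\"buckets\":[{\"lower\":\"2022-09-21 17:30:29\","
                + "\"upper\":\"2022-09-21 22:30:29\",\"count\":9,\"pre_sum\":0,\"ndv\":1}]}";
        HistogramDto dto = new Gson().fromJson(json, HistogramDto.class);
        System.out.println(dto.maxBucketNum);          // 128
        System.out.println(dto.buckets.get(0).preSum); // 0
    }
}
```
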
@ -39,7 +39,7 @@ class HistogramTest {

    @BeforeEach
    void setUp() throws Exception {
        String json = "{\"max_bucket_size\":128,\"bucket_size\":5,\"sample_rate\":1.0,\"buckets\":"
        String json = "{\"max_bucket_num\":128,\"bucket_num\":5,\"sample_rate\":1.0,\"buckets\":"
                + "[{\"lower\":\"2022-09-21 17:30:29\",\"upper\":\"2022-09-21 22:30:29\","
                + "\"count\":9,\"pre_sum\":0,\"ndv\":1},"
                + "{\"lower\":\"2022-09-22 17:30:29\",\"upper\":\"2022-09-22 22:30:29\","

@ -58,19 +58,19 @@ class HistogramTest {

    @Test
    void testDeserializeFromJson() throws Exception {
        Type dataType = histogramUnderTest.getDataType();
        Type dataType = histogramUnderTest.dataType;
        Assertions.assertTrue(dataType.isDatetime());

        int maxBucketSize = histogramUnderTest.getMaxBucketSize();
        int maxBucketSize = histogramUnderTest.maxBucketNum;
        Assertions.assertEquals(128, maxBucketSize);

        int bucketSize = histogramUnderTest.getBucketSize();
        int bucketSize = histogramUnderTest.bucketNum;
        Assertions.assertEquals(5, bucketSize);

        float sampleRate = histogramUnderTest.getSampleRate();
        double sampleRate = histogramUnderTest.sampleRate;
        Assertions.assertEquals(1.0, sampleRate);

        List<Bucket> buckets = histogramUnderTest.getBuckets();
        List<Bucket> buckets = histogramUnderTest.buckets;
        Assertions.assertEquals(5, buckets.size());

        LiteralExpr expectedLower = LiteralExpr.create("2022-09-21 17:30:29",

@ -97,10 +97,10 @@ class HistogramTest {
        String json = Histogram.serializeToJson(histogramUnderTest);
        JSONObject histogramJson = JSON.parseObject(json);

        int maxBucketSize = histogramJson.getIntValue("max_bucket_size");
        int maxBucketSize = histogramJson.getIntValue("max_bucket_num");
        Assertions.assertEquals(128, maxBucketSize);

        int bucketSize = histogramJson.getIntValue("bucket_size");
        int bucketSize = histogramJson.getIntValue("bucket_num");
        Assertions.assertEquals(5, bucketSize);

        float sampleRate = histogramJson.getFloat("sample_rate");

@ -79,7 +79,7 @@ public class MVStatisticsTest extends TestWithFeService {
        };
        AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
        Deencapsulation.setField(analysisManager, "statisticsCache", statisticsCache);
        getSqlStmtExecutor("analyze t1");
        getSqlStmtExecutor("analyze table t1");
        Thread.sleep(3000);
    }
}

@ -27,7 +27,7 @@ public class StatsDeriveResultTest {
    public void testUpdateRowCountByLimit() {
        StatsDeriveResult stats = new StatsDeriveResult(100);
        ColumnStatistic a = new ColumnStatistic(100, 10, 1, 5, 10,
                1, 100, null, 0.5, null, null, false);
                1, 100, 0.5, null, null, false);
        Id id = new Id(1);
        stats.addColumnStats(id, a);
        StatsDeriveResult res = stats.updateByLimit(0);