diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index bf29f9fb99..787e06704c 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -1397,6 +1397,8 @@ public class Env { partitionInMemoryInfoCollector.start(); streamLoadRecordMgr.start(); getInternalCatalog().getIcebergTableCreationRecordMgr().start(); + this.statisticsJobScheduler.start(); + this.statisticsTaskScheduler.start(); } // start threads that should running on all FE diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index c75c939ede..a0042c55e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.stats; +import org.apache.doris.analysis.NullLiteral; import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -270,7 +271,7 @@ public class StatsCalculator extends DefaultPlanVisitor if (colName == null) { throw new RuntimeException("Column name of SlotReference shouldn't be null here"); } - ColumnStats columnStats = tableStats.getColumnStats(colName); + ColumnStats columnStats = tableStats.getColumnStatsOrDefault(colName); slotToColumnStats.put(slotReference, columnStats); } long rowCount = tableStats.getRowCount(); @@ -344,7 +345,8 @@ public class StatsCalculator extends DefaultPlanVisitor ColumnStats value = null; Set slots = projection.getInputSlots(); if (slots.isEmpty()) { - value = ColumnStats.createDefaultColumnStats(); + value = new ColumnStats(1, 1, 1, 0, + new NullLiteral(), new NullLiteral()); } else { // TODO: just a trick here, need to do real project on column stats for (Slot slot : slots) { @@ -354,7 +356,8 @@ public class StatsCalculator extends DefaultPlanVisitor } } if (value == null) { - value = ColumnStats.createDefaultColumnStats(); + value = new ColumnStats(1, 1, 1, 0, + new NullLiteral(), new NullLiteral()); } } return new SimpleEntry<>(projection.toSlot(), value); @@ -367,8 +370,8 @@ public class StatsCalculator extends DefaultPlanVisitor Map columnStatsMap = oneRowRelation.getProjects() .stream() .map(project -> { - ColumnStats columnStats = new ColumnStats(); - columnStats.setNdv(1); + ColumnStats columnStats = new ColumnStats(1, -1, -1, -1, + new NullLiteral(), new NullLiteral()); // TODO: compute the literal size return Pair.of(project.toSlot(), columnStats); }) @@ -381,11 +384,8 @@ public class StatsCalculator extends DefaultPlanVisitor Map columnStatsMap = emptyRelation.getProjects() .stream() .map(project -> { - ColumnStats columnStats = new ColumnStats(); - columnStats.setNdv(0); - columnStats.setMaxSize(0); - columnStats.setNumNulls(0); - columnStats.setAvgSize(0); + ColumnStats columnStats = new ColumnStats(0, 0, 0, 0, + new NullLiteral(), new NullLiteral()); return Pair.of(project.toSlot(), columnStats); }) .collect(Collectors.toMap(Pair::key, Pair::value)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 95fb643817..f09bd1a5f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -223,6 +223,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_PUSH_DOWN_NO_GROUP_AGG = "enable_push_down_no_group_agg"; + public static final String ENABLE_CBO_STATISTICS = "enable_cbo_statistics"; + // session origin value public Map sessionOriginValue = new HashMap(); // check stmt is or not [select /*+ SET_VAR(...)*/ ...] @@ -570,6 +572,13 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = ENABLE_PUSH_DOWN_NO_GROUP_AGG) public boolean enablePushDownNoGroupAgg = true; + /** + * The current statistics are only used for CBO test, + * and are not available to users. (work in progress) + */ + @VariableMgr.VarAttr(name = ENABLE_CBO_STATISTICS) + public boolean enableCboStatistics = false; + public String getBlockEncryptionMode() { return blockEncryptionMode; } @@ -975,6 +984,10 @@ public class SessionVariable implements Serializable, Writable { return enableLocalExchange; } + public boolean getEnableCboStatistics() { + return enableCboStatistics; + } + /** * getInsertVisibleTimeoutMs. **/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java index ecdf946379..e4628c8ea6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java @@ -24,6 +24,7 @@ import org.apache.doris.analysis.FloatLiteral; import org.apache.doris.analysis.IntLiteral; import org.apache.doris.analysis.LargeIntLiteral; import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.analysis.NullLiteral; import org.apache.doris.analysis.StringLiteral; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; @@ -43,19 +44,21 @@ import java.util.function.Predicate; * The column stats are mainly used to provide input for the Optimizer's cost model. *

* The description of column stats are following: - * 1. @ndv: The number distinct values of column. - * 2. @avgSize: The average size of column. The unit is bytes. - * 3. @maxSize: The max size of column. The unit is bytes. - * 4. @numNulls: The number of nulls. - * 5. @minValue: The min value of column. - * 6. @maxValue: The max value of column. + * 1. @ndv: The number distinct values of column. + * 2. @avgSize: The average size of column. The unit is bytes. + * 3. @maxSize: The max size of column. The unit is bytes. + * 4. @numNulls: The number of nulls. + * 5. @minValue: The min value of column. + * 6. @maxValue: The max value of column. *

* The granularity of the statistics is whole table. * For example: - * "@ndv = 10" means that the number distinct values is 10 in the whole table. + * "@ndv = 10" means that the number distinct values is 10 in the whole table or partition. + *

+ * After the statistics task is successfully completed, update the ColumnStats, + * ColumnStats should not be updated in any other way. */ public class ColumnStats { - public static final StatsType NDV = StatsType.NDV; public static final StatsType AVG_SIZE = StatsType.AVG_SIZE; public static final StatsType MAX_SIZE = StatsType.MAX_SIZE; @@ -72,16 +75,94 @@ public class ColumnStats { private float avgSize = -1; // in bytes private long maxSize = -1; // in bytes private long numNulls = -1; - private LiteralExpr minValue; - private LiteralExpr maxValue; - public static ColumnStats createDefaultColumnStats() { - ColumnStats columnStats = new ColumnStats(); - columnStats.setAvgSize(1); - columnStats.setMaxSize(1); - columnStats.setNdv(1); - columnStats.setNumNulls(0); - return columnStats; + private LiteralExpr minValue = new NullLiteral(); + private LiteralExpr maxValue = new NullLiteral(); + + /** + * Return default column statistic. + */ + public static ColumnStats getDefaultColumnStats() { + return new ColumnStats(); + } + + /** + * Merge column statistics(the original statistics should not be modified) + * + * @param left statistics to be merged + * @param right statistics to be merged + */ + public static ColumnStats mergeColumnStats(ColumnStats left, ColumnStats right) { + // merge ndv + long leftNdv = left.getNdv(); + long rightNdv = right.getNdv(); + + if (leftNdv == -1) { + leftNdv = rightNdv; + } else { + leftNdv = rightNdv != -1 ? (leftNdv + rightNdv) : leftNdv; + } + + // merge avg_size + float leftAvgSize = left.getAvgSize(); + float rightAvgSize = right.getAvgSize(); + if (leftAvgSize == -1) { + leftAvgSize = rightAvgSize; + } else { + leftAvgSize = rightAvgSize != -1 ? ((leftAvgSize + rightAvgSize) / 2) : leftAvgSize; + } + + // merge max_size + long leftMaxSize = left.getMaxSize(); + long rightMaxSize = right.getMaxSize(); + if (leftMaxSize == -1) { + leftMaxSize = rightMaxSize; + } else { + leftMaxSize = Math.max(leftMaxSize, rightMaxSize); + } + + // merge num_nulls + long leftNumNulls = left.getNumNulls(); + long rightNumNulls = right.getNumNulls(); + if (leftNumNulls == -1) { + leftNumNulls = rightNumNulls; + } else { + leftNumNulls = rightNumNulls != -1 ? (leftNumNulls + rightNumNulls) : leftNumNulls; + } + + // merge min_value + LiteralExpr leftMinValue = left.getMinValue(); + LiteralExpr rightMinValue = right.getMinValue(); + if (leftMinValue == null) { + leftMinValue = rightMinValue; + } else { + leftMinValue = leftMinValue.compareTo(rightMinValue) > 0 ? rightMinValue : leftMinValue; + } + + // merge max_value + LiteralExpr leftMaxValue = left.getMaxValue(); + LiteralExpr rightMaxValue = right.getMaxValue(); + if (leftMaxValue == null) { + leftMaxValue = rightMaxValue; + } else { + leftMaxValue = leftMaxValue.compareTo(rightMaxValue) < 0 ? rightMaxValue : leftMaxValue; + } + + // generate the new merged-statistics + return new ColumnStats(leftNdv, leftAvgSize, leftMaxSize, leftNumNulls, leftMinValue, leftMaxValue); + } + + public ColumnStats() { + } + + public ColumnStats(long ndv, float avgSize, long maxSize, + long numNulls, LiteralExpr minValue, LiteralExpr maxValue) { + this.ndv = ndv; + this.avgSize = avgSize; + this.maxSize = maxSize; + this.numNulls = numNulls; + this.minValue = minValue; + this.maxValue = maxValue; } public ColumnStats(ColumnStats other) { @@ -89,17 +170,14 @@ public class ColumnStats { this.avgSize = other.avgSize; this.maxSize = other.maxSize; this.numNulls = other.numNulls; - if (other.minValue != null) { + if (other.minValue != null && !(other.minValue instanceof NullLiteral)) { this.minValue = (LiteralExpr) other.minValue.clone(); } - if (other.maxValue != null) { + if (other.maxValue != null && !(other.minValue instanceof NullLiteral)) { this.maxValue = (LiteralExpr) other.maxValue.clone(); } } - public ColumnStats() { - } - public long getNdv() { return ndv; } @@ -124,31 +202,35 @@ public class ColumnStats { return maxValue; } - public void setNdv(long ndv) { - this.ndv = ndv; + public List getShowInfo() { + List result = Lists.newArrayList(); + result.add(Long.toString(ndv)); + result.add(Float.toString(avgSize)); + result.add(Long.toString(maxSize)); + result.add(Long.toString(numNulls)); + if (minValue != null) { + result.add(minValue.getStringValue()); + } else { + result.add("N/A"); + } + if (maxValue != null) { + result.add(maxValue.getStringValue()); + } else { + result.add("N/A"); + } + return result; } - public void setAvgSize(float avgSize) { - this.avgSize = avgSize; + public ColumnStats copy() { + return new ColumnStats(this); } - public void setMaxSize(long maxSize) { - this.maxSize = maxSize; - } - - public void setNumNulls(long numNulls) { - this.numNulls = numNulls; - } - - public void setMinValue(LiteralExpr minValue) { - this.minValue = minValue; - } - - public void setMaxValue(LiteralExpr maxValue) { - this.maxValue = maxValue; - } - - public void updateStats(Type columnType, Map statsTypeToValue) throws AnalysisException { + /** + * After the statistics task is successfully completed, update the statistics of the column, + * statistics should not be updated in any other way. + */ + public void updateStats(Type columnType, Map statsTypeToValue) + throws AnalysisException { for (Map.Entry entry : statsTypeToValue.entrySet()) { StatsType statsType = entry.getKey(); switch (statsType) { @@ -180,23 +262,10 @@ public class ColumnStats { } } - public List getShowInfo() { - List result = Lists.newArrayList(); - result.add(Long.toString(ndv)); - result.add(Float.toString(avgSize)); - result.add(Long.toString(maxSize)); - result.add(Long.toString(numNulls)); - if (minValue != null) { - result.add(minValue.getStringValue()); - } else { - result.add("N/A"); - } - if (maxValue != null) { - result.add(maxValue.getStringValue()); - } else { - result.add("N/A"); - } - return result; + // TODO: The generated statistics should not be modified + public void updateBySelectivity(double selectivity) { + ndv = (long) Math.ceil(ndv * selectivity); + numNulls = (long) Math.ceil(numNulls * selectivity); } private LiteralExpr validateColumnValue(Type type, String columnValue) throws AnalysisException { @@ -248,14 +317,4 @@ public class ColumnStats { throw new AnalysisException("Unsupported setting this type: " + type + " of min max value"); } } - - public ColumnStats copy() { - return new ColumnStats(this); - } - - public ColumnStats updateBySelectivity(double selectivity) { - ndv = (long) Math.ceil(ndv * selectivity); - numNulls = (long) Math.ceil(numNulls * selectivity); - return this; - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java index 4720525e1b..1ef48fc0c0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java @@ -28,7 +28,6 @@ import java.util.List; import java.util.Map; import java.util.function.Predicate; - /** * There are the statistics of partition. * The partition stats are mainly used to provide input for the Optimizer's cost model. @@ -36,19 +35,21 @@ import java.util.function.Predicate; * - @rowCount: The row count of partition. * - @dataSize: The data size of partition. * - @nameToColumnStats: <@String columnName, @ColumnStats columnStats> - * - *

Each column in the Table will have corresponding @ColumnStats. + *

+ * Each column in the Table will have corresponding @ColumnStats. * Those @ColumnStats are recorded in @nameToColumnStats form of MAP. * This facilitates the optimizer to quickly find the corresponding: * - @ColumnStats: based on the column name. * - @rowCount: The row count of partition. * - @dataSize: The data size of partition. - * - *

The granularity of the statistics is whole partition. + *

+ * The granularity of the statistics is whole partition. * For example: "@rowCount = 1000" means that the row count is 1000 in the whole partition. + *

+ * After the statistics task is successfully completed, update the PartitionStats, + * PartitionStats should not be updated in any other way. */ public class PartitionStats { - public static final StatsType DATA_SIZE = StatsType.DATA_SIZE; public static final StatsType ROW_COUNT = StatsType.ROW_COUNT; @@ -59,8 +60,19 @@ public class PartitionStats { private long dataSize = -1; private final Map nameToColumnStats = Maps.newConcurrentMap(); - public Map getNameToColumnStats() { - return nameToColumnStats; + /** + * Return a default partition statistic. + */ + public static PartitionStats getDefaultPartitionStats() { + return new PartitionStats(); + } + + public PartitionStats() { + } + + public PartitionStats(long rowCount, long dataSize) { + this.rowCount = rowCount; + this.dataSize = dataSize; } public long getRowCount() { @@ -79,11 +91,35 @@ public class PartitionStats { this.dataSize = dataSize; } + public Map getNameToColumnStats() { + return nameToColumnStats; + } + + public ColumnStats getColumnStats(String columnName) { + return nameToColumnStats.get(columnName); + } + /** - * Update the partition stats. - * - * @param statsTypeToValue the map of stats type to value - * @throws AnalysisException if the stats value is not valid + * If the column statistics do not exist, the default statistics will be returned. + */ + public ColumnStats getColumnStatsOrDefault(String columnName) { + return nameToColumnStats.getOrDefault(columnName, + ColumnStats.getDefaultColumnStats()); + } + + /** + * Show the partition row count and data size. + */ + public List getShowInfo() { + List result = Lists.newArrayList(); + result.add(Long.toString(rowCount)); + result.add(Long.toString(dataSize)); + return result; + } + + /** + * After the statistics task is successfully completed, update the statistics of the partition, + * statistics should not be updated in any other way. */ public void updatePartitionStats(Map statsTypeToValue) throws AnalysisException { for (Map.Entry entry : statsTypeToValue.entrySet()) { @@ -99,6 +135,10 @@ public class PartitionStats { } } + /** + * After the statistics task is successfully completed, update the statistics of the column, + * statistics should not be updated in any other way. + */ public void updateColumnStats(String columnName, Type columnType, Map statsTypeToValue) throws AnalysisException { @@ -112,7 +152,7 @@ public class PartitionStats { * @param columnName column name * @return @ColumnStats */ - public ColumnStats getNotNullColumnStats(String columnName) { + private ColumnStats getNotNullColumnStats(String columnName) { ColumnStats columnStats = nameToColumnStats.get(columnName); if (columnStats == null) { columnStats = new ColumnStats(); @@ -120,14 +160,4 @@ public class PartitionStats { } return columnStats; } - - /** - * show the partition row count and data size. - */ - public List getShowInfo() { - List result = Lists.newArrayList(); - result.add(Long.toString(rowCount)); - result.add(Long.toString(dataSize)); - return result; - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index 0720d5c1fe..0f5609ef64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -23,7 +23,6 @@ import org.apache.doris.common.AnalysisException; import com.google.common.collect.Maps; import java.util.Map; -import java.util.concurrent.locks.ReentrantReadWriteLock; /** * There are the statistics of all tables. @@ -36,84 +35,8 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; * @TableStats based on the table id. */ public class Statistics { - private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true); - private final Map idToTableStats = Maps.newConcurrentMap(); - public void readLock() { - lock.readLock().lock(); - } - - public void readUnlock() { - lock.readLock().unlock(); - } - - private void writeLock() { - lock.writeLock().lock(); - } - - private void writeUnlock() { - lock.writeLock().unlock(); - } - - public void updateTableStats(long tableId, Map statsTypeToValue) throws AnalysisException { - writeLock(); - try { - TableStats tableStats = getNotNullTableStats(tableId); - tableStats.updateTableStats(statsTypeToValue); - } finally { - writeUnlock(); - } - } - - public void updatePartitionStats(long tableId, String partitionName, Map statsTypeToValue) - throws AnalysisException { - writeLock(); - try { - TableStats tableStats = getNotNullTableStats(tableId); - tableStats.updatePartitionStats(partitionName, statsTypeToValue); - } finally { - writeUnlock(); - } - } - - public void updateColumnStats(long tableId, String columnName, Type columnType, - Map statsTypeToValue) throws AnalysisException { - writeLock(); - try { - TableStats tableStats = getNotNullTableStats(tableId); - tableStats.updateColumnStats(columnName, columnType, statsTypeToValue); - } finally { - writeUnlock(); - } - } - - public void updateColumnStats(long tableId, String partitionName, String columnName, Type columnType, - Map statsTypeToValue) throws AnalysisException { - writeLock(); - try { - PartitionStats partitionStats = getNotNullPartitionStats(tableId, partitionName); - partitionStats.updateColumnStats(columnName, columnType, statsTypeToValue); - } finally { - writeUnlock(); - } - } - - /** - * if the table stats is not exist, create a new one. - * - * @param tableId table id - * @return @TableStats - */ - public TableStats getNotNullTableStats(long tableId) { - TableStats tableStats = idToTableStats.get(tableId); - if (tableStats == null) { - tableStats = new TableStats(); - idToTableStats.put(tableId, tableStats); - } - return tableStats; - } - /** * Get the table stats for the given table id. * @@ -130,21 +53,10 @@ public class Statistics { } /** - * if the partition stats is not exist, create a new one. - * - * @param tableId table id - * @param partitionName partition name - * @return @TableStats + * If the table statistics do not exist, the default statistics will be returned. */ - public PartitionStats getNotNullPartitionStats(long tableId, String partitionName) { - TableStats tableStats = getNotNullTableStats(tableId); - Map nameToPartitionStats = tableStats.getNameToPartitionStats(); - PartitionStats partitionStats = nameToPartitionStats.get(partitionName); - if (partitionStats == null) { - partitionStats = new PartitionStats(); - nameToPartitionStats.put(partitionName, partitionStats); - } - return partitionStats; + public TableStats getTableStatsOrDefault(long tableId) throws AnalysisException { + return idToTableStats.getOrDefault(tableId, TableStats.getDefaultTableStats()); } /** @@ -216,21 +128,76 @@ public class Statistics { return partitionStat.getNameToColumnStats(); } + public void updateTableStats(long tableId, Map statsTypeToValue) throws AnalysisException { + synchronized (this) { + TableStats tableStats = getNotNullTableStats(tableId); + tableStats.updateTableStats(statsTypeToValue); + } + } + + public void updatePartitionStats(long tableId, String partitionName, Map statsTypeToValue) + throws AnalysisException { + synchronized (this) { + TableStats tableStats = getNotNullTableStats(tableId); + tableStats.updatePartitionStats(partitionName, statsTypeToValue); + } + } + + public void updateColumnStats(long tableId, String columnName, Type columnType, + Map statsTypeToValue) throws AnalysisException { + synchronized (this) { + TableStats tableStats = getNotNullTableStats(tableId); + tableStats.updateColumnStats(columnName, columnType, statsTypeToValue); + } + } + + public void updateColumnStats(long tableId, String partitionName, String columnName, Type columnType, + Map statsTypeToValue) throws AnalysisException { + synchronized (this) { + PartitionStats partitionStats = getNotNullPartitionStats(tableId, partitionName); + partitionStats.updateColumnStats(columnName, columnType, statsTypeToValue); + } + } + // TODO: mock statistics need to be removed in the future public void mockTableStatsWithRowCount(long tableId, long rowCount) { + TableStats tableStats = idToTableStats.get(tableId); + if (tableStats == null) { + tableStats = new TableStats(rowCount, 1); + idToTableStats.put(tableId, tableStats); + } + } + + /** + * if the table stats is not exist, create a new one. + * + * @param tableId table id + * @return @TableStats + */ + private TableStats getNotNullTableStats(long tableId) { TableStats tableStats = idToTableStats.get(tableId); if (tableStats == null) { tableStats = new TableStats(); idToTableStats.put(tableId, tableStats); } - - if (tableStats.getRowCount() != rowCount) { - tableStats.setRowCount(rowCount); - } + return tableStats; } - // Used for unit test - public void putTableStats(long id, TableStats tableStats) { - this.idToTableStats.put(id, tableStats); + /** + * if the partition stats is not exist, create a new one. + * + * @param tableId table id + * @param partitionName partition name + * @return @TableStats + */ + private PartitionStats getNotNullPartitionStats(long tableId, String partitionName) { + TableStats tableStats = getNotNullTableStats(tableId); + Map nameToPartitionStats = tableStats.getNameToPartitionStats(); + PartitionStats partitionStats = nameToPartitionStats.get(partitionName); + if (partitionStats == null) { + partitionStats = new PartitionStats(); + nameToPartitionStats.put(partitionName, partitionStats); + } + return partitionStats; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java index a8afd9850f..3bb74af732 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java @@ -30,6 +30,7 @@ import org.apache.doris.common.ErrorReport; import org.apache.doris.common.UserException; import org.apache.doris.common.util.ListComparator; import org.apache.doris.common.util.OrderByPair; +import org.apache.doris.qe.ConnectContext; import com.google.common.base.Strings; import com.google.common.collect.Lists; @@ -42,7 +43,6 @@ import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.stream.Collectors; /** @@ -57,39 +57,28 @@ public class StatisticsJobManager { */ private final Map idToStatisticsJob = Maps.newConcurrentMap(); - private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true); - - public void readLock() { - lock.readLock().lock(); - } - - public void readUnlock() { - lock.readLock().unlock(); - } - - private void writeLock() { - lock.writeLock().lock(); - } - - private void writeUnlock() { - lock.writeLock().unlock(); - } - public Map getIdToStatisticsJob() { return idToStatisticsJob; } public void createStatisticsJob(AnalyzeStmt analyzeStmt) throws UserException { - // step1: init statistics job by analyzeStmt - StatisticsJob statisticsJob = StatisticsJob.fromAnalyzeStmt(analyzeStmt); - writeLock(); - try { - // step2: check restrict - checkRestrict(analyzeStmt.getDbId(), statisticsJob.getTblIds()); - // step3: create it - createStatisticsJob(statisticsJob); - } finally { - writeUnlock(); + // The current statistics are only used for CBO test, + // and are not available to users. (work in progress) + // TODO(wzt): Further tests are needed + boolean enableCboStatistics = ConnectContext.get() + .getSessionVariable().getEnableCboStatistics(); + if (enableCboStatistics) { + // step1: init statistics job by analyzeStmt + StatisticsJob statisticsJob = StatisticsJob.fromAnalyzeStmt(analyzeStmt); + synchronized (this) { + // step2: check restrict + checkRestrict(analyzeStmt.getDbId(), statisticsJob.getTblIds()); + // step3: create it + createStatisticsJob(statisticsJob); + } + } else { + throw new UserException("Statistics are not yet stable, if you want to enable statistics," + + " use 'set enable_cbo_statistics=true' to enable it."); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java index abc977c292..3267580c5f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java @@ -17,7 +17,6 @@ package org.apache.doris.statistics; -import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.util.Util; @@ -29,7 +28,6 @@ import java.util.List; import java.util.Map; import java.util.function.Predicate; - /** * There are the statistics of table. * The table stats are mainly used to provide input for the Optimizer's cost model. @@ -37,17 +35,21 @@ import java.util.function.Predicate; * - @rowCount: The row count of table. * - @dataSize: The data size of table. * - @nameToColumnStats: <@String columnName, @ColumnStats columnStats> - *

Each column in the Table will have corresponding @ColumnStats. + *

+ * Each column in the Table will have corresponding @ColumnStats. * Those @ColumnStats are recorded in @nameToColumnStats form of MAP. * This facilitates the optimizer to quickly find the corresponding: * - @ColumnStats based on the column name. * - @rowCount: The row count of table. * - @dataSize: The data size of table. - *

The granularity of the statistics is whole table. + *

+ * The granularity of the statistics is whole table. * For example: "@rowCount = 1000" means that the row count is 1000 in the whole table. + *

+ * After the statistics task is successfully completed, update the TableStats, + * TableStats should not be updated in any other way. */ public class TableStats { - public static final StatsType DATA_SIZE = StatsType.DATA_SIZE; public static final StatsType ROW_COUNT = StatsType.ROW_COUNT; @@ -59,8 +61,23 @@ public class TableStats { private final Map nameToPartitionStats = Maps.newConcurrentMap(); private final Map nameToColumnStats = Maps.newConcurrentMap(); + /** + * Return a default partition statistic. + */ + public static TableStats getDefaultTableStats() { + return new TableStats(); + } + + public TableStats() { + } + + public TableStats(long rowCount, long dataSize) { + this.rowCount = rowCount; + this.dataSize = dataSize; + } + public long getRowCount() { - if (rowCount == -1) { + if (rowCount == -1) { return nameToPartitionStats.values().stream() .filter(partitionStats -> partitionStats.getRowCount() != -1) .mapToLong(PartitionStats::getRowCount).sum(); @@ -68,10 +85,6 @@ public class TableStats { return rowCount; } - public void setRowCount(long rowCount) { - this.rowCount = rowCount; - } - public long getDataSize() { if (dataSize == -1) { return nameToPartitionStats.values().stream() @@ -92,10 +105,46 @@ public class TableStats { return nameToColumnStats; } - public void setDataSize(long dataSize) { - this.dataSize = dataSize; + public PartitionStats getPartitionStats(String partitionName) { + return nameToPartitionStats.get(partitionName); } + /** + * If the partition statistics do not exist, the default statistics will be returned. + */ + public PartitionStats getPartitionStatsOrDefault(String columnName) { + return nameToPartitionStats.getOrDefault(columnName, + PartitionStats.getDefaultPartitionStats()); + } + + public ColumnStats getColumnStats(String columnName) { + return nameToColumnStats.get(columnName); + } + + /** + * If the column statistics do not exist, the default statistics will be returned. + */ + public ColumnStats getColumnStatsOrDefault(String columnName) { + return nameToColumnStats.getOrDefault(columnName, + ColumnStats.getDefaultColumnStats()); + } + + public List getShowInfo() { + List result = Lists.newArrayList(); + result.add(Long.toString(getRowCount())); + result.add(Long.toString(getDataSize())); + return result; + } + + public List getShowInfo(String partitionName) { + PartitionStats partitionStats = nameToPartitionStats.get(partitionName); + return partitionStats.getShowInfo(); + } + + /** + * After the statistics task is successfully completed, update the statistics of the partition, + * statistics should not be updated in any other way. + */ public void updateTableStats(Map statsTypeToValue) throws AnalysisException { for (Map.Entry entry : statsTypeToValue.entrySet()) { if (entry.getKey() == ROW_COUNT) { @@ -108,18 +157,42 @@ public class TableStats { } } + /** + * After the statistics task is successfully completed, update the statistics of the partition, + * statistics should not be updated in any other way. + */ public void updatePartitionStats(String partitionName, Map statsTypeToValue) throws AnalysisException { PartitionStats partitionStats = getNotNullPartitionStats(partitionName); partitionStats.updatePartitionStats(statsTypeToValue); } + /** + * After the statistics task is successfully completed, update the statistics of the column, + * statistics should not be updated in any other way. + */ public void updateColumnStats(String columnName, Type columnType, Map statsTypeToValue) throws AnalysisException { - ColumnStats columnStats = getColumnStats(columnName); + ColumnStats columnStats = getNotNullColumnStats(columnName); columnStats.updateStats(columnType, statsTypeToValue); } + private Map getAggPartitionColStats() { + Map aggColumnStats = Maps.newConcurrentMap(); + for (PartitionStats partitionStats : nameToPartitionStats.values()) { + partitionStats.getNameToColumnStats().forEach((colName, columnStats) -> { + if (!aggColumnStats.containsKey(colName)) { + aggColumnStats.put(colName, columnStats); + } else { + ColumnStats oldColumnStats = aggColumnStats.get(colName); + ColumnStats newColumnStats = ColumnStats.mergeColumnStats(columnStats, oldColumnStats); + aggColumnStats.put(colName, newColumnStats); + } + }); + } + return aggColumnStats; + } + /** * If partition stats is not exist, create a new one. * @@ -141,7 +214,7 @@ public class TableStats { * @param columnName column name * @return @ColumnStats */ - public ColumnStats getColumnStats(String columnName) { + private ColumnStats getNotNullColumnStats(String columnName) { ColumnStats columnStats = nameToColumnStats.get(columnName); if (columnStats == null) { columnStats = new ColumnStats(); @@ -149,109 +222,4 @@ public class TableStats { } return columnStats; } - - public List getShowInfo() { - List result = Lists.newArrayList(); - result.add(Long.toString(getRowCount())); - result.add(Long.toString(getDataSize())); - return result; - } - - public List getShowInfo(String partitionName) { - PartitionStats partitionStats = nameToPartitionStats.get(partitionName); - return partitionStats.getShowInfo(); - } - - private Map getAggPartitionColStats() { - Map aggColumnStats = Maps.newConcurrentMap(); - for (PartitionStats partitionStats : nameToPartitionStats.values()) { - partitionStats.getNameToColumnStats().forEach((colName, columnStats) -> { - if (!aggColumnStats.containsKey(colName)) { - aggColumnStats.put(colName, columnStats); - } else { - ColumnStats tblColStats = aggColumnStats.get(colName); - aggPartitionColumnStats(tblColStats, columnStats); - } - }); - } - - return aggColumnStats; - } - - private void aggPartitionColumnStats(ColumnStats leftStats, ColumnStats rightStats) { - if (leftStats.getNdv() == -1) { - if (rightStats.getNdv() != -1) { - leftStats.setNdv(rightStats.getNdv()); - } - } else { - if (rightStats.getNdv() != -1) { - long ndv = leftStats.getNdv() + rightStats.getNdv(); - leftStats.setNdv(ndv); - } - } - - if (leftStats.getAvgSize() == -1) { - if (rightStats.getAvgSize() != -1) { - leftStats.setAvgSize(rightStats.getAvgSize()); - } - } else { - if (rightStats.getAvgSize() != -1) { - float avgSize = (leftStats.getAvgSize() + rightStats.getAvgSize()) / 2; - leftStats.setAvgSize(avgSize); - } - } - - if (leftStats.getMaxSize() == -1) { - if (rightStats.getMaxSize() != -1) { - leftStats.setMaxSize(rightStats.getMaxSize()); - } - } else { - if (rightStats.getMaxSize() != -1) { - long maxSize = Math.max(leftStats.getMaxSize(), rightStats.getMaxSize()); - leftStats.setMaxSize(maxSize); - } - } - - if (leftStats.getNumNulls() == -1) { - if (rightStats.getNumNulls() != -1) { - leftStats.setNumNulls(rightStats.getNumNulls()); - } - } else { - if (rightStats.getNumNulls() != -1) { - long numNulls = leftStats.getNumNulls() + rightStats.getNumNulls(); - leftStats.setNumNulls(numNulls); - } - } - - if (leftStats.getMinValue() == null) { - if (rightStats.getMinValue() != null) { - leftStats.setMinValue(rightStats.getMinValue()); - } - } else { - if (rightStats.getMinValue() != null) { - LiteralExpr minValue = leftStats.getMinValue().compareTo(rightStats.getMinValue()) > 0 - ? leftStats.getMinValue() : rightStats.getMinValue(); - leftStats.setMinValue(minValue); - } - } - - if (leftStats.getMaxValue() == null) { - if (rightStats.getMaxValue() != null) { - leftStats.setMaxValue(rightStats.getMaxValue()); - } - } else { - if (rightStats.getMaxValue() != null) { - LiteralExpr maxValue = leftStats.getMaxValue().compareTo(rightStats.getMaxValue()) > 0 - ? leftStats.getMaxValue() : rightStats.getMaxValue(); - leftStats.setMaxValue(maxValue); - } - } - } - - /** - * This method is for unit test. - */ - public void putColumnStats(String name, ColumnStats columnStats) { - nameToColumnStats.put(name, columnStats); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TabletStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TabletStats.java index ac4a72f3e5..756b82d2de 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TabletStats.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TabletStats.java @@ -37,7 +37,6 @@ package org.apache.doris.statistics; * "@rowCount = 10" means that the row count is 1000 in one tablet. */ public class TabletStats { - private long rowCount; private long dataSize; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java index 7435f508f7..d4c8c8e47e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java @@ -17,8 +17,10 @@ package org.apache.doris.nereids.jobs.cascades; +import org.apache.doris.analysis.NullLiteral; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.OlapTable; +import org.apache.doris.common.AnalysisException; import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.jobs.JobContext; import org.apache.doris.nereids.properties.LogicalProperties; @@ -43,6 +45,8 @@ import org.apache.doris.statistics.TableStats; import com.google.common.collect.ImmutableList; import mockit.Expectations; +import mockit.Mock; +import mockit.MockUp; import mockit.Mocked; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -78,15 +82,19 @@ public class DeriveStatsJobTest { Assertions.assertEquals(1, statistics.getRowCount()); } - private LogicalOlapScan constructOlapSCan() { - ColumnStats columnStats1 = new ColumnStats(); - columnStats1.setNdv(10); - columnStats1.setNumNulls(5); + private LogicalOlapScan constructOlapSCan() throws AnalysisException { + ColumnStats columnStats1 = new ColumnStats(10, 0, 0, 5, + new NullLiteral(), new NullLiteral()); + new MockUp(TableStats.class) { + @Mock + public ColumnStats getColumnStats(String columnName) { + return columnStats1; + } + }; + long tableId1 = 0; - TableStats tableStats1 = new TableStats(); - tableStats1.putColumnStats("c1", columnStats1); Statistics statistics = new Statistics(); - statistics.putTableStats(tableId1, tableStats1); + List qualifier = ImmutableList.of("test", "t"); slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); new Expectations() {{ diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java index 246114ca5f..1767a0940f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java @@ -17,8 +17,10 @@ package org.apache.doris.nereids.stats; +import org.apache.doris.analysis.NullLiteral; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.OlapTable; +import org.apache.doris.common.AnalysisException; import org.apache.doris.nereids.memo.Group; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; @@ -46,6 +48,8 @@ import org.apache.doris.statistics.TableStats; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; import mockit.Expectations; +import mockit.Mock; +import mockit.MockUp; import mockit.Mocked; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -112,12 +116,10 @@ public class StatsCalculatorTest { qualifier.add("t"); SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); SlotReference slot2 = new SlotReference("c2", IntegerType.INSTANCE, true, qualifier); - ColumnStats columnStats1 = new ColumnStats(); - columnStats1.setNdv(10); - columnStats1.setNumNulls(5); - ColumnStats columnStats2 = new ColumnStats(); - columnStats2.setNdv(20); - columnStats1.setNumNulls(10); + ColumnStats columnStats1 = new ColumnStats(10, 0, 0, 5, + new NullLiteral(), new NullLiteral()); + ColumnStats columnStats2 = new ColumnStats(20, 0, 0, 10, + new NullLiteral(), new NullLiteral()); Map slotColumnStatsMap = new HashMap<>(); slotColumnStatsMap.put(slot1, columnStats1); slotColumnStatsMap.put(slot2, columnStats2); @@ -196,15 +198,19 @@ public class StatsCalculatorTest { // } @Test - public void testOlapScan() { - ColumnStats columnStats1 = new ColumnStats(); - columnStats1.setNdv(10); - columnStats1.setNumNulls(5); + public void testOlapScan() throws AnalysisException { + ColumnStats columnStats1 = new ColumnStats(10, 0, 0, 5, + new NullLiteral(), new NullLiteral()); + new MockUp(TableStats.class) { + @Mock + public ColumnStats getColumnStats(String columnName) { + return columnStats1; + } + }; + long tableId1 = 0; - TableStats tableStats1 = new TableStats(); - tableStats1.putColumnStats("c1", columnStats1); Statistics statistics = new Statistics(); - statistics.putTableStats(tableId1, tableStats1); + List qualifier = ImmutableList.of("test", "t"); SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); new Expectations() {{ @@ -237,9 +243,8 @@ public class StatsCalculatorTest { qualifier.add("test"); qualifier.add("t"); SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); - ColumnStats columnStats1 = new ColumnStats(); - columnStats1.setNdv(10); - columnStats1.setNumNulls(5); + ColumnStats columnStats1 = new ColumnStats(10, 1, 1, 5, + new NullLiteral(), new NullLiteral()); Map slotColumnStatsMap = new HashMap<>(); slotColumnStatsMap.put(slot1, columnStats1); StatsDeriveResult childStats = new StatsDeriveResult(10, slotColumnStatsMap); @@ -268,9 +273,8 @@ public class StatsCalculatorTest { qualifier.add("test"); qualifier.add("t"); SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); - ColumnStats columnStats1 = new ColumnStats(); - columnStats1.setNdv(10); - columnStats1.setNumNulls(5); + ColumnStats columnStats1 = new ColumnStats(10, 0, 0, 5, + new NullLiteral(), new NullLiteral()); Map slotColumnStatsMap = new HashMap<>(); slotColumnStatsMap.put(slot1, columnStats1); StatsDeriveResult childStats = new StatsDeriveResult(10, slotColumnStatsMap); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java new file mode 100644 index 0000000000..c6ec604a60 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.analysis.NullLiteral; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public class ColumnStatsTest { + private ColumnStats columnStatsUnderTest; + + @Before + public void setUp() throws Exception { + columnStatsUnderTest = new ColumnStats(); + } + + @Test + public void testUpdateStats() throws Exception { + // Setup + Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.MAX_SIZE, "8"); + statsTypeToValue.put(StatsType.MIN_VALUE, "0"); + statsTypeToValue.put(StatsType.MAX_VALUE, "100"); + + // Run the test + columnStatsUnderTest.updateStats(columnType, statsTypeToValue); + + // Verify the results + long maxSize = columnStatsUnderTest.getMaxSize(); + Assert.assertEquals(8, maxSize); + + long minValue = columnStatsUnderTest.getMinValue().getLongValue(); + Assert.assertEquals(0, minValue); + + long maxValue = columnStatsUnderTest.getMaxValue().getLongValue(); + Assert.assertEquals(100, maxValue); + } + + @Test + public void testUpdateStats_ThrowsAnalysisException() { + // Setup + Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.AVG_SIZE, "abc"); + + // Run the test + Assert.assertThrows(AnalysisException.class, + () -> columnStatsUnderTest.updateStats(columnType, statsTypeToValue)); + } + + @Test + public void testGetShowInfo() throws AnalysisException { + // Setup + Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.NDV, "1"); + statsTypeToValue.put(StatsType.AVG_SIZE, "8"); + statsTypeToValue.put(StatsType.MAX_SIZE, "8"); + statsTypeToValue.put(StatsType.NUM_NULLS, "2"); + statsTypeToValue.put(StatsType.MIN_VALUE, "0"); + statsTypeToValue.put(StatsType.MAX_VALUE, "1000"); + + columnStatsUnderTest.updateStats(columnType, statsTypeToValue); + String[] expectedInfo = {"1", "8.0", "8", "2", "0", "1000"}; + + // Run the test + List showInfo = columnStatsUnderTest.getShowInfo(); + String[] result = showInfo.toArray(new String[0]); + + // Verify the results + Assert.assertArrayEquals(expectedInfo, result); + } + + @Test + public void testGetDefaultColumnStats() { + // Run the test + ColumnStats defaultColumnStats = ColumnStats.getDefaultColumnStats(); + + // Verify the results + long ndv = defaultColumnStats.getNdv(); + Assert.assertEquals(-1L, ndv); + + float avgSize = defaultColumnStats.getAvgSize(); + Assert.assertEquals(-1.0f, avgSize, 0.0001); + + long maxSize = defaultColumnStats.getMaxSize(); + Assert.assertEquals(-1L, maxSize); + + LiteralExpr maxValue = defaultColumnStats.getMaxValue(); + Assert.assertEquals(new NullLiteral(), maxValue); + + LiteralExpr minValue = defaultColumnStats.getMinValue(); + Assert.assertEquals(new NullLiteral(), minValue); + } + + @Test + public void testAggColumnStats() throws Exception { + // Setup + ColumnStats columnStats = ColumnStats.getDefaultColumnStats(); + Type minValueType = Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.STRING)); + Type maxValueType = Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.STRING)); + ColumnStats other = new ColumnStats(1L, 4.0f, 5L, 10L, + LiteralExpr.create("sMinValue", minValueType), + LiteralExpr.create("sMaxValue", maxValueType)); + + // Run the test + ColumnStats aggColumnStats = ColumnStats.mergeColumnStats(columnStats, other); + + // Verify the results + long ndv = aggColumnStats.getNdv(); + // 0(default) + 1 + Assert.assertEquals(1L, ndv); + + float avgSize = aggColumnStats.getAvgSize(); + // (0.0f + 4.0f) / 2 + Assert.assertEquals(4.0f, avgSize, 0.0001); + + long maxSize = aggColumnStats.getMaxSize(); + Assert.assertEquals(5L, maxSize); + + long numNulls = aggColumnStats.getNumNulls(); + Assert.assertEquals(10L, numNulls); + + String minValue = aggColumnStats.getMinValue().getStringValue(); + // null VS sMinValue + Assert.assertEquals("NULL", minValue); + + String maxValue = aggColumnStats.getMaxValue().getStringValue(); + // null VS sMaxValue + Assert.assertEquals("sMaxValue", maxValue); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java new file mode 100644 index 0000000000..fe9bb67bce --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class PartitionStatsTest { + private PartitionStats partitionStatsUnderTest; + + @Before + public void setUp() throws Exception { + partitionStatsUnderTest = new PartitionStats(); + } + + @Test + public void testUpdatePartitionStats() throws Exception { + // Setup + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); + statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); + + // Run the test + partitionStatsUnderTest.updatePartitionStats(statsTypeToValue); + + // Verify the results + long rowCount = partitionStatsUnderTest.getRowCount(); + Assert.assertEquals(1000, rowCount); + + long dataSize = partitionStatsUnderTest.getDataSize(); + Assert.assertEquals(10240, dataSize); + } + + @Test + public void testUpdatePartitionStats_ThrowsAnalysisException() { + // Setup + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.AVG_SIZE, "8"); + statsTypeToValue.put(StatsType.ROW_COUNT, "abc"); + + // Run the test + Assert.assertThrows(AnalysisException.class, + () -> partitionStatsUnderTest.updatePartitionStats(statsTypeToValue)); + } + + @Test + public void testUpdateColumnStats() throws Exception { + // Setup + Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.NDV, "1"); + statsTypeToValue.put(StatsType.AVG_SIZE, "8"); + statsTypeToValue.put(StatsType.MAX_SIZE, "8"); + statsTypeToValue.put(StatsType.NUM_NULLS, "2"); + statsTypeToValue.put(StatsType.MIN_VALUE, "0"); + statsTypeToValue.put(StatsType.MAX_VALUE, "1000"); + + // Run the test + partitionStatsUnderTest.updateColumnStats("columnName", columnType, statsTypeToValue); + ColumnStats columnStats = partitionStatsUnderTest.getColumnStats("columnName"); + + // Verify the results + long ndv = columnStats.getNdv(); + Assert.assertEquals(1, ndv); + + float avgSize = columnStats.getAvgSize(); + Assert.assertEquals(8.0f, avgSize, 0.0001); + + long maxSize = columnStats.getMaxSize(); + Assert.assertEquals(8, maxSize); + + long maxValue = columnStats.getMaxValue().getLongValue(); + Assert.assertEquals(1000, maxValue); + + long minValue = columnStats.getMinValue().getLongValue(); + Assert.assertEquals(0, minValue); + + long numNulls = columnStats.getNumNulls(); + Assert.assertEquals(2, numNulls); + } + + @Test + public void testUpdateColumnStats_ThrowsAnalysisException() { + // Setup + Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.AVG_SIZE, "abc"); + + // Run the test + Assert.assertThrows( + AnalysisException.class, () -> partitionStatsUnderTest + .updateColumnStats("columnName", columnType, statsTypeToValue)); + } + + @Test + public void testGetShowInfo() throws AnalysisException { + // Setup + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); + statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); + + partitionStatsUnderTest.updatePartitionStats(statsTypeToValue); + String[] expectedInfo = {"1000", "10240"}; + + // Run the test + List showInfo = partitionStatsUnderTest.getShowInfo(); + String[] result = showInfo.toArray(new String[0]); + + // Run the test + Assert.assertArrayEquals(expectedInfo, result); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java new file mode 100644 index 0000000000..717fd7aa14 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java @@ -0,0 +1,173 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.HashDistributionInfo; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PartitionInfo; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.common.jmockit.Deencapsulation; +import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.system.SystemInfoService; + +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import mockit.Mock; +import mockit.MockUp; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class StatisticsJobSchedulerTest { + private StatisticsJob statisticsJob; + + private StatisticsJobScheduler statisticsJobSchedulerUnderTest; + + @Before + public void setUp() throws Exception { + HashSet tblIds = Sets.newHashSet(); + tblIds.add(0L); + tblIds.add(1L); + + Map> tableIdToColumnName = Maps.newHashMap(); + tableIdToColumnName.put(0L, Arrays.asList("c1", "c2")); + tableIdToColumnName.put(1L, Arrays.asList("c1", "c2")); + Map> tblIdToPartitionName = Maps.newHashMap(); + + statisticsJob = new StatisticsJob(0L, tblIds, tblIdToPartitionName, + tableIdToColumnName, null); + statisticsJobSchedulerUnderTest = new StatisticsJobScheduler(); + statisticsJobSchedulerUnderTest.addPendingJob(statisticsJob); + } + + @Test + public void testRunAfterCatalogReady() { + // Setup + Column col1 = new Column("c1", PrimitiveType.STRING); + Column col2 = new Column("c2", PrimitiveType.INT); + OlapTable tbl1 = new OlapTable(0L, "tbl1", Arrays.asList(col1, col2), KeysType.AGG_KEYS, + new PartitionInfo(), new HashDistributionInfo()); + OlapTable tbl2 = new OlapTable(1L, "tbl2", Arrays.asList(col1, col2), KeysType.DUP_KEYS, + new PartitionInfo(), new HashDistributionInfo()); + Database database = new Database(0L, "db"); + database.createTable(tbl1); + database.createTable(tbl2); + + InternalCatalog catalog = Env.getCurrentInternalCatalog(); + ConcurrentHashMap fullNameToDb = new ConcurrentHashMap<>(); + fullNameToDb.put("cluster:db", database); + Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb); + + ConcurrentHashMap idToDb = new ConcurrentHashMap<>(); + idToDb.put(0L, database); + Deencapsulation.setField(catalog, "idToDb", idToDb); + + new MockUp(SystemInfoService.class) { + @Mock + public List getBackendIds(boolean needAlive) { + return Collections.singletonList(1L); + } + }; + + // Run the test + statisticsJobSchedulerUnderTest.runAfterCatalogReady(); + + /* + * expected results: + * mateTask(2): + * - tbl1: + * - task1: + * - data_size + * - max_size(c2) + * - avg_size(c2) + * - tbl2: + * - task: + * - row_count + * - data_size + * - max_size(c2) + * - avg_size(c2) + * + * sqlTask(11): + * - tbl1: + * - task: + * - ndv(c1) + * - min_value(c1) + * - max_value(c1) + * - task: + * - ndv(c2) + * - min_value(c2) + * - max_value(c2) + * - task: + * - max_size(c1) + * - avg_size(c1) + * - task: + * - num_nulls(c1) + * - task: + * - num_nulls(c2) + * - task + * - row_count + * - tbl2: + * - task: + * - ndv(c1) + * - min_value(c1) + * - max_value(c1) + * - task: + * - ndv(c2) + * - min_value(c2) + * - max_value(c2) + * - task: + * - max_size(c1) + * - avg_size(c1) + * - task: + * - num_nulls(c1) + * - task: + * - num_nulls(c2) + */ + + // Verify the results + List tasks = statisticsJob.getTasks(); + Assert.assertEquals(13, tasks.size()); + + int sqlTaskCount = 0; + int metaTaskCount = 0; + + for (StatisticsTask task : tasks) { + if (task instanceof SQLStatisticsTask) { + sqlTaskCount++; + } else if (task instanceof MetaStatisticsTask) { + metaTaskCount++; + } else { + Assert.fail("Unknown task type."); + } + } + + Assert.assertEquals(2, metaTaskCount); + Assert.assertEquals(11, sqlTaskCount); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobTest.java new file mode 100644 index 0000000000..eb6cd576e4 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobTest.java @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.common.DdlException; +import org.apache.doris.statistics.StatisticsJob.JobState; +import org.apache.doris.statistics.StatisticsTask.TaskState; + +import com.google.common.collect.Maps; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; + +public class StatisticsJobTest { + private StatisticsJob statisticsJobUnderTest; + + private StatisticsTask statisticsTaskUnderTest; + + @Before + public void setUp() throws Exception { + HashSet tblIds = new HashSet<>(Collections.singletonList(0L)); + Map> tblIdToPartitionName = Maps.newHashMap(); + Map> tableIdToColumnName = Maps.newHashMap(); + statisticsJobUnderTest = new StatisticsJob(0L, tblIds, tblIdToPartitionName, + tableIdToColumnName, new HashMap<>()); + + StatsCategory statsCategory = new StatsCategory(); + StatsGranularity statsGranularity = new StatsGranularity(); + List statsTypes = Collections.singletonList(StatsType.ROW_COUNT); + statisticsTaskUnderTest = new SQLStatisticsTask(0L, + Collections.singletonList(new StatisticsDesc(statsCategory, statsGranularity, statsTypes))); + + List tasks = statisticsJobUnderTest.getTasks(); + tasks.add(statisticsTaskUnderTest); + } + + @Test + public void testUpdateJobState() throws Exception { + // Run the test + statisticsJobUnderTest.updateJobState(JobState.SCHEDULING); + + // Verify the results + JobState jobState = statisticsJobUnderTest.getJobState(); + Assert.assertEquals(JobState.SCHEDULING, jobState); + } + + @Test + public void testUpdateJobState_ThrowsDdlException() { + // Run the test + Assert.assertThrows(DdlException.class, + () -> statisticsJobUnderTest.updateJobState(JobState.RUNNING)); + } + + @Test + public void testUpdateJobInfoByTaskId() throws Exception { + // Setup + statisticsJobUnderTest.updateJobState(JobState.SCHEDULING); + statisticsJobUnderTest.updateJobState(JobState.RUNNING); + statisticsTaskUnderTest.updateTaskState(TaskState.RUNNING); + + // Run the test + long taskId = statisticsTaskUnderTest.getId(); + statisticsJobUnderTest.updateJobInfoByTaskId(taskId, ""); + + // Verify the results + JobState jobState = statisticsJobUnderTest.getJobState(); + Assert.assertEquals(JobState.FINISHED, jobState); + + TaskState taskState = statisticsTaskUnderTest.getTaskState(); + Assert.assertEquals(TaskState.FINISHED, taskState); + } + + @Test + public void testUpdateJobInfoByTaskIdFailed() throws Exception { + // Setup + statisticsJobUnderTest.updateJobState(JobState.SCHEDULING); + statisticsJobUnderTest.updateJobState(JobState.RUNNING); + statisticsTaskUnderTest.updateTaskState(TaskState.RUNNING); + + // Run the test + long taskId = statisticsTaskUnderTest.getId(); + statisticsJobUnderTest.updateJobInfoByTaskId(taskId, "errorMsg"); + + // Verify the results + JobState jobState = statisticsJobUnderTest.getJobState(); + Assert.assertEquals(JobState.FAILED, jobState); + + TaskState taskState = statisticsTaskUnderTest.getTaskState(); + Assert.assertEquals(TaskState.FAILED, taskState); + } + + @Test + public void testUpdateJobInfoByTaskId_ThrowsDdlException() { + // Run the test + long taskId = statisticsTaskUnderTest.getId(); + Assert.assertThrows(DdlException.class, + () -> statisticsJobUnderTest.updateJobInfoByTaskId(taskId, "")); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java new file mode 100644 index 0000000000..640cb0a1e3 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.HashDistributionInfo; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PartitionInfo; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.jmockit.Deencapsulation; +import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.statistics.StatisticsTaskResult.TaskResult; +import org.apache.doris.statistics.StatsCategory.Category; +import org.apache.doris.statistics.StatsGranularity.Granularity; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class StatisticsManagerTest { + private StatisticsManager statisticsManagerUnderTest; + + @Before + public void setUp() throws Exception { + Column col1 = new Column("c1", PrimitiveType.STRING); + Column col2 = new Column("c2", PrimitiveType.INT); + OlapTable tbl1 = new OlapTable(0L, "tbl1", Arrays.asList(col1, col2), KeysType.AGG_KEYS, + new PartitionInfo(), new HashDistributionInfo()); + OlapTable tbl2 = new OlapTable(1L, "tbl2", Arrays.asList(col1, col2), KeysType.DUP_KEYS, + new PartitionInfo(), new HashDistributionInfo()); + Database database = new Database(0L, "db"); + database.createTable(tbl1); + database.createTable(tbl2); + + InternalCatalog catalog = Env.getCurrentInternalCatalog(); + ConcurrentHashMap fullNameToDb = new ConcurrentHashMap<>(); + fullNameToDb.put("cluster:db", database); + Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb); + + ConcurrentHashMap idToDb = new ConcurrentHashMap<>(); + idToDb.put(0L, database); + Deencapsulation.setField(catalog, "idToDb", idToDb); + + statisticsManagerUnderTest = new StatisticsManager(); + } + + @Test + public void testUpdateStatistics() throws Exception { + // Setup + TaskResult taskResult = new TaskResult(); + taskResult.setDbId(0L); + taskResult.setTableId(0L); + taskResult.setCategory(Category.TABLE); + taskResult.setGranularity(Granularity.TABLE); + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); + statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); + taskResult.setStatsTypeToValue(statsTypeToValue); + + List statsTaskResults = Collections.singletonList( + new StatisticsTaskResult(Collections.singletonList(taskResult))); + + // Run the test + statisticsManagerUnderTest.updateStatistics(statsTaskResults); + Statistics statistics = statisticsManagerUnderTest.getStatistics(); + TableStats tableStats = statistics.getTableStats(0L); + + // Verify the results + long rowCount = tableStats.getRowCount(); + Assert.assertEquals(1000L, rowCount); + + long dataSize = tableStats.getDataSize(); + Assert.assertEquals(10240L, dataSize); + } + + @Test + public void testUpdateStatistics_ThrowsAnalysisException() { + // Setup + TaskResult taskResult = new TaskResult(); + taskResult.setDbId(0L); + taskResult.setTableId(1L); + taskResult.setPartitionName("partitionName"); + taskResult.setColumnName("columnName"); + taskResult.setCategory(Category.TABLE); + taskResult.setGranularity(Granularity.TABLE); + taskResult.setStatsTypeToValue(new HashMap<>()); + List statsTaskResults = Collections.singletonList( + new StatisticsTaskResult(Collections.singletonList(taskResult))); + + // Run the test + Assert.assertThrows(AnalysisException.class, + () -> statisticsManagerUnderTest.updateStatistics(statsTaskResults)); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java index a758fe0bc7..4a45f301bd 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java @@ -47,7 +47,7 @@ public class StatisticsTest { long rowCount = statisticsUnderTest.getTableStats(0L).getRowCount(); // Verify the results - Assert.assertEquals(rowCount, 1000L); + Assert.assertEquals(1000L, rowCount); } @Test @@ -74,7 +74,7 @@ public class StatisticsTest { long rowCount = partitionStats.get("partitionName").getRowCount(); // Verify the results - Assert.assertEquals(rowCount, 1000L); + Assert.assertEquals(1000L, rowCount); } @Test @@ -101,7 +101,7 @@ public class StatisticsTest { long numNulls = columnStats.get("columnName").getNumNulls(); // Verify the results - Assert.assertEquals(numNulls, 1000L); + Assert.assertEquals(1000L, numNulls); } @Test @@ -131,7 +131,7 @@ public class StatisticsTest { long numNulls = columnStats.get("columnName").getNumNulls(); // Verify the results - Assert.assertEquals(numNulls, 1000L); + Assert.assertEquals(1000L, numNulls); } @Test @@ -146,15 +146,6 @@ public class StatisticsTest { 0L, "partitionName", "columnName", columnType, statsTypeToValue)); } - @Test - public void testGetNotNullTableStats() { - // Run the test - TableStats result = statisticsUnderTest.getNotNullTableStats(0L); - - // Verify the results - Assert.assertNotNull(result); - } - @Test public void testGetTableStats() throws Exception { // Setup @@ -166,7 +157,8 @@ public class StatisticsTest { TableStats result = statisticsUnderTest.getTableStats(0L); // Verify the results - Assert.assertNotNull(result); + long rowCount = result.getRowCount(); + Assert.assertEquals(1000, rowCount); } @Test @@ -176,16 +168,6 @@ public class StatisticsTest { () -> statisticsUnderTest.getTableStats(0L)); } - @Test - public void testGetNotNullPartitionStats() { - // Run the test - PartitionStats result = statisticsUnderTest - .getNotNullPartitionStats(0L, "partitionName"); - - // Verify the results - Assert.assertNotNull(result); - } - @Test public void testGetPartitionStats() throws Exception { // Setup @@ -197,7 +179,9 @@ public class StatisticsTest { Map result = statisticsUnderTest.getPartitionStats(0L); // Verify the results - Assert.assertNotNull(result); + PartitionStats partitionStats = result.get("partitionName"); + long rowCount = partitionStats.getRowCount(); + Assert.assertEquals(1000, rowCount); } @Test @@ -219,7 +203,9 @@ public class StatisticsTest { .getPartitionStats(0L, "partitionName"); // Verify the results - Assert.assertNotNull(result); + PartitionStats partitionStats = result.get("partitionName"); + long rowCount = partitionStats.getRowCount(); + Assert.assertEquals(1000, rowCount); } @Test @@ -241,7 +227,9 @@ public class StatisticsTest { Map result = statisticsUnderTest.getColumnStats(0L); // Verify the results - Assert.assertNotNull(result); + ColumnStats columnStats = result.get("columnName"); + long numNulls = columnStats.getNumNulls(); + Assert.assertEquals(1000, numNulls); } @Test @@ -265,7 +253,9 @@ public class StatisticsTest { .getColumnStats(0L, "partitionName"); // Verify the results - Assert.assertNotNull(result); + ColumnStats columnStats = result.get("columnName"); + long numNulls = columnStats.getNumNulls(); + Assert.assertEquals(1000, numNulls); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java new file mode 100644 index 0000000000..8106a9ead6 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class TableStatsTest { + private TableStats tableStatsUnderTest; + + @Before + public void setUp() throws Exception { + tableStatsUnderTest = new TableStats(); + } + + @Test + public void testUpdateTableStats() throws Exception { + // Setup + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); + statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); + + // Run the test + tableStatsUnderTest.updateTableStats(statsTypeToValue); + + // Verify the results + long rowCount = tableStatsUnderTest.getRowCount(); + Assert.assertEquals(1000, rowCount); + + long dataSize = tableStatsUnderTest.getDataSize(); + Assert.assertEquals(10240, dataSize); + } + + @Test + public void testUpdateTableStats_ThrowsAnalysisException() { + // Setup + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.AVG_SIZE, "8"); + statsTypeToValue.put(StatsType.ROW_COUNT, "abc"); + + // Run the test + Assert.assertThrows(AnalysisException.class, + () -> tableStatsUnderTest.updateTableStats(statsTypeToValue)); + } + + @Test + public void testUpdatePartitionStats() throws Exception { + // Setup + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); + statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); + + // Run the test + tableStatsUnderTest.updatePartitionStats("partitionName", statsTypeToValue); + PartitionStats partitionStats = tableStatsUnderTest.getNameToPartitionStats().get("partitionName"); + + // Verify the results + long rowCount = partitionStats.getRowCount(); + Assert.assertEquals(1000, rowCount); + + long dataSize = partitionStats.getDataSize(); + Assert.assertEquals(10240, dataSize); + } + + @Test + public void testUpdatePartitionStats_ThrowsAnalysisException() { + // Setup + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.ROW_COUNT, "abc"); + + // Run the test + Assert.assertThrows(AnalysisException.class, () -> tableStatsUnderTest + .updatePartitionStats("partitionName", statsTypeToValue)); + } + + @Test + public void testUpdateColumnStats() throws Exception { + // Setup + Type columnType = Type.fromPrimitiveType(PrimitiveType.BIGINT); + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.NDV, "1"); + statsTypeToValue.put(StatsType.AVG_SIZE, "8"); + statsTypeToValue.put(StatsType.MAX_SIZE, "8"); + statsTypeToValue.put(StatsType.NUM_NULLS, "2"); + statsTypeToValue.put(StatsType.MIN_VALUE, "0"); + statsTypeToValue.put(StatsType.MAX_VALUE, "1000"); + + // Run the test + tableStatsUnderTest.updateColumnStats("columnName", columnType, statsTypeToValue); + ColumnStats columnStats = tableStatsUnderTest.getColumnStats("columnName"); + + // Verify the results + long ndv = columnStats.getNdv(); + Assert.assertEquals(1L, ndv); + + float avgSize = columnStats.getAvgSize(); + Assert.assertEquals(8.0f, avgSize, 0.0001); + + long maxSize = columnStats.getMaxSize(); + Assert.assertEquals(8L, maxSize); + + long maxValue = columnStats.getMaxValue().getLongValue(); + Assert.assertEquals(1000, maxValue); + + long minValue = columnStats.getMinValue().getLongValue(); + Assert.assertEquals(0L, minValue); + + long numNulls = columnStats.getNumNulls(); + Assert.assertEquals(2, numNulls); + } + + @Test + public void testUpdateColumnStats_ThrowsAnalysisException() { + // Setup + Type columnType = Type.fromPrimitiveType(PrimitiveType.INVALID_TYPE); + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.AVG_SIZE, "abc"); + // Run the test + Assert.assertThrows(AnalysisException.class, () -> tableStatsUnderTest + .updateColumnStats("columnName", columnType, statsTypeToValue)); + } + + @Test + public void testGetShowInfo() throws AnalysisException { + // Setup + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); + statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); + + tableStatsUnderTest.updateTableStats(statsTypeToValue); + String[] expectedInfo = {"1000", "10240"}; + + // Run the test + List showInfo = tableStatsUnderTest.getShowInfo(); + String[] result = showInfo.toArray(new String[0]); + + // Verify the results + Assert.assertArrayEquals(expectedInfo, result); + } + + @Test + public void testGetShowInfoWithPartitionName() throws AnalysisException { + // Setup + Map statsTypeToValue = new HashMap<>(); + statsTypeToValue.put(StatsType.ROW_COUNT, "1000"); + statsTypeToValue.put(StatsType.DATA_SIZE, "10240"); + + tableStatsUnderTest.updatePartitionStats("partitionName", statsTypeToValue); + String[] expectedInfo = {"1000", "10240"}; + + // Run the test + List showInfo = tableStatsUnderTest.getShowInfo("partitionName"); + String[] result = showInfo.toArray(new String[0]); + + // Verify the results + Assert.assertArrayEquals(expectedInfo, result); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryBufferTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryBufferTest.java index c1e1d889b4..3e27acca6b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryBufferTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryBufferTest.java @@ -112,9 +112,9 @@ public class InternalQueryBufferTest { Assert.assertEquals(123, result2); float result3 = internalQueryBuffer.readFloat(); - Assert.assertEquals(0.1, result3, 1); + Assert.assertEquals(0.1, result3, 0.0001); double result4 = internalQueryBuffer.readDouble(); - Assert.assertEquals(18.2322, result4, 4); + Assert.assertEquals(18.2322, result4, 0.0001); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryResultTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryResultTest.java index 8e6332363d..8d2518ae40 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryResultTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalQueryResultTest.java @@ -94,8 +94,8 @@ public class InternalQueryResultTest { public void testGetColumnValueWithIndex() throws Exception { Assert.assertEquals("s1", resultRow.getColumnValue(0).toString()); Assert.assertEquals(1000, Integer.parseInt((String) resultRow.getColumnValue(1))); - Assert.assertEquals(0.1f, Float.parseFloat((String) resultRow.getColumnValue(2)), 1); - Assert.assertEquals(0.0001, Double.parseDouble((String) resultRow.getColumnValue(3)), 4); + Assert.assertEquals(0.1f, Float.parseFloat((String) resultRow.getColumnValue(2)), 0.0001); + Assert.assertEquals(0.0001, Double.parseDouble((String) resultRow.getColumnValue(3)), 0.0001); Assert.assertEquals(1000000, Long.parseLong((String) resultRow.getColumnValue(4))); } @@ -103,8 +103,8 @@ public class InternalQueryResultTest { public void testGetColumnValueWithName() throws Exception { Assert.assertEquals("s1", resultRow.getColumnValue(0).toString()); Assert.assertEquals(1000, Integer.parseInt((String) resultRow.getColumnValue(1))); - Assert.assertEquals(0.1f, Float.parseFloat((String) resultRow.getColumnValue(2)), 1); - Assert.assertEquals(0.0001, Double.parseDouble((String) resultRow.getColumnValue(3)), 4); + Assert.assertEquals(0.1f, Float.parseFloat((String) resultRow.getColumnValue(2)), 0.0001); + Assert.assertEquals(0.0001, Double.parseDouble((String) resultRow.getColumnValue(3)), 0.0001); Assert.assertEquals(1000000, Long.parseLong((String) resultRow.getColumnValue(4))); } @@ -112,8 +112,8 @@ public class InternalQueryResultTest { public void testGetTypeValue() throws Exception { Assert.assertEquals("s1", resultRow.getString(0)); Assert.assertEquals(1000, resultRow.getInt(1)); - Assert.assertEquals(0.1f, resultRow.getFloat(2), 1); - Assert.assertEquals(0.0001, resultRow.getDouble(3), 4); + Assert.assertEquals(0.1f, resultRow.getFloat(2), 0.0001); + Assert.assertEquals(0.0001, resultRow.getDouble(3), 0.0001); Assert.assertEquals(1000000, resultRow.getLong(4)); } }