[enhancement](stats) Make stats cache item size configurable (#19205)
This commit is contained in:
@ -1948,5 +1948,17 @@ public class Config extends ConfigBase {
|
||||
*/
|
||||
@ConfField(mutable = false)
|
||||
public static boolean enable_delete_existing_files = false;
|
||||
/*
|
||||
* The actual memory size taken by the stats cache depends heavily on the characteristics of the data, since across
|
||||
* different datasets and scenarios the average size of the max/min literals and the histogram bucket count can vary
|
||||
* widely. Besides, the JVM version etc. also influences it, though not as much as the data itself.
|
||||
* Here is the memory size taken by the stats cache with 10_0000 items. Each item's avg length of max/min literal
|
||||
* is 32, and the avg column name length is 16, and each column has a histogram with 128 buckets.
|
||||
* In this case, the stats cache takes a total of 911.954833984MiB of memory.
|
||||
* Without histograms, the stats cache takes a total of 61.2777404785MiB of memory.
|
||||
* Analyzing a column that contains very large STRING values is strongly discouraged, since it would cause
|
||||
* FE OOM.
|
||||
*/
|
||||
@ConfField
|
||||
public static long stats_cache_size = 10_0000;
|
||||
}
|
||||
|
||||
|
||||
@ -50,11 +50,6 @@ public class StatisticConstants {
|
||||
*/
|
||||
public static final int STATISTIC_CLEAN_INTERVAL_IN_HOURS = 24 * 2;
|
||||
|
||||
/**
|
||||
* The max number of cached items in `StatisticsCache`.
|
||||
*/
|
||||
public static final long STATISTICS_RECORDS_CACHE_SIZE = 100000;
|
||||
|
||||
/**
|
||||
* If an analysis job's execution time exceeds this limit, the job is cancelled.
|
||||
*/
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
package org.apache.doris.statistics;
|
||||
|
||||
import org.apache.doris.common.Config;
|
||||
import org.apache.doris.common.ThreadPoolManager;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
|
||||
@ -53,7 +54,7 @@ public class StatisticsCache {
|
||||
|
||||
private final AsyncLoadingCache<StatisticsCacheKey, Optional<ColumnStatistic>> columnStatisticsCache =
|
||||
Caffeine.newBuilder()
|
||||
.maximumSize(StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE)
|
||||
.maximumSize(Config.stats_cache_size)
|
||||
.expireAfterAccess(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_VALID_DURATION_IN_HOURS))
|
||||
.refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL))
|
||||
.executor(threadPool)
|
||||
@ -61,7 +62,7 @@ public class StatisticsCache {
|
||||
|
||||
private final AsyncLoadingCache<StatisticsCacheKey, Optional<Histogram>> histogramCache =
|
||||
Caffeine.newBuilder()
|
||||
.maximumSize(StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE)
|
||||
.maximumSize(Config.stats_cache_size)
|
||||
.expireAfterAccess(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_VALID_DURATION_IN_HOURS))
|
||||
.refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL))
|
||||
.executor(threadPool)
|
||||
|
||||
@ -23,6 +23,7 @@ import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.Partition;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
import org.apache.doris.common.Config;
|
||||
import org.apache.doris.common.DdlException;
|
||||
import org.apache.doris.common.FeConstants;
|
||||
import org.apache.doris.statistics.util.DBObjects;
|
||||
@ -104,7 +105,7 @@ public class StatisticsRepository {
|
||||
+ FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.STATISTIC_TBL_NAME
|
||||
+ " WHERE part_id is NULL "
|
||||
+ " ORDER BY update_time DESC LIMIT "
|
||||
+ StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE;
|
||||
+ Config.stats_cache_size;
|
||||
|
||||
private static final String FETCH_STATS_FULL_NAME =
|
||||
"SELECT id, catalog_id, db_id, tbl_id, idx_id, col_id, part_id FROM "
|
||||
|
||||
Reference in New Issue
Block a user