[enhancement](stats) Make stats cache item size configurable (#19205)

AKIRA
2023-05-11 14:59:37 +09:00
committed by GitHub
parent dc497e11bb
commit 6d2070c59d
4 changed files with 18 additions and 9 deletions

View File

@@ -1948,5 +1948,17 @@ public class Config extends ConfigBase {
*/
@ConfField(mutable = false)
public static boolean enable_delete_existing_files = false;
/*
 * The actual memory taken by the stats cache depends heavily on the characteristics of the data,
 * since the average size of max/min literals and the number of histogram buckets vary widely across
 * datasets and scenarios. The JVM version and similar factors also have some influence, though far
 * less than the data itself.
 * As a reference, with 100,000 cached items, an average max/min literal length of 32, an average
 * column name length of 16, and a 128-bucket histogram per column, the stats cache takes
 * 911.954833984 MiB of memory in total. Without histograms, it takes 61.2777404785 MiB.
 * Analyzing a column that contains very large STRING values is strongly discouraged, since it
 * could cause the FE to OOM.
 */
@ConfField
public static long stats_cache_size = 100_000;
}
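For a rough sense of the per-item cost implied by the figures in the comment above, here is a small standalone Java sketch (not part of this commit) that simply divides the quoted totals by the item count; the class name and arithmetic are illustrative only.

// Back-of-the-envelope estimate derived from the totals quoted in the Config comment:
// 100,000 items, avg max/min literal length 32, avg column name length 16,
// 128 histogram buckets per column. Not a measurement, just the quoted totals divided out.
public class StatsCacheSizingSketch {
    public static void main(String[] args) {
        long items = 100_000;
        double withHistogramMiB = 911.954833984;    // total from the comment
        double withoutHistogramMiB = 61.2777404785; // total from the comment

        System.out.printf("~%.2f KiB per item with a 128-bucket histogram%n",
                withHistogramMiB * 1024 / items);
        System.out.printf("~%.2f KiB per item without a histogram%n",
                withoutHistogramMiB * 1024 / items);
        // Raising stats_cache_size scales these totals roughly linearly, so budget
        // FE heap accordingly before increasing the value.
    }
}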

View File

@@ -50,11 +50,6 @@ public class StatisticConstants {
*/
public static final int STATISTIC_CLEAN_INTERVAL_IN_HOURS = 24 * 2;
/**
* The max cached item in `StatisticsCache`.
*/
public static final long STATISTICS_RECORDS_CACHE_SIZE = 100000;
/**
* If analysis job execution time exceeds this time, it would be cancelled.
*/

View File

@@ -17,6 +17,7 @@
package org.apache.doris.statistics;
import org.apache.doris.common.Config;
import org.apache.doris.common.ThreadPoolManager;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
@@ -53,7 +54,7 @@ public class StatisticsCache {
private final AsyncLoadingCache<StatisticsCacheKey, Optional<ColumnStatistic>> columnStatisticsCache =
Caffeine.newBuilder()
.maximumSize(StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE)
.maximumSize(Config.stats_cache_size)
.expireAfterAccess(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_VALID_DURATION_IN_HOURS))
.refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL))
.executor(threadPool)
@@ -61,7 +62,7 @@ public class StatisticsCache {
private final AsyncLoadingCache<StatisticsCacheKey, Optional<Histogram>> histogramCache =
Caffeine.newBuilder()
.maximumSize(StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE)
.maximumSize(Config.stats_cache_size)
.expireAfterAccess(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_VALID_DURATION_IN_HOURS))
.refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL))
.executor(threadPool)
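Both caches above follow the same pattern: a Caffeine AsyncLoadingCache whose maximum size now comes from Config.stats_cache_size instead of a compile-time constant. Below is a minimal, self-contained sketch of that pattern with placeholder key/value types, durations, and loader; it is not the actual Doris code.

import com.github.benmanes.caffeine.cache.AsyncCacheLoader;
import com.github.benmanes.caffeine.cache.AsyncLoadingCache;
import com.github.benmanes.caffeine.cache.Caffeine;

import java.time.Duration;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ConfigurableCacheSketch {
    // Stand-in for Config.stats_cache_size; in Doris it is loaded from fe.conf.
    static long statsCacheSize = 100_000;

    public static void main(String[] args) {
        ExecutorService threadPool = Executors.newFixedThreadPool(4);

        // Placeholder loader; the real caches load ColumnStatistic / Histogram rows.
        AsyncCacheLoader<String, Optional<String>> loader = (key, executor) ->
                CompletableFuture.supplyAsync(() -> Optional.of("stats-for-" + key), executor);

        AsyncLoadingCache<String, Optional<String>> cache = Caffeine.newBuilder()
                .maximumSize(statsCacheSize)              // configurable bound, as in this commit
                .expireAfterAccess(Duration.ofHours(24))  // placeholder durations
                .refreshAfterWrite(Duration.ofHours(4))
                .executor(threadPool)
                .buildAsync(loader);

        System.out.println(cache.get("some_column").join().orElse("none"));
        threadPool.shutdown();
    }
}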

View File

@@ -23,6 +23,7 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.Partition;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.FeConstants;
import org.apache.doris.statistics.util.DBObjects;
@@ -104,7 +105,7 @@ public class StatisticsRepository {
+ FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.STATISTIC_TBL_NAME
+ " WHERE part_id is NULL "
+ " ORDER BY update_time DESC LIMIT "
+ StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE;
+ Config.stats_cache_size;
private static final String FETCH_STATS_FULL_NAME =
"SELECT id, catalog_id, db_id, tbl_id, idx_id, col_id, part_id FROM "