From 6d2070c59d552c51186f091445c44e5f310ea628 Mon Sep 17 00:00:00 2001 From: AKIRA <33112463+Kikyou1997@users.noreply.github.com> Date: Thu, 11 May 2023 14:59:37 +0900 Subject: [PATCH] [enhancement](stats) Make stats cache item size configurable (#19205) --- .../main/java/org/apache/doris/common/Config.java | 14 +++++++++++++- .../doris/statistics/StatisticConstants.java | 5 ----- .../apache/doris/statistics/StatisticsCache.java | 5 +++-- .../doris/statistics/StatisticsRepository.java | 3 ++- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 33141597ec..b52e4a0822 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1948,5 +1948,17 @@ public class Config extends ConfigBase { */ @ConfField(mutable = false) public static boolean enable_delete_existing_files = false; + /* + * The actual memory size taken by stats cache highly depends on characteristics of data, since on different + * datasets and scenarios the max/min literal's average size and bucket count of histogram would be highly + * different. Besides, JVM version etc. also has influence on it, though not as much as the data itself. + * Here is the mem size taken by stats cache with 10_0000 items. Each item's avg length of max/min literal + * is 32, and the avg column name length is 16, and each column has a histogram with 128 buckets. + * In this case, stats cache takes total 911.954833984MiB mem. + * Without histogram, stats cache takes total 61.2777404785MiB mem. + * It's strongly discouraged to analyze a column with a very large STRING value in the column, since it would + * cause FE OOM.
+ */ + @ConfField + public static long stats_cache_size = 10_0000; } - diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index ff091955b5..8b358bf74d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -50,11 +50,6 @@ public class StatisticConstants { */ public static final int STATISTIC_CLEAN_INTERVAL_IN_HOURS = 24 * 2; - /** - * The max cached item in `StatisticsCache`. - */ - public static final long STATISTICS_RECORDS_CACHE_SIZE = 100000; - /** * If analysis job execution time exceeds this time, it would be cancelled. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java index 4a2bb6e872..f92ec87841 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java @@ -17,6 +17,7 @@ package org.apache.doris.statistics; +import org.apache.doris.common.Config; import org.apache.doris.common.ThreadPoolManager; import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; @@ -53,7 +54,7 @@ public class StatisticsCache { private final AsyncLoadingCache> columnStatisticsCache = Caffeine.newBuilder() - .maximumSize(StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE) + .maximumSize(Config.stats_cache_size) .expireAfterAccess(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_VALID_DURATION_IN_HOURS)) .refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL)) .executor(threadPool) @@ -61,7 +62,7 @@ public class StatisticsCache { private final AsyncLoadingCache> histogramCache = Caffeine.newBuilder() - 
.maximumSize(StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE) + .maximumSize(Config.stats_cache_size) .expireAfterAccess(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_VALID_DURATION_IN_HOURS)) .refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL)) .executor(threadPool) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 09ce8e6948..428313596e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -23,6 +23,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Partition; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; import org.apache.doris.statistics.util.DBObjects; @@ -104,7 +105,7 @@ public class StatisticsRepository { + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.STATISTIC_TBL_NAME + " WHERE part_id is NULL " + " ORDER BY update_time DESC LIMIT " - + StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE; + + Config.stats_cache_size; private static final String FETCH_STATS_FULL_NAME = "SELECT id, catalog_id, db_id, tbl_id, idx_id, col_id, part_id FROM "