diff --git a/docs/en/docs/admin-manual/config/fe-config.md b/docs/en/docs/admin-manual/config/fe-config.md index a1ed75684c..6f45d9b1bb 100644 --- a/docs/en/docs/admin-manual/config/fe-config.md +++ b/docs/en/docs/admin-manual/config/fe-config.md @@ -827,7 +827,7 @@ IsMutable:true MasterOnly:false -If this switch is turned on, the SQL query result set will be cached. If the interval between the last visit version time in all partitions of all tables in the query is greater than cache_last_version_interval_second, and the result set is less than cache_result_max_row_count, the result set will be cached, and the next same SQL will hit the cache +If this switch is turned on, the SQL query result set will be cached. If the interval between the last visit version time in all partitions of all tables in the query is greater than cache_last_version_interval_second, and the number of rows in the result set is less than cache_result_max_row_count, and the data size is less than cache_result_max_data_size, the result set will be cached, and the next same SQL will hit the cache If set to true, fe will enable sql result caching. This option is suitable for offline data update scenarios @@ -854,7 +854,17 @@ IsMutable:true MasterOnly:false -In order to avoid occupying too much memory, the maximum number of rows that can be cached is 2000 by default. If this threshold is exceeded, the cache cannot be set +In order to avoid occupying too much memory, the maximum number of rows that can be cached is 3000 by default. If this threshold is exceeded, the cache cannot be set + +#### `cache_result_max_data_size` + +Default: 31457280 + +IsMutable: true + +MasterOnly: false + +In order to avoid occupying too much memory, the maximum data size of rows that can be cached is 30MB by default.
If this threshold is exceeded, the cache cannot be set #### `cache_last_version_interval_second` diff --git a/docs/en/docs/advanced/cache/partition-cache.md b/docs/en/docs/advanced/cache/partition-cache.md index 25e5ddd4de..ccf9675c2c 100644 --- a/docs/en/docs/advanced/cache/partition-cache.md +++ b/docs/en/docs/advanced/cache/partition-cache.md @@ -155,7 +155,15 @@ Partition fields can also be other fields, but need to ensure that only a small ## How to Use -### Enable SQL Cache +> NOTE: +> +> In the following scenarios, the cached result can be wrong: +> 1. Queries that use the session variables default_order_by_limit or sql_select_limit +> 2. Queries that use functions returning non-deterministic values, such as var = cur_date() or var = random() +> +> There may be other cases where the cached result is wrong, so it is recommended to enable it only in controllable scenarios such as reports. + +### Enable SQLCache Make sure cache_enable_sql_mode=true in fe.conf (default is true) @@ -228,14 +236,14 @@ Other monitoring: You can view the CPU and memory indicators of the BE node, the ### Optimization Parameters -The configuration item cache_result_max_row_count of FE, the maximum number of rows in the cache for the query result set, can be adjusted according to the actual situation, but it is recommended not to set it too large to avoid taking up too much memory, and the result set exceeding this size will not be cached. +The FE configuration items cache_result_max_row_count (the maximum number of rows of a query result set that can be put into the cache) and cache_result_max_data_size (the maximum data size of a query result set that can be put into the cache) can be adjusted according to the actual situation, but it is recommended not to set them too large to avoid taking up too much memory; a result set exceeding either limit will not be cached.
```text vim fe/conf/fe.conf cache_result_max_row_count=3000 ``` -The maximum number of partitions in BE cache_max_partition_count refers to the maximum number of partitions corresponding to each SQL. If it is partitioned by date, it can cache data for more than 2 years. If you want to keep the cache for a longer time, please set this parameter to a larger value and modify it at the same time. Parameter of cache_result_max_row_count. +The maximum number of partitions in BE cache_max_partition_count refers to the maximum number of partitions corresponding to each SQL. If it is partitioned by date, it can cache data for more than 2 years. If you want to keep the cache for a longer time, please set this parameter to a larger value and, at the same time, adjust the parameters cache_result_max_row_count and cache_result_max_data_size. ```text vim be/conf/be.conf diff --git a/docs/en/docs/query-acceleration/nereids.md b/docs/en/docs/query-acceleration/nereids.md index 00dc8b8ed6..6e1724fc8c 100644 --- a/docs/en/docs/query-acceleration/nereids.md +++ b/docs/en/docs/query-acceleration/nereids.md @@ -83,4 +83,4 @@ Recommand execute analyze on table before query on it to get the benefits of cbo ### known issues -- Cannot use query cache and partition cache to accelarate query +- Cannot use partition cache to accelerate queries diff --git a/docs/zh-CN/docs/admin-manual/config/fe-config.md b/docs/zh-CN/docs/admin-manual/config/fe-config.md index 8eb3436939..7c33cdfd96 100644 --- a/docs/zh-CN/docs/admin-manual/config/fe-config.md +++ b/docs/zh-CN/docs/admin-manual/config/fe-config.md @@ -828,7 +828,7 @@ trace导出到 collector: `http://127.0.0.1:4318/v1/traces` 是否为 Master FE 节点独有的配置项:false -如果设置为 true,SQL 查询结果集将被缓存。如果查询中所有表的所有分区最后一次访问版本时间的间隔大于cache_last_version_interval_second,且结果集小于cache_result_max_row_count,则结果集会被缓存,下一条相同的SQL会命中缓存 +如果设置为 true,SQL
查询结果集将被缓存。如果查询中所有表的所有分区最后一次访问版本时间的间隔大于cache_last_version_interval_second,且结果集行数小于cache_result_max_row_count,且数据大小小于cache_result_max_data_size,则结果集会被缓存,下一条相同的SQL会命中缓存 如果设置为 true,FE 会启用 sql 结果缓存,该选项适用于离线数据更新场景 @@ -857,6 +857,16 @@ trace导出到 collector: `http://127.0.0.1:4318/v1/traces` 设置可以缓存的最大行数,详细的原理可以参考官方文档:操作手册->分区缓存 +#### `cache_result_max_data_size` + +默认值:31457280 + +是否可以动态配置:true + +是否为 Master FE 节点独有的配置项:false + +设置可以缓存的最大数据大小,单位Bytes + #### `cache_last_version_interval_second` 默认值:900 diff --git a/docs/zh-CN/docs/advanced/cache/partition-cache.md b/docs/zh-CN/docs/advanced/cache/partition-cache.md index 1871b312af..778c27a37a 100644 --- a/docs/zh-CN/docs/advanced/cache/partition-cache.md +++ b/docs/zh-CN/docs/advanced/cache/partition-cache.md @@ -228,14 +228,14 @@ Partition平均数据大小 = cache_memory_total / cache_partition_total ### 优化参数 -FE的配置项cache_result_max_row_count,查询结果集放入缓存的最大行数,可以根据实际情况调整,但建议不要设置过大,避免过多占用内存,超过这个大小的结果集不会被缓存。 +FE的配置项cache_result_max_row_count,查询结果集放入缓存的最大行数,FE的配置项cache_result_max_data_size,查询结果集放入缓存的最大数据大小,可以根据实际情况调整,但建议不要设置过大,避免过多占用内存,超过这个大小的结果集不会被缓存。 ```text vim fe/conf/fe.conf cache_result_max_row_count=3000 ``` -BE最大分区数量cache_max_partition_count,指每个SQL对应的最大分区数,如果是按日期分区,能缓存2年多的数据,假如想保留更长时间的缓存,请把这个参数设置得更大,同时修改cache_result_max_row_count的参数。 +BE最大分区数量cache_max_partition_count,指每个SQL对应的最大分区数,如果是按日期分区,能缓存2年多的数据,假如想保留更长时间的缓存,请把这个参数设置得更大,同时修改cache_result_max_row_count和cache_result_max_data_size的参数。 ```text vim be/conf/be.conf diff --git a/docs/zh-CN/docs/query-acceleration/nereids.md b/docs/zh-CN/docs/query-acceleration/nereids.md index 717013605c..1f0489fa23 100644 --- a/docs/zh-CN/docs/query-acceleration/nereids.md +++ b/docs/zh-CN/docs/query-acceleration/nereids.md @@ -83,4 +83,4 @@ SET enable_fallback_to_original_planner=true; ### 已知问题 -- 不支持命中 Query Cache 和 Partition Cache +- 不支持命中 Partition Cache diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
index 08e6f773d1..f433244cbb 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1172,9 +1172,17 @@ public class Config extends ConfigBase { /** * Set the maximum number of rows that can be cached */ - @ConfField(mutable = true, masterOnly = false) + @ConfField(mutable = true, masterOnly = false, description = {"SQL/Partition Cache可以缓存的最大行数。", + "Maximum number of rows that can be cached in SQL/Partition Cache, is 3000 by default."}) public static int cache_result_max_row_count = 3000; + /** + * Set the maximum data size that can be cached + */ + @ConfField(mutable = true, masterOnly = false, description = {"SQL/Partition Cache可以缓存的最大数据大小。", + "Maximum data size of rows that can be cached in SQL/Partition Cache, is 30MB by default."}) + public static int cache_result_max_data_size = 31457280; // 30M + /** * Used to limit element num of InPredicate in delete statement. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/cache/Cache.java b/fe/fe-core/src/main/java/org/apache/doris/qe/cache/Cache.java index 797e252621..31388c99c9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/cache/Cache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/cache/Cache.java @@ -85,9 +85,13 @@ public abstract class Cache { return false; } if (rowBatchBuilder.getRowSize() > Config.cache_result_max_row_count) { - LOG.info("can not be cached. rowbatch size {} is more than {}", rowBatchBuilder.getRowSize(), + LOG.debug("can not be cached. rowbatch size {} is more than {}", rowBatchBuilder.getRowSize(), Config.cache_result_max_row_count); return false; + } else if (rowBatchBuilder.getDataSize() > Config.cache_result_max_data_size) { + LOG.debug("can not be cached.
rowbatch data size {} is more than {}", rowBatchBuilder.getDataSize(), + Config.cache_result_max_data_size); + return false; } else { return true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/cache/RowBatchBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/qe/cache/RowBatchBuilder.java index 6d5a635a30..93f06455f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/cache/RowBatchBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/cache/RowBatchBuilder.java @@ -55,6 +55,10 @@ public class RowBatchBuilder { return rowSize; } + public int getDataSize() { + return dataSize; + } + public RowBatchBuilder(CacheAnalyzer.CacheMode model) { cacheMode = model; keyIndex = 0;