From ddaa645a4fe4d72f4eacc7b4e2423e6f4c9220f1 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Mon, 8 Jan 2024 13:49:37 +0800 Subject: [PATCH] [improvement](statistics) Force to use zonemap for collecting string type min max. (#29631) Force to use zonemap for collecting string type min max. String type is not using zonemap for min max, because zonemap value at BE side is truncated at 512 bytes which may make the value inaccurate. But it's OK for statistics min max, and this could also avoid scanning the whole table while sampling. --- .../implementation/AggregateStrategies.java | 9 ++++-- .../org/apache/doris/qe/SessionVariable.java | 11 +++++++ .../doris/statistics/util/StatisticsUtil.java | 1 + .../suites/statistics/analyze_stats.groovy | 32 +++++++++++++++---- 4 files changed, 44 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java index b4fce67beb..a0eb011ba9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java @@ -610,10 +610,10 @@ public class AggregateStrategies implements ImplementationRuleFactory { if (mergeOp == PushDownAggOp.MIN_MAX || mergeOp == PushDownAggOp.MIX) { PrimitiveType colType = column.getType().getPrimitiveType(); if (colType.isComplexType() || colType.isHllType() || colType.isBitmapType() - || colType == PrimitiveType.STRING) { + || (colType == PrimitiveType.STRING && !enablePushDownStringMinMax())) { return canNotPush; } - if (colType.isCharFamily() && column.getType().getLength() > 512) { + if (colType.isCharFamily() && column.getType().getLength() > 512 && !enablePushDownStringMinMax()) { return canNotPush; } } @@ -665,6 +665,11 @@ public class AggregateStrategies 
implements ImplementationRuleFactory { } } + private boolean enablePushDownStringMinMax() { + ConnectContext connectContext = ConnectContext.get(); + return connectContext != null && connectContext.getSessionVariable().isEnablePushDownStringMinMax(); + } + /** * sql: select count(*) from tbl group by id *

diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 6f078746d4..be6e13e399 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -482,6 +482,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_PUSHDOWN_MINMAX_ON_UNIQUE = "enable_pushdown_minmax_on_unique"; + public static final String ENABLE_PUSHDOWN_STRING_MINMAX = "enable_pushdown_string_minmax"; + // When set use fix replica = true, the fixed replica maybe bad, try to use the health one if // this session variable is set to true. public static final String FALLBACK_OTHER_REPLICA_WHEN_FIXED_CORRUPT = "fallback_other_replica_when_fixed_corrupt"; @@ -1229,6 +1231,11 @@ public class SessionVariable implements Serializable, Writable { "是否启用pushdown minmax on unique table。", "Set whether to pushdown minmax on unique table."}) public boolean enablePushDownMinMaxOnUnique = false; + // Whether enable push down string type minmax to scan node. + @VariableMgr.VarAttr(name = ENABLE_PUSHDOWN_STRING_MINMAX, needForward = true, description = { + "是否启用string类型min max下推。", "Set whether to enable push down string type minmax."}) + public boolean enablePushDownStringMinMax = false; + // Whether drop table when create table as select insert data appear error. @VariableMgr.VarAttr(name = DROP_TABLE_IF_CTAS_FAILED, needForward = true) public boolean dropTableIfCtasFailed = true; @@ -2474,6 +2481,10 @@ public class SessionVariable implements Serializable, Writable { this.enablePushDownMinMaxOnUnique = enablePushDownMinMaxOnUnique; } + public boolean isEnablePushDownStringMinMax() { + return enablePushDownStringMinMax; + } + /** * Nereids only support vectorized engine. 
* diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index d6c171161f..5c8aec3fbf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -193,6 +193,7 @@ public class StatisticsUtil { sessionVariable.enableFileCache = false; sessionVariable.forbidUnknownColStats = false; sessionVariable.enablePushDownMinMaxOnUnique = true; + sessionVariable.enablePushDownStringMinMax = true; connectContext.setEnv(Env.getCurrentEnv()); connectContext.setDatabase(FeConstants.INTERNAL_DB_NAME); connectContext.setQualifiedUser(UserIdentity.ROOT.getQualifiedUser()); diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 67c5705b62..1559b5350b 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -121,8 +121,6 @@ suite("test_analyze") { SET forbid_unknown_col_stats=true; """ - Thread.sleep(1000 * 60) - sql """ SELECT * FROM ${tbl}; """ @@ -2626,15 +2624,35 @@ PARTITION `p599` VALUES IN (599) ); """ sql """insert into agg_table_test values (1,'name1'), (2, 'name2')""" - Thread.sleep(1000 * 90) + Thread.sleep(1000 * 60) sql """analyze table agg_table_test with sample rows 100 with sync""" def agg_result = sql """show column stats agg_table_test (name)""" assertEquals(agg_result[0][6], "N/A") assertEquals(agg_result[0][7], "N/A") - agg_result = sql """show column stats agg_table_test (id)""" - assertEquals(agg_result[0][6], "1") - assertEquals(agg_result[0][7], "2") - sql """DROP DATABASE IF EXISTS AggTableTest""" + + // Test sample string type min max + sql """ + CREATE TABLE `string_min_max` ( + `id` BIGINT NOT NULL, + `name` string NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + 
DISTRIBUTED BY HASH(`id`) BUCKETS 32 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """insert into string_min_max values (1,'name1'), (2, 'name2')""" + explain { + sql("select min(name), max(name) from string_min_max") + contains "pushAggOp=NONE" + } + sql """set enable_pushdown_string_minmax = true""" + explain { + sql("select min(name), max(name) from string_min_max") + contains "pushAggOp=MINMAX" + } // Test trigger type. sql """DROP DATABASE IF EXISTS trigger"""