[improvement](statistics) Force to use zonemap for collecting string type min max. (#29631)

Force to use zonemap for collecting string type min max.
The string type does not use the zonemap for min/max, because the zonemap value on the BE side is truncated at 512 bytes, which may make the value inaccurate. That is acceptable for statistics min/max, and it also avoids scanning the whole table while sampling.
This commit is contained in:
Jibing-Li
2024-01-08 13:49:37 +08:00
committed by yiguolei
parent 9791a63212
commit ddaa645a4f
4 changed files with 44 additions and 9 deletions

View File

@ -610,10 +610,10 @@ public class AggregateStrategies implements ImplementationRuleFactory {
if (mergeOp == PushDownAggOp.MIN_MAX || mergeOp == PushDownAggOp.MIX) {
PrimitiveType colType = column.getType().getPrimitiveType();
if (colType.isComplexType() || colType.isHllType() || colType.isBitmapType()
|| colType == PrimitiveType.STRING) {
|| (colType == PrimitiveType.STRING && !enablePushDownStringMinMax())) {
return canNotPush;
}
if (colType.isCharFamily() && column.getType().getLength() > 512) {
if (colType.isCharFamily() && column.getType().getLength() > 512 && !enablePushDownStringMinMax()) {
return canNotPush;
}
}
@ -665,6 +665,11 @@ public class AggregateStrategies implements ImplementationRuleFactory {
}
}
/**
 * Reports whether string-type min/max push-down is enabled.
 *
 * @return {@code false} when {@code ConnectContext.get()} returns {@code null}; otherwise the
 *         value of {@code isEnablePushDownStringMinMax()} on that context's session variable
 */
private boolean enablePushDownStringMinMax() {
    ConnectContext ctx = ConnectContext.get();
    // No context available: treat the feature as disabled.
    if (ctx == null) {
        return false;
    }
    return ctx.getSessionVariable().isEnablePushDownStringMinMax();
}
/**
* sql: select count(*) from tbl group by id
* <p>

View File

@ -482,6 +482,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String ENABLE_PUSHDOWN_MINMAX_ON_UNIQUE = "enable_pushdown_minmax_on_unique";
public static final String ENABLE_PUSHDOWN_STRING_MINMAX = "enable_pushdown_string_minmax";
// When use fix replica = true is set, the fixed replica may be bad; try to use a healthy one if
// this session variable is set to true.
public static final String FALLBACK_OTHER_REPLICA_WHEN_FIXED_CORRUPT = "fallback_other_replica_when_fixed_corrupt";
@ -1229,6 +1231,11 @@ public class SessionVariable implements Serializable, Writable {
"是否启用pushdown minmax on unique table。", "Set whether to pushdown minmax on unique table."})
public boolean enablePushDownMinMaxOnUnique = false;
// Whether to enable pushing down string type min/max to the scan node.
@VariableMgr.VarAttr(name = ENABLE_PUSHDOWN_STRING_MINMAX, needForward = true, description = {
"是否启用string类型min max下推。", "Set whether to enable push down string type minmax."})
public boolean enablePushDownStringMinMax = false;
// Whether drop table when create table as select insert data appear error.
@VariableMgr.VarAttr(name = DROP_TABLE_IF_CTAS_FAILED, needForward = true)
public boolean dropTableIfCtasFailed = true;
@ -2474,6 +2481,10 @@ public class SessionVariable implements Serializable, Writable {
this.enablePushDownMinMaxOnUnique = enablePushDownMinMaxOnUnique;
}
/**
 * Returns the current value of the {@code enablePushDownStringMinMax} field.
 *
 * @return {@code true} if string-type min/max push-down is switched on for this instance
 */
public boolean isEnablePushDownStringMinMax() {
    return this.enablePushDownStringMinMax;
}
/**
* Nereids only support vectorized engine.
*

View File

@ -193,6 +193,7 @@ public class StatisticsUtil {
sessionVariable.enableFileCache = false;
sessionVariable.forbidUnknownColStats = false;
sessionVariable.enablePushDownMinMaxOnUnique = true;
sessionVariable.enablePushDownStringMinMax = true;
connectContext.setEnv(Env.getCurrentEnv());
connectContext.setDatabase(FeConstants.INTERNAL_DB_NAME);
connectContext.setQualifiedUser(UserIdentity.ROOT.getQualifiedUser());

View File

@ -121,8 +121,6 @@ suite("test_analyze") {
SET forbid_unknown_col_stats=true;
"""
Thread.sleep(1000 * 60)
sql """
SELECT * FROM ${tbl};
"""
@ -2626,15 +2624,35 @@ PARTITION `p599` VALUES IN (599)
);
"""
sql """insert into agg_table_test values (1,'name1'), (2, 'name2')"""
Thread.sleep(1000 * 90)
Thread.sleep(1000 * 60)
sql """analyze table agg_table_test with sample rows 100 with sync"""
def agg_result = sql """show column stats agg_table_test (name)"""
assertEquals(agg_result[0][6], "N/A")
assertEquals(agg_result[0][7], "N/A")
agg_result = sql """show column stats agg_table_test (id)"""
assertEquals(agg_result[0][6], "1")
assertEquals(agg_result[0][7], "2")
sql """DROP DATABASE IF EXISTS AggTableTest"""
// Test sample string type min max
sql """
CREATE TABLE `string_min_max` (
`id` BIGINT NOT NULL,
`name` string NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 32
PROPERTIES (
"replication_num" = "1"
);
"""
sql """insert into string_min_max values (1,'name1'), (2, 'name2')"""
explain {
sql("select min(name), max(name) from string_min_max")
contains "pushAggOp=NONE"
}
sql """set enable_pushdown_string_minmax = true"""
explain {
sql("select min(name), max(name) from string_min_max")
contains "pushAggOp=MINMAX"
}
// Test trigger type.
sql """DROP DATABASE IF EXISTS trigger"""