[improvement](statistics) Force to use zonemap for collecting string type min max. (#29631)
Force the use of the zonemap when collecting min/max for string-type columns. String-type columns normally do not use the zonemap for min/max, because the zonemap value on the BE side is truncated at 512 bytes, which may make the value inaccurate. That is acceptable for statistics min/max, and it also avoids scanning the whole table while sampling.
This commit is contained in:
@ -610,10 +610,10 @@ public class AggregateStrategies implements ImplementationRuleFactory {
|
||||
if (mergeOp == PushDownAggOp.MIN_MAX || mergeOp == PushDownAggOp.MIX) {
|
||||
PrimitiveType colType = column.getType().getPrimitiveType();
|
||||
if (colType.isComplexType() || colType.isHllType() || colType.isBitmapType()
|
||||
|| colType == PrimitiveType.STRING) {
|
||||
|| (colType == PrimitiveType.STRING && !enablePushDownStringMinMax())) {
|
||||
return canNotPush;
|
||||
}
|
||||
if (colType.isCharFamily() && column.getType().getLength() > 512) {
|
||||
if (colType.isCharFamily() && column.getType().getLength() > 512 && !enablePushDownStringMinMax()) {
|
||||
return canNotPush;
|
||||
}
|
||||
}
|
||||
@ -665,6 +665,11 @@ public class AggregateStrategies implements ImplementationRuleFactory {
|
||||
}
|
||||
}
|
||||
|
||||
private boolean enablePushDownStringMinMax() {
|
||||
ConnectContext connectContext = ConnectContext.get();
|
||||
return connectContext != null && connectContext.getSessionVariable().isEnablePushDownStringMinMax();
|
||||
}
|
||||
|
||||
/**
|
||||
* sql: select count(*) from tbl group by id
|
||||
* <p>
|
||||
|
||||
@ -482,6 +482,8 @@ public class SessionVariable implements Serializable, Writable {
|
||||
|
||||
public static final String ENABLE_PUSHDOWN_MINMAX_ON_UNIQUE = "enable_pushdown_minmax_on_unique";
|
||||
|
||||
public static final String ENABLE_PUSHDOWN_STRING_MINMAX = "enable_pushdown_string_minmax";
|
||||
|
||||
// When set use fix replica = true, the fixed replica maybe bad, try to use the health one if
|
||||
// this session variable is set to true.
|
||||
public static final String FALLBACK_OTHER_REPLICA_WHEN_FIXED_CORRUPT = "fallback_other_replica_when_fixed_corrupt";
|
||||
@ -1229,6 +1231,11 @@ public class SessionVariable implements Serializable, Writable {
|
||||
"是否启用pushdown minmax on unique table。", "Set whether to pushdown minmax on unique table."})
|
||||
public boolean enablePushDownMinMaxOnUnique = false;
|
||||
|
||||
// Whether to enable pushing down min/max on string type columns to the scan node.
|
||||
@VariableMgr.VarAttr(name = ENABLE_PUSHDOWN_STRING_MINMAX, needForward = true, description = {
|
||||
"是否启用string类型min max下推。", "Set whether to enable push down string type minmax."})
|
||||
public boolean enablePushDownStringMinMax = false;
|
||||
|
||||
// Whether drop table when create table as select insert data appear error.
|
||||
@VariableMgr.VarAttr(name = DROP_TABLE_IF_CTAS_FAILED, needForward = true)
|
||||
public boolean dropTableIfCtasFailed = true;
|
||||
@ -2474,6 +2481,10 @@ public class SessionVariable implements Serializable, Writable {
|
||||
this.enablePushDownMinMaxOnUnique = enablePushDownMinMaxOnUnique;
|
||||
}
|
||||
|
||||
public boolean isEnablePushDownStringMinMax() {
|
||||
return enablePushDownStringMinMax;
|
||||
}
|
||||
|
||||
/**
|
||||
* Nereids only support vectorized engine.
|
||||
*
|
||||
|
||||
@ -193,6 +193,7 @@ public class StatisticsUtil {
|
||||
sessionVariable.enableFileCache = false;
|
||||
sessionVariable.forbidUnknownColStats = false;
|
||||
sessionVariable.enablePushDownMinMaxOnUnique = true;
|
||||
sessionVariable.enablePushDownStringMinMax = true;
|
||||
connectContext.setEnv(Env.getCurrentEnv());
|
||||
connectContext.setDatabase(FeConstants.INTERNAL_DB_NAME);
|
||||
connectContext.setQualifiedUser(UserIdentity.ROOT.getQualifiedUser());
|
||||
|
||||
@ -121,8 +121,6 @@ suite("test_analyze") {
|
||||
SET forbid_unknown_col_stats=true;
|
||||
"""
|
||||
|
||||
Thread.sleep(1000 * 60)
|
||||
|
||||
sql """
|
||||
SELECT * FROM ${tbl};
|
||||
"""
|
||||
@ -2626,15 +2624,35 @@ PARTITION `p599` VALUES IN (599)
|
||||
);
|
||||
"""
|
||||
sql """insert into agg_table_test values (1,'name1'), (2, 'name2')"""
|
||||
Thread.sleep(1000 * 90)
|
||||
Thread.sleep(1000 * 60)
|
||||
sql """analyze table agg_table_test with sample rows 100 with sync"""
|
||||
def agg_result = sql """show column stats agg_table_test (name)"""
|
||||
assertEquals(agg_result[0][6], "N/A")
|
||||
assertEquals(agg_result[0][7], "N/A")
|
||||
agg_result = sql """show column stats agg_table_test (id)"""
|
||||
assertEquals(agg_result[0][6], "1")
|
||||
assertEquals(agg_result[0][7], "2")
|
||||
sql """DROP DATABASE IF EXISTS AggTableTest"""
|
||||
|
||||
// Test sample string type min max
|
||||
sql """
|
||||
CREATE TABLE `string_min_max` (
|
||||
`id` BIGINT NOT NULL,
|
||||
`name` string NULL
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(`id`)
|
||||
COMMENT 'OLAP'
|
||||
DISTRIBUTED BY HASH(`id`) BUCKETS 32
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
"""
|
||||
sql """insert into string_min_max values (1,'name1'), (2, 'name2')"""
|
||||
explain {
|
||||
sql("select min(name), max(name) from string_min_max")
|
||||
contains "pushAggOp=NONE"
|
||||
}
|
||||
sql """set enable_pushdown_string_minmax = true"""
|
||||
explain {
|
||||
sql("select min(name), max(name) from string_min_max")
|
||||
contains "pushAggOp=MINMAX"
|
||||
}
|
||||
|
||||
// Test trigger type.
|
||||
sql """DROP DATABASE IF EXISTS trigger"""
|
||||
|
||||
Reference in New Issue
Block a user