Do not collect min max for agg table value columns while doing sample analyze. (#29483)

This commit is contained in:
Jibing-Li
2024-01-06 17:15:40 +08:00
committed by GitHub
parent 911635fac6
commit 612e0631ac
2 changed files with 35 additions and 3 deletions

View File

@ -104,8 +104,10 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
// Get basic stats, including min and max.
ResultRow basicStats = collectBasicStat(r);
long rowCount = tbl.getRowCount();
String min = StatisticsUtil.escapeSQL(basicStats.get(0));
String max = StatisticsUtil.escapeSQL(basicStats.get(1));
String min = StatisticsUtil.escapeSQL(basicStats != null && basicStats.getValues().size() > 0
? basicStats.get(0) : null);
String max = StatisticsUtil.escapeSQL(basicStats != null && basicStats.getValues().size() > 1
? basicStats.get(1) : null);
boolean limitFlag = false;
long rowsToSample = pair.second;
@ -166,6 +168,13 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
}
protected ResultRow collectBasicStat(AutoCloseConnectContext context) {
// Agg table value columns have no zone map.
// For these columns, skip collecting min and max values to avoid scanning the whole table.
if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS) && !col.isKey()) {
LOG.info("Aggregation table {} column {} is not a key column, skip collecting min and max.",
tbl.getName(), col.getName());
return null;
}
Map<String, String> params = new HashMap<>();
params.put("dbName", db.getFullName());
params.put("colName", info.colName);

View File

@ -2612,6 +2612,30 @@ PARTITION `p599` VALUES IN (599)
partition_result = sql """show table stats partition_test"""
assertEquals(partition_result[0][6], "false")
// Test sample agg table value column
sql """
CREATE TABLE `agg_table_test` (
`id` BIGINT NOT NULL,
`name` VARCHAR(10) REPLACE NULL
) ENGINE=OLAP
AGGREGATE KEY(`id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 32
PROPERTIES (
"replication_num" = "1"
);
"""
sql """insert into agg_table_test values (1,'name1'), (2, 'name2')"""
Thread.sleep(1000 * 90)
sql """analyze table agg_table_test with sample rows 100 with sync"""
def agg_result = sql """show column stats agg_table_test (name)"""
assertEquals(agg_result[0][6], "N/A")
assertEquals(agg_result[0][7], "N/A")
agg_result = sql """show column stats agg_table_test (id)"""
assertEquals(agg_result[0][6], "1")
assertEquals(agg_result[0][7], "2")
sql """DROP DATABASE IF EXISTS AggTableTest"""
// Test trigger type.
sql """DROP DATABASE IF EXISTS trigger"""
sql """CREATE DATABASE IF NOT EXISTS trigger"""
@ -2650,5 +2674,4 @@ PARTITION `p599` VALUES IN (599)
assertEquals(result[1][10], "MANUAL")
}
sql """DROP DATABASE IF EXISTS trigger"""
}