[fix](stats) truncate min/max if too long (#27955)

For some string columns, the min/max value can be a very long string,
which can consume too much memory on the FE,
so we truncate it to 1024 characters if it is too long.
This commit is contained in:
AKIRA
2023-12-05 20:40:38 +08:00
committed by GitHub
parent 05adbfdb3d
commit 7f1b558011
6 changed files with 84 additions and 26 deletions

View File

@ -544,7 +544,9 @@ public class AnalysisManager implements Writable {
AnalysisInfoBuilder colTaskInfoBuilder = new AnalysisInfoBuilder(jobInfo);
if (jobInfo.analysisType != AnalysisType.HISTOGRAM) {
colTaskInfoBuilder.setAnalysisType(AnalysisType.FUNDAMENTALS);
colTaskInfoBuilder.setColToPartitions(Collections.singletonMap(colName, entry.getValue()));
Map<String, Set<String>> colToParts = new HashMap<>();
colToParts.put(colName, entry.getValue());
colTaskInfoBuilder.setColToPartitions(colToParts);
}
AnalysisInfo analysisInfo = colTaskInfoBuilder.setColName(colName).setIndexId(indexId)
.setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build();

View File

@ -58,8 +58,8 @@ public abstract class BaseAnalysisTask {
+ " COUNT(1) AS `row_count`, "
+ " NDV(`${colName}`) AS `ndv`, "
+ " COUNT(1) - COUNT(`${colName}`) AS `null_count`, "
+ " CAST(MIN(`${colName}`) AS STRING) AS `min`, "
+ " CAST(MAX(`${colName}`) AS STRING) AS `max`, "
+ " SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) AS `min`, "
+ " SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) AS `max`, "
+ " ${dataSizeFunction} AS `data_size`, "
+ " NOW() AS `update_time` "
+ " FROM `${catalogName}`.`${dbName}`.`${tblName}`";
@ -75,8 +75,8 @@ public abstract class BaseAnalysisTask {
+ "${rowCount} AS `row_count`, "
+ "${ndvFunction} as `ndv`, "
+ "ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) * ${scaleFactor}) AS `null_count`, "
+ "${min} AS `min`, "
+ "${max} AS `max`, "
+ "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+ "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+ "NOW() "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${sampleHints} ${limit}";
@ -92,8 +92,8 @@ public abstract class BaseAnalysisTask {
+ "${rowCount} AS `row_count`, "
+ "${ndvFunction} as `ndv`, "
+ "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * ${scaleFactor} as `null_count`, "
+ "${min} AS `min`, "
+ "${max} AS `max`, "
+ "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+ "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
+ "NOW() "
+ "FROM ( "
@ -115,8 +115,8 @@ public abstract class BaseAnalysisTask {
+ "${row_count} AS `row_count`, "
+ "${ndv} AS `ndv`, "
+ "${null_count} AS `null_count`, "
+ "${min} AS `min`, "
+ "${max} AS `max`, "
+ "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, "
+ "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, "
+ "${data_size} AS `data_size`, "
+ "NOW() ";

View File

@ -41,8 +41,8 @@ public class JdbcAnalysisTask extends BaseAnalysisTask {
+ "COUNT(1) AS row_count, "
+ "NDV(`${colName}`) AS ndv, "
+ "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) AS null_count, "
+ "MIN(`${colName}`) AS min, "
+ "MAX(`${colName}`) AS max, "
+ "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) AS min, "
+ "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) AS max, "
+ "${dataSizeFunction} AS data_size, "
+ "NOW() "
+ "FROM `${catalogName}`.`${dbName}`.`${tblName}`";

View File

@ -46,8 +46,8 @@ import java.util.stream.Collectors;
public class OlapAnalysisTask extends BaseAnalysisTask {
private static final String BASIC_STATS_TEMPLATE = "SELECT "
+ "MIN(`${colName}`) as min, "
+ "MAX(`${colName}`) as max "
+ "SUBSTRING(CAST(MIN(`${colName}`) AS STRING), 1, 1024) as min, "
+ "SUBSTRING(CAST(MAX(`${colName}`) AS STRING), 1, 1024) as max "
+ "FROM `${dbName}`.`${tblName}`";
@VisibleForTesting