[fix](stats) min and max return NaN when table is empty (#27862)
Fix analyze-empty-table and min/max null-value bugs: 1. Skip the empty analyze task for sample analyze tasks (full analyze tasks are already skipped). 2. Check that the sample row count is not 0 before calculating the scale factor. 3. Remove the surrounding ' quotes in the SQL template, now that base64 encoding of the min/max values has been removed.
This commit is contained in:
@ -92,8 +92,8 @@ public abstract class BaseAnalysisTask {
|
||||
+ "${rowCount} AS `row_count`, "
|
||||
+ "${ndvFunction} as `ndv`, "
|
||||
+ "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * ${scaleFactor} as `null_count`, "
|
||||
+ "'${min}' AS `min`, "
|
||||
+ "'${max}' AS `max`, "
|
||||
+ "${min} AS `min`, "
|
||||
+ "${max} AS `max`, "
|
||||
+ "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
|
||||
+ "NOW() "
|
||||
+ "FROM ( "
|
||||
@ -115,8 +115,8 @@ public abstract class BaseAnalysisTask {
|
||||
+ "${row_count} AS `row_count`, "
|
||||
+ "${ndv} AS `ndv`, "
|
||||
+ "${null_count} AS `null_count`, "
|
||||
+ "'${min}' AS `min`, "
|
||||
+ "'${max}' AS `max`, "
|
||||
+ "${min} AS `min`, "
|
||||
+ "${max} AS `max`, "
|
||||
+ "${data_size} AS `data_size`, "
|
||||
+ "NOW() ";
|
||||
|
||||
@ -311,7 +311,7 @@ public abstract class BaseAnalysisTask {
|
||||
this.job = job;
|
||||
}
|
||||
|
||||
protected void runQuery(String sql, boolean needEncode) {
|
||||
protected void runQuery(String sql) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) {
|
||||
stmtExecutor = new StmtExecutor(a.connectContext, sql);
|
||||
|
||||
@ -159,7 +159,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
|
||||
}
|
||||
stringSubstitutor = new StringSubstitutor(params);
|
||||
String sql = stringSubstitutor.replace(sb.toString());
|
||||
runQuery(sql, true);
|
||||
runQuery(sql);
|
||||
}
|
||||
|
||||
// Collect the partition column stats through HMS metadata.
|
||||
@ -201,12 +201,12 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
|
||||
params.put("row_count", String.valueOf(count));
|
||||
params.put("ndv", String.valueOf(ndv));
|
||||
params.put("null_count", String.valueOf(numNulls));
|
||||
params.put("min", min);
|
||||
params.put("max", max);
|
||||
params.put("min", StatisticsUtil.quote(min));
|
||||
params.put("max", StatisticsUtil.quote(max));
|
||||
params.put("data_size", String.valueOf(dataSize));
|
||||
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
|
||||
String sql = stringSubstitutor.replace(ANALYZE_PARTITION_COLUMN_TEMPLATE);
|
||||
runQuery(sql, true);
|
||||
runQuery(sql);
|
||||
}
|
||||
|
||||
private String updateMinValue(String currentMin, String value) {
|
||||
@ -313,6 +313,9 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
|
||||
for (long size : chunkSizes) {
|
||||
total += size;
|
||||
}
|
||||
if (total == 0) {
|
||||
return Pair.of(1.0, 0L);
|
||||
}
|
||||
// Calculate the sample target size for percent and rows sample.
|
||||
if (tableSample.isPercent()) {
|
||||
target = total * tableSample.getSampleValue() / 100;
|
||||
|
||||
@ -110,7 +110,7 @@ public class JdbcAnalysisTask extends BaseAnalysisTask {
|
||||
params.put("dataSizeFunction", getDataSizeFunction(col, false));
|
||||
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
|
||||
String sql = stringSubstitutor.replace(sb.toString());
|
||||
runQuery(sql, true);
|
||||
runQuery(sql);
|
||||
}
|
||||
|
||||
private Map<String, String> buildTableStatsParams(String partId) {
|
||||
|
||||
@ -59,7 +59,13 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
}
|
||||
|
||||
public void doExecute() throws Exception {
|
||||
|
||||
Set<String> partitionNames = info.colToPartitions.get(info.colName);
|
||||
if (partitionNames.isEmpty()) {
|
||||
LOG.debug("Skip empty empty partition task for column {} in {}.{}.{}",
|
||||
info.catalogId, info.dbId, info.tblId, info.colName);
|
||||
job.appendBuf(this, Collections.emptyList());
|
||||
return;
|
||||
}
|
||||
if (tableSample != null) {
|
||||
doSample();
|
||||
} else {
|
||||
@ -113,24 +119,25 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
params.put("scaleFactor", String.valueOf(scaleFactor));
|
||||
params.put("sampleHints", tabletStr.isEmpty() ? "" : String.format("TABLET(%s)", tabletStr));
|
||||
params.put("ndvFunction", getNdvFunction(String.valueOf(rowCount)));
|
||||
params.put("min", min);
|
||||
params.put("max", max);
|
||||
params.put("min", StatisticsUtil.quote(min));
|
||||
params.put("max", StatisticsUtil.quote(max));
|
||||
params.put("rowCount", String.valueOf(rowCount));
|
||||
params.put("type", col.getType().toString());
|
||||
params.put("limit", "");
|
||||
if (needLimit()) {
|
||||
// If the tablets to be sampled are too large, use limit to control the rows to read, and re-calculate
|
||||
// the scaleFactor.
|
||||
limitFlag = true;
|
||||
rowsToSample = Math.min(getSampleRows(), pair.second);
|
||||
params.put("limit", "limit " + rowsToSample);
|
||||
params.put("scaleFactor", String.valueOf(scaleFactor * (double) pair.second / rowsToSample));
|
||||
// Empty table doesn't need to limit.
|
||||
if (rowsToSample > 0) {
|
||||
limitFlag = true;
|
||||
params.put("limit", "limit " + rowsToSample);
|
||||
params.put("scaleFactor", String.valueOf(scaleFactor * (double) pair.second / rowsToSample));
|
||||
}
|
||||
}
|
||||
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
|
||||
String sql;
|
||||
if (useLinearAnalyzeTemplate()) {
|
||||
params.put("min", StatisticsUtil.quote(min));
|
||||
params.put("max", StatisticsUtil.quote(max));
|
||||
// For single unique key, use count as ndv.
|
||||
if (isSingleUniqueKey()) {
|
||||
params.put("ndvFunction", String.valueOf(rowCount));
|
||||
@ -148,7 +155,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
col.getName(), params.get("rowCount"), rowsToSample, params.get("scaleFactor"),
|
||||
limitFlag, tbl.isDistributionColumn(col.getName()),
|
||||
tbl.isPartitionColumn(col.getName()), col.isKey(), isSingleUniqueKey());
|
||||
runQuery(sql, false);
|
||||
runQuery(sql);
|
||||
}
|
||||
}
|
||||
|
||||
@ -169,11 +176,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
*/
|
||||
protected void doFull() throws Exception {
|
||||
LOG.debug("Will do full collection for column {}", col.getName());
|
||||
Set<String> partitionNames = info.colToPartitions.get(info.colName);
|
||||
if (partitionNames.isEmpty()) {
|
||||
job.appendBuf(this, Collections.emptyList());
|
||||
return;
|
||||
}
|
||||
Map<String, String> params = new HashMap<>();
|
||||
params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
|
||||
params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME);
|
||||
@ -189,7 +191,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
params.put("tblName", String.valueOf(tbl.getName()));
|
||||
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
|
||||
String collectColStats = stringSubstitutor.replace(COLLECT_COL_STATISTICS);
|
||||
runQuery(collectColStats, true);
|
||||
runQuery(collectColStats);
|
||||
}
|
||||
|
||||
// Get sample tablets id and scale up scaleFactor
|
||||
|
||||
Reference in New Issue
Block a user