[fix](statistics) Fix sample min/max NPE bug (#27702)

The min and max values may be NULL, so this case needs to be handled in sample analyze.
This commit is contained in:
Jibing-Li
2023-11-28 21:24:20 +08:00
committed by GitHub
parent 8910772cb8
commit aa6573db4f
4 changed files with 52 additions and 10 deletions

View File

@ -31,10 +31,8 @@ import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.text.StringSubstitutor;
import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@ -94,8 +92,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
// Get basic stats, including min and max.
ResultRow basicStats = collectBasicStat(r);
long rowCount = tbl.getRowCount();
String min = Base64.getEncoder().encodeToString(basicStats.get(0).getBytes(StandardCharsets.UTF_8));
String max = Base64.getEncoder().encodeToString(basicStats.get(1).getBytes(StandardCharsets.UTF_8));
String min = StatisticsUtil.encodeValue(basicStats, 0);
String max = StatisticsUtil.encodeValue(basicStats, 1);
boolean limitFlag = false;
long rowsToSample = pair.second;

View File

@ -35,8 +35,6 @@ import org.apache.commons.text.StringSubstitutor;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
@ -271,10 +269,8 @@ public class StatisticsRepository {
params.put("count", String.valueOf(columnStatistic.count));
params.put("ndv", String.valueOf(columnStatistic.ndv));
params.put("nullCount", String.valueOf(columnStatistic.numNulls));
params.put("min", min == null ? "NULL" :
Base64.getEncoder().encodeToString(min.getBytes(StandardCharsets.UTF_8)));
params.put("max", max == null ? "NULL" :
Base64.getEncoder().encodeToString(max.getBytes(StandardCharsets.UTF_8)));
params.put("min", StatisticsUtil.encodeString(min));
params.put("max", StatisticsUtil.encodeString(max));
params.put("dataSize", String.valueOf(columnStatistic.dataSize));
if (partitionIds.isEmpty()) {

View File

@ -90,12 +90,14 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.net.InetSocketAddress;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
@ -937,4 +939,19 @@ public class StatisticsUtil {
return StatisticConstants.ANALYZE_TIMEOUT_IN_SEC;
}
/**
 * Encodes the value stored at position {@code index} of {@code row}.
 *
 * @param row   result row to read from; may be {@code null}
 * @param index position of the value within the row
 * @return the literal string {@code "NULL"} when {@code row} is {@code null} or holds
 *         no more than {@code index} values; otherwise the result of passing
 *         {@code row.get(index)} to {@link #encodeString(String)}
 */
public static String encodeValue(ResultRow row, int index) {
    // Short-circuit keeps the size lookup from running on a null row.
    boolean hasColumn = row != null && index < row.getValues().size();
    return hasColumn ? encodeString(row.get(index)) : "NULL";
}
/**
 * Base64-encodes the UTF-8 bytes of {@code value}.
 *
 * @param value text to encode; may be {@code null}
 * @return the literal string {@code "NULL"} when {@code value} is {@code null};
 *         otherwise the Base64 encoding of the value's UTF-8 bytes
 */
public static String encodeString(String value) {
    if (value == null) {
        return "NULL";
    }
    byte[] utf8Bytes = value.getBytes(StandardCharsets.UTF_8);
    return Base64.getEncoder().encodeToString(utf8Bytes);
}
}