[opt](stats) Use escape rather than base64 for min/max value (#27746)
This commit is contained in:
@ -252,7 +252,7 @@ public abstract class BaseAnalysisTask {
|
||||
|
||||
protected String getMinFunction() {
|
||||
if (tableSample == null) {
|
||||
return "to_base64(CAST(MIN(`${colName}`) as ${type})) ";
|
||||
return "CAST(MIN(`${colName}`) as ${type}) ";
|
||||
} else {
|
||||
// Min value is inaccurate when sampling; set it to NULL so the optimizer does not generate a bad plan.
|
||||
return "NULL";
|
||||
@ -276,7 +276,7 @@ public abstract class BaseAnalysisTask {
|
||||
// Max value is inaccurate when sampling; set it to NULL so the optimizer does not generate a bad plan.
|
||||
protected String getMaxFunction() {
|
||||
if (tableSample == null) {
|
||||
return "to_base64(CAST(MAX(`${colName}`) as ${type})) ";
|
||||
return "CAST(MAX(`${colName}`) as ${type}) ";
|
||||
} else {
|
||||
return "NULL";
|
||||
}
|
||||
@ -315,7 +315,7 @@ public abstract class BaseAnalysisTask {
|
||||
long startTime = System.currentTimeMillis();
|
||||
try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) {
|
||||
stmtExecutor = new StmtExecutor(a.connectContext, sql);
|
||||
ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0), needEncode);
|
||||
ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0));
|
||||
job.appendBuf(this, Collections.singletonList(colStatsData));
|
||||
} finally {
|
||||
LOG.debug("End cost time in secs: " + (System.currentTimeMillis() - startTime) / 1000);
|
||||
|
||||
@ -21,8 +21,6 @@ import org.apache.doris.statistics.util.StatisticsUtil;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Base64;
|
||||
import java.util.StringJoiner;
|
||||
|
||||
/**
|
||||
@ -56,8 +54,6 @@ public class ColStatsData {
|
||||
|
||||
public final String updateTime;
|
||||
|
||||
public final boolean needEncode;
|
||||
|
||||
@VisibleForTesting
|
||||
public ColStatsData() {
|
||||
statsId = new StatsId();
|
||||
@ -68,10 +64,9 @@ public class ColStatsData {
|
||||
maxLit = null;
|
||||
dataSizeInBytes = 0;
|
||||
updateTime = null;
|
||||
needEncode = true;
|
||||
}
|
||||
|
||||
public ColStatsData(ResultRow row, boolean needEncode) {
|
||||
public ColStatsData(ResultRow row) {
|
||||
this.statsId = new StatsId(row);
|
||||
this.count = (long) Double.parseDouble(row.get(7));
|
||||
this.ndv = (long) Double.parseDouble(row.getWithDefault(8, "0"));
|
||||
@ -80,7 +75,6 @@ public class ColStatsData {
|
||||
this.maxLit = row.get(11);
|
||||
this.dataSizeInBytes = (long) Double.parseDouble(row.getWithDefault(12, "0"));
|
||||
this.updateTime = row.get(13);
|
||||
this.needEncode = needEncode;
|
||||
}
|
||||
|
||||
public String toSQL(boolean roundByParentheses) {
|
||||
@ -93,12 +87,8 @@ public class ColStatsData {
|
||||
sj.add(String.valueOf(count));
|
||||
sj.add(String.valueOf(ndv));
|
||||
sj.add(String.valueOf(nullCount));
|
||||
sj.add(minLit == null ? "NULL" : needEncode
|
||||
? "'" + Base64.getEncoder().encodeToString(minLit.getBytes(StandardCharsets.UTF_8)) + "'"
|
||||
: "'" + minLit + "'");
|
||||
sj.add(maxLit == null ? "NULL" : needEncode
|
||||
? "'" + Base64.getEncoder().encodeToString(maxLit.getBytes(StandardCharsets.UTF_8)) + "'"
|
||||
: "'" + maxLit + "'");
|
||||
sj.add(minLit == null ? "NULL" : "'" + StatisticsUtil.escapeSQL(minLit) + "'");
|
||||
sj.add(maxLit == null ? "NULL" : "'" + StatisticsUtil.escapeSQL(maxLit) + "'");
|
||||
sj.add(String.valueOf(dataSizeInBytes));
|
||||
sj.add(StatisticsUtil.quote(updateTime));
|
||||
return sj.toString();
|
||||
|
||||
@ -31,8 +31,6 @@ import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Base64;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -175,7 +173,6 @@ public class ColumnStatistic {
|
||||
String min = row.get(10);
|
||||
String max = row.get(11);
|
||||
if (min != null && !min.equalsIgnoreCase("NULL")) {
|
||||
min = new String(Base64.getDecoder().decode(min), StandardCharsets.UTF_8);
|
||||
// The internal catalog gets the min/max value using a separate SQL statement,
|
||||
// and the value is already base64-encoded. Internal and external catalogs must be handled separately.
|
||||
if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && min.equalsIgnoreCase("NULL")) {
|
||||
@ -193,7 +190,6 @@ public class ColumnStatistic {
|
||||
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
if (max != null && !max.equalsIgnoreCase("NULL")) {
|
||||
max = new String(Base64.getDecoder().decode(max), StandardCharsets.UTF_8);
|
||||
if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && max.equalsIgnoreCase("NULL")) {
|
||||
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
|
||||
} else {
|
||||
|
||||
@ -92,8 +92,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
// Get basic stats, including min and max.
|
||||
ResultRow basicStats = collectBasicStat(r);
|
||||
long rowCount = tbl.getRowCount();
|
||||
String min = StatisticsUtil.encodeValue(basicStats, 0);
|
||||
String max = StatisticsUtil.encodeValue(basicStats, 1);
|
||||
String min = StatisticsUtil.escapeSQL(basicStats.get(0));
|
||||
String max = StatisticsUtil.escapeSQL(basicStats.get(1));
|
||||
|
||||
boolean limitFlag = false;
|
||||
long rowsToSample = pair.second;
|
||||
|
||||
@ -269,8 +269,8 @@ public class StatisticsRepository {
|
||||
params.put("count", String.valueOf(columnStatistic.count));
|
||||
params.put("ndv", String.valueOf(columnStatistic.ndv));
|
||||
params.put("nullCount", String.valueOf(columnStatistic.numNulls));
|
||||
params.put("min", StatisticsUtil.encodeString(min));
|
||||
params.put("max", StatisticsUtil.encodeString(max));
|
||||
params.put("min", StatisticsUtil.escapeSQL(min));
|
||||
params.put("max", StatisticsUtil.escapeSQL(max));
|
||||
params.put("dataSize", String.valueOf(columnStatistic.dataSize));
|
||||
|
||||
if (partitionIds.isEmpty()) {
|
||||
|
||||
@ -788,7 +788,9 @@ public class StatisticsUtil {
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
return org.apache.commons.lang3.StringUtils.replace(str, "'", "''");
|
||||
return str.replace("'", "''")
|
||||
.replace("\\", "\\\\")
|
||||
.replace("\"", "\"\"");
|
||||
}
|
||||
|
||||
public static boolean isExternalTable(String catalogName, String dbName, String tblName) {
|
||||
|
||||
Reference in New Issue
Block a user