[improvement](statistics)Support get index row count and table delta rows. (#38520)

backport: https://github.com/apache/doris/pull/38492
This commit is contained in:
Jibing-Li
2024-07-31 10:39:21 +08:00
committed by GitHub
parent d9d7f8de95
commit c78e04ac31
8 changed files with 173 additions and 35 deletions

View File

@ -134,6 +134,10 @@ public class AlterColumnStatsStmt extends DdlStmt {
throw new AnalysisException(optional.get() + " is invalid statistics");
}
if (!properties.containsKey(StatsType.ROW_COUNT.getValue())) {
throw new AnalysisException("Set column stats must set row_count. e.g. 'row_count'='5'");
}
// get statsTypeToValue
properties.forEach((key, value) -> {
StatsType statsType = StatsType.fromString(key);

View File

@ -20,6 +20,7 @@ package org.apache.doris.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.TableIf;
@ -58,17 +59,25 @@ public class ShowTableStatsStmt extends ShowStmt {
.add("user_inject")
.build();
private final TableName tableName;
private static final ImmutableList<String> INDEX_TITLE_NAMES =
new ImmutableList.Builder<String>()
.add("table_name")
.add("index_name")
.add("row_count")
.build();
private final TableName tableName;
private final PartitionNames partitionNames;
private final boolean cached;
private final String indexName;
private TableIf table;
public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) {
public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached, String indexName) {
this.tableName = tableName;
this.partitionNames = partitionNames;
this.cached = cached;
this.indexName = indexName;
}
public TableName getTableName() {
@ -117,7 +126,13 @@ public class ShowTableStatsStmt extends ShowStmt {
public ShowResultSetMetaData getMetaData() {
ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder();
for (String title : TITLE_NAMES) {
ImmutableList<String> titles;
if (indexName != null) {
titles = INDEX_TITLE_NAMES;
} else {
titles = TITLE_NAMES;
}
for (String title : titles) {
builder.addColumn(new Column(title, ScalarType.createVarchar(30)));
}
return builder.build();
@ -127,35 +142,11 @@ public class ShowTableStatsStmt extends ShowStmt {
return table;
}
public long getPartitionId() {
if (partitionNames == null) {
return 0;
}
String partitionName = partitionNames.getPartitionNames().get(0);
return table.getPartition(partitionName).getId();
}
public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
if (tableStatistic == null) {
return new ShowResultSet(getMetaData(), new ArrayList<>());
if (indexName != null) {
return constructIndexResultSet(tableStatistic);
}
List<List<String>> result = Lists.newArrayList();
List<String> row = Lists.newArrayList();
row.add(String.valueOf(tableStatistic.updatedRows));
row.add(String.valueOf(tableStatistic.queriedTimes.get()));
row.add(String.valueOf(tableStatistic.rowCount));
LocalDateTime dateTime =
LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.updatedTime),
java.time.ZoneId.systemDefault());
String formattedDateTime = dateTime.format(formatter);
row.add(formattedDateTime);
row.add(tableStatistic.analyzeColumns().toString());
row.add(tableStatistic.jobType.toString());
row.add(String.valueOf(tableStatistic.newPartitionLoaded.get()));
row.add(String.valueOf(tableStatistic.userInjected));
result.add(row);
return new ShowResultSet(getMetaData(), result);
return constructTableResultSet(tableStatistic);
}
public ShowResultSet constructResultSet(long rowCount) {
@ -173,6 +164,51 @@ public class ShowTableStatsStmt extends ShowStmt {
return new ShowResultSet(getMetaData(), result);
}
public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic) {
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
if (tableStatistic == null) {
return new ShowResultSet(getMetaData(), new ArrayList<>());
}
List<List<String>> result = Lists.newArrayList();
List<String> row = Lists.newArrayList();
row.add(String.valueOf(tableStatistic.updatedRows));
row.add(String.valueOf(tableStatistic.queriedTimes.get()));
row.add(String.valueOf(tableStatistic.rowCount));
LocalDateTime dateTime =
LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.updatedTime),
java.time.ZoneId.systemDefault());
String formattedDateTime = dateTime.format(formatter);
row.add(formattedDateTime);
row.add(tableStatistic.analyzeColumns().toString());
row.add(tableStatistic.jobType.toString());
row.add(String.valueOf(tableStatistic.newPartitionLoaded.get()));
row.add(String.valueOf(tableStatistic.userInjected));
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
List<List<String>> result = Lists.newArrayList();
if (!(table instanceof OlapTable)) {
return new ShowResultSet(getMetaData(), result);
}
OlapTable olapTable = (OlapTable) table;
Long indexId = olapTable.getIndexIdByName(indexName);
if (indexId == null) {
throw new RuntimeException(String.format("Index %s not exist.", indexName));
}
long rowCount = tableStatistic.getRowCount(olapTable.getIndexIdByName(indexName));
if (rowCount == -1) {
return new ShowResultSet(getMetaData(), result);
}
List<String> row = Lists.newArrayList();
row.add(table.getName());
row.add(indexName);
row.add(String.valueOf(rowCount));
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
public boolean isCached() {
return cached;
}

View File

@ -38,6 +38,8 @@ import java.text.ParseException;
import java.util.List;
import java.util.Set;
import java.util.StringJoiner;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
public class AnalysisInfo implements Writable {
@ -202,6 +204,8 @@ public class AnalysisInfo implements Writable {
public final boolean userInject;
public final ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();
public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId, long dbId, long tblId,
List<Pair<String, String>> jobColumns, Set<String> partitionNames, String colName, Long indexId,
JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType,
@ -350,4 +354,8 @@ public class AnalysisInfo implements Writable {
public TableIf getTable() {
return StatisticsUtil.findTable(catalogId, dbId, tblId);
}
public void addIndexRowCount(long indexId, long rowCount) {
indexesRowCount.put(indexId, rowCount);
}
}

View File

@ -21,6 +21,7 @@ import org.apache.doris.analysis.TableSample;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.util.DebugUtil;
import org.apache.doris.datasource.CatalogIf;
@ -333,6 +334,14 @@ public abstract class BaseAnalysisTask {
try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) {
stmtExecutor = new StmtExecutor(a.connectContext, sql);
ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0));
// Update index row count after analyze.
if (this instanceof OlapAnalysisTask) {
AnalysisInfo jobInfo = Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId);
// For sync job, get jobInfo from job.jobInfo.
jobInfo = jobInfo == null ? job.jobInfo : jobInfo;
long indexId = info.indexId == -1 ? ((OlapTable) tbl).getBaseIndexId() : info.indexId;
jobInfo.addIndexRowCount(indexId, colStatsData.count);
}
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
queryId = DebugUtil.printId(stmtExecutor.getContext().queryId());
job.appendBuf(this, Collections.singletonList(colStatsData));

View File

@ -21,6 +21,7 @@ import org.apache.doris.analysis.AlterColumnStatsStmt;
import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
@ -278,12 +279,13 @@ public class StatisticsRepository {
String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE);
String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE);
long indexId = alterColumnStatsStmt.getIndexId();
if (rowCount == null) {
throw new RuntimeException("Row count is null.");
}
ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
String colName = alterColumnStatsStmt.getColumnName();
Column column = objects.table.getColumn(colName);
if (rowCount != null) {
builder.setCount(Double.parseDouble(rowCount));
}
builder.setCount(Double.parseDouble(rowCount));
if (ndv != null) {
double dNdv = Double.parseDouble(ndv);
builder.setNdv(dNdv);
@ -338,9 +340,14 @@ public class StatisticsRepository {
.setTblUpdateTime(System.currentTimeMillis())
.setColName("")
.setJobColumns(Lists.newArrayList())
.setRowCount((long) Double.parseDouble(rowCount))
.setUserInject(true)
.setJobType(AnalysisInfo.JobType.MANUAL)
.build();
if (objects.table instanceof OlapTable) {
indexId = indexId == -1 ? ((OlapTable) objects.table).getBaseIndexId() : indexId;
mockedJobInfo.addIndexRowCount(indexId, (long) Double.parseDouble(rowCount));
}
Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table);
} else {
// update partition granularity statistics

View File

@ -24,6 +24,7 @@ import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.Pair;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.persist.gson.GsonPostProcessable;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.statistics.AnalysisInfo.JobType;
@ -33,6 +34,8 @@ import com.google.gson.annotations.SerializedName;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
@ -40,7 +43,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
public class TableStatsMeta implements Writable {
public class TableStatsMeta implements Writable, GsonPostProcessable {
@SerializedName("tblId")
public final long tblId;
@ -77,6 +80,9 @@ public class TableStatsMeta implements Writable {
@SerializedName("userInjected")
public boolean userInjected;
@SerializedName("irc")
public ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();
@VisibleForTesting
public TableStatsMeta() {
tblId = 0;
@ -142,6 +148,8 @@ public class TableStatsMeta implements Writable {
if (tableIf != null) {
if (tableIf instanceof OlapTable) {
rowCount = analyzedJob.rowCount;
indexesRowCount.putAll(analyzedJob.indexesRowCount);
clearStaleIndexRowCount((OlapTable) tableIf);
}
if (analyzedJob.emptyJob) {
return;
@ -166,4 +174,26 @@ public class TableStatsMeta implements Writable {
public void convertDeprecatedColStatsToNewVersion() {
deprecatedColNameToColStatsMeta = null;
}
@Override
public void gsonPostProcess() throws IOException {
if (indexesRowCount == null) {
indexesRowCount = new ConcurrentHashMap<>();
}
}
public long getRowCount(long indexId) {
return indexesRowCount.getOrDefault(indexId, -1L);
}
private void clearStaleIndexRowCount(OlapTable table) {
Iterator<Long> iterator = indexesRowCount.keySet().iterator();
List<Long> indexIds = table.getIndexIds();
while (iterator.hasNext()) {
long key = iterator.next();
if (indexIds.contains(key)) {
iterator.remove();
}
}
}
}