[improvement](statistics)Support get index row count and table delta rows. (#38520)
backport: https://github.com/apache/doris/pull/38492
This commit is contained in:
@ -134,6 +134,10 @@ public class AlterColumnStatsStmt extends DdlStmt {
|
||||
throw new AnalysisException(optional.get() + " is invalid statistics");
|
||||
}
|
||||
|
||||
if (!properties.containsKey(StatsType.ROW_COUNT.getValue())) {
|
||||
throw new AnalysisException("Set column stats must set row_count. e.g. 'row_count'='5'");
|
||||
}
|
||||
|
||||
// get statsTypeToValue
|
||||
properties.forEach((key, value) -> {
|
||||
StatsType statsType = StatsType.fromString(key);
|
||||
|
||||
@ -20,6 +20,7 @@ package org.apache.doris.analysis;
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.DatabaseIf;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.OlapTable;
|
||||
import org.apache.doris.catalog.Partition;
|
||||
import org.apache.doris.catalog.ScalarType;
|
||||
import org.apache.doris.catalog.TableIf;
|
||||
@ -58,17 +59,25 @@ public class ShowTableStatsStmt extends ShowStmt {
|
||||
.add("user_inject")
|
||||
.build();
|
||||
|
||||
private final TableName tableName;
|
||||
private static final ImmutableList<String> INDEX_TITLE_NAMES =
|
||||
new ImmutableList.Builder<String>()
|
||||
.add("table_name")
|
||||
.add("index_name")
|
||||
.add("row_count")
|
||||
.build();
|
||||
|
||||
private final TableName tableName;
|
||||
private final PartitionNames partitionNames;
|
||||
private final boolean cached;
|
||||
private final String indexName;
|
||||
|
||||
private TableIf table;
|
||||
|
||||
public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) {
|
||||
public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached, String indexName) {
|
||||
this.tableName = tableName;
|
||||
this.partitionNames = partitionNames;
|
||||
this.cached = cached;
|
||||
this.indexName = indexName;
|
||||
}
|
||||
|
||||
public TableName getTableName() {
|
||||
@ -117,7 +126,13 @@ public class ShowTableStatsStmt extends ShowStmt {
|
||||
public ShowResultSetMetaData getMetaData() {
|
||||
ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder();
|
||||
|
||||
for (String title : TITLE_NAMES) {
|
||||
ImmutableList<String> titles;
|
||||
if (indexName != null) {
|
||||
titles = INDEX_TITLE_NAMES;
|
||||
} else {
|
||||
titles = TITLE_NAMES;
|
||||
}
|
||||
for (String title : titles) {
|
||||
builder.addColumn(new Column(title, ScalarType.createVarchar(30)));
|
||||
}
|
||||
return builder.build();
|
||||
@ -127,35 +142,11 @@ public class ShowTableStatsStmt extends ShowStmt {
|
||||
return table;
|
||||
}
|
||||
|
||||
public long getPartitionId() {
|
||||
if (partitionNames == null) {
|
||||
return 0;
|
||||
}
|
||||
String partitionName = partitionNames.getPartitionNames().get(0);
|
||||
return table.getPartition(partitionName).getId();
|
||||
}
|
||||
|
||||
public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
||||
if (tableStatistic == null) {
|
||||
return new ShowResultSet(getMetaData(), new ArrayList<>());
|
||||
if (indexName != null) {
|
||||
return constructIndexResultSet(tableStatistic);
|
||||
}
|
||||
List<List<String>> result = Lists.newArrayList();
|
||||
List<String> row = Lists.newArrayList();
|
||||
row.add(String.valueOf(tableStatistic.updatedRows));
|
||||
row.add(String.valueOf(tableStatistic.queriedTimes.get()));
|
||||
row.add(String.valueOf(tableStatistic.rowCount));
|
||||
LocalDateTime dateTime =
|
||||
LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.updatedTime),
|
||||
java.time.ZoneId.systemDefault());
|
||||
String formattedDateTime = dateTime.format(formatter);
|
||||
row.add(formattedDateTime);
|
||||
row.add(tableStatistic.analyzeColumns().toString());
|
||||
row.add(tableStatistic.jobType.toString());
|
||||
row.add(String.valueOf(tableStatistic.newPartitionLoaded.get()));
|
||||
row.add(String.valueOf(tableStatistic.userInjected));
|
||||
result.add(row);
|
||||
return new ShowResultSet(getMetaData(), result);
|
||||
return constructTableResultSet(tableStatistic);
|
||||
}
|
||||
|
||||
public ShowResultSet constructResultSet(long rowCount) {
|
||||
@ -173,6 +164,51 @@ public class ShowTableStatsStmt extends ShowStmt {
|
||||
return new ShowResultSet(getMetaData(), result);
|
||||
}
|
||||
|
||||
public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic) {
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
||||
if (tableStatistic == null) {
|
||||
return new ShowResultSet(getMetaData(), new ArrayList<>());
|
||||
}
|
||||
List<List<String>> result = Lists.newArrayList();
|
||||
List<String> row = Lists.newArrayList();
|
||||
row.add(String.valueOf(tableStatistic.updatedRows));
|
||||
row.add(String.valueOf(tableStatistic.queriedTimes.get()));
|
||||
row.add(String.valueOf(tableStatistic.rowCount));
|
||||
LocalDateTime dateTime =
|
||||
LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.updatedTime),
|
||||
java.time.ZoneId.systemDefault());
|
||||
String formattedDateTime = dateTime.format(formatter);
|
||||
row.add(formattedDateTime);
|
||||
row.add(tableStatistic.analyzeColumns().toString());
|
||||
row.add(tableStatistic.jobType.toString());
|
||||
row.add(String.valueOf(tableStatistic.newPartitionLoaded.get()));
|
||||
row.add(String.valueOf(tableStatistic.userInjected));
|
||||
result.add(row);
|
||||
return new ShowResultSet(getMetaData(), result);
|
||||
}
|
||||
|
||||
public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
|
||||
List<List<String>> result = Lists.newArrayList();
|
||||
if (!(table instanceof OlapTable)) {
|
||||
return new ShowResultSet(getMetaData(), result);
|
||||
}
|
||||
OlapTable olapTable = (OlapTable) table;
|
||||
Long indexId = olapTable.getIndexIdByName(indexName);
|
||||
if (indexId == null) {
|
||||
throw new RuntimeException(String.format("Index %s not exist.", indexName));
|
||||
}
|
||||
long rowCount = tableStatistic.getRowCount(olapTable.getIndexIdByName(indexName));
|
||||
if (rowCount == -1) {
|
||||
return new ShowResultSet(getMetaData(), result);
|
||||
}
|
||||
List<String> row = Lists.newArrayList();
|
||||
row.add(table.getName());
|
||||
row.add(indexName);
|
||||
row.add(String.valueOf(rowCount));
|
||||
result.add(row);
|
||||
return new ShowResultSet(getMetaData(), result);
|
||||
}
|
||||
|
||||
public boolean isCached() {
|
||||
return cached;
|
||||
}
|
||||
|
||||
@ -38,6 +38,8 @@ import java.text.ParseException;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.StringJoiner;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
|
||||
public class AnalysisInfo implements Writable {
|
||||
|
||||
@ -202,6 +204,8 @@ public class AnalysisInfo implements Writable {
|
||||
|
||||
public final boolean userInject;
|
||||
|
||||
public final ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();
|
||||
|
||||
public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId, long dbId, long tblId,
|
||||
List<Pair<String, String>> jobColumns, Set<String> partitionNames, String colName, Long indexId,
|
||||
JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType,
|
||||
@ -350,4 +354,8 @@ public class AnalysisInfo implements Writable {
|
||||
public TableIf getTable() {
|
||||
return StatisticsUtil.findTable(catalogId, dbId, tblId);
|
||||
}
|
||||
|
||||
public void addIndexRowCount(long indexId, long rowCount) {
|
||||
indexesRowCount.put(indexId, rowCount);
|
||||
}
|
||||
}
|
||||
|
||||
@ -21,6 +21,7 @@ import org.apache.doris.analysis.TableSample;
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.DatabaseIf;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.OlapTable;
|
||||
import org.apache.doris.catalog.TableIf;
|
||||
import org.apache.doris.common.util.DebugUtil;
|
||||
import org.apache.doris.datasource.CatalogIf;
|
||||
@ -333,6 +334,14 @@ public abstract class BaseAnalysisTask {
|
||||
try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) {
|
||||
stmtExecutor = new StmtExecutor(a.connectContext, sql);
|
||||
ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0));
|
||||
// Update index row count after analyze.
|
||||
if (this instanceof OlapAnalysisTask) {
|
||||
AnalysisInfo jobInfo = Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId);
|
||||
// For sync job, get jobInfo from job.jobInfo.
|
||||
jobInfo = jobInfo == null ? job.jobInfo : jobInfo;
|
||||
long indexId = info.indexId == -1 ? ((OlapTable) tbl).getBaseIndexId() : info.indexId;
|
||||
jobInfo.addIndexRowCount(indexId, colStatsData.count);
|
||||
}
|
||||
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
|
||||
queryId = DebugUtil.printId(stmtExecutor.getContext().queryId());
|
||||
job.appendBuf(this, Collections.singletonList(colStatsData));
|
||||
|
||||
@ -21,6 +21,7 @@ import org.apache.doris.analysis.AlterColumnStatsStmt;
|
||||
import org.apache.doris.analysis.TableName;
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.OlapTable;
|
||||
import org.apache.doris.catalog.Partition;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
import org.apache.doris.common.Config;
|
||||
@ -278,12 +279,13 @@ public class StatisticsRepository {
|
||||
String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE);
|
||||
String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE);
|
||||
long indexId = alterColumnStatsStmt.getIndexId();
|
||||
if (rowCount == null) {
|
||||
throw new RuntimeException("Row count is null.");
|
||||
}
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
|
||||
String colName = alterColumnStatsStmt.getColumnName();
|
||||
Column column = objects.table.getColumn(colName);
|
||||
if (rowCount != null) {
|
||||
builder.setCount(Double.parseDouble(rowCount));
|
||||
}
|
||||
builder.setCount(Double.parseDouble(rowCount));
|
||||
if (ndv != null) {
|
||||
double dNdv = Double.parseDouble(ndv);
|
||||
builder.setNdv(dNdv);
|
||||
@ -338,9 +340,14 @@ public class StatisticsRepository {
|
||||
.setTblUpdateTime(System.currentTimeMillis())
|
||||
.setColName("")
|
||||
.setJobColumns(Lists.newArrayList())
|
||||
.setRowCount((long) Double.parseDouble(rowCount))
|
||||
.setUserInject(true)
|
||||
.setJobType(AnalysisInfo.JobType.MANUAL)
|
||||
.build();
|
||||
if (objects.table instanceof OlapTable) {
|
||||
indexId = indexId == -1 ? ((OlapTable) objects.table).getBaseIndexId() : indexId;
|
||||
mockedJobInfo.addIndexRowCount(indexId, (long) Double.parseDouble(rowCount));
|
||||
}
|
||||
Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table);
|
||||
} else {
|
||||
// update partition granularity statistics
|
||||
|
||||
@ -24,6 +24,7 @@ import org.apache.doris.catalog.TableIf;
|
||||
import org.apache.doris.common.Pair;
|
||||
import org.apache.doris.common.io.Text;
|
||||
import org.apache.doris.common.io.Writable;
|
||||
import org.apache.doris.persist.gson.GsonPostProcessable;
|
||||
import org.apache.doris.persist.gson.GsonUtils;
|
||||
import org.apache.doris.statistics.AnalysisInfo.JobType;
|
||||
|
||||
@ -33,6 +34,8 @@ import com.google.gson.annotations.SerializedName;
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
@ -40,7 +43,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class TableStatsMeta implements Writable {
|
||||
public class TableStatsMeta implements Writable, GsonPostProcessable {
|
||||
|
||||
@SerializedName("tblId")
|
||||
public final long tblId;
|
||||
@ -77,6 +80,9 @@ public class TableStatsMeta implements Writable {
|
||||
@SerializedName("userInjected")
|
||||
public boolean userInjected;
|
||||
|
||||
@SerializedName("irc")
|
||||
public ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();
|
||||
|
||||
@VisibleForTesting
|
||||
public TableStatsMeta() {
|
||||
tblId = 0;
|
||||
@ -142,6 +148,8 @@ public class TableStatsMeta implements Writable {
|
||||
if (tableIf != null) {
|
||||
if (tableIf instanceof OlapTable) {
|
||||
rowCount = analyzedJob.rowCount;
|
||||
indexesRowCount.putAll(analyzedJob.indexesRowCount);
|
||||
clearStaleIndexRowCount((OlapTable) tableIf);
|
||||
}
|
||||
if (analyzedJob.emptyJob) {
|
||||
return;
|
||||
@ -166,4 +174,26 @@ public class TableStatsMeta implements Writable {
|
||||
public void convertDeprecatedColStatsToNewVersion() {
|
||||
deprecatedColNameToColStatsMeta = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void gsonPostProcess() throws IOException {
|
||||
if (indexesRowCount == null) {
|
||||
indexesRowCount = new ConcurrentHashMap<>();
|
||||
}
|
||||
}
|
||||
|
||||
public long getRowCount(long indexId) {
|
||||
return indexesRowCount.getOrDefault(indexId, -1L);
|
||||
}
|
||||
|
||||
private void clearStaleIndexRowCount(OlapTable table) {
|
||||
Iterator<Long> iterator = indexesRowCount.keySet().iterator();
|
||||
List<Long> indexIds = table.getIndexIds();
|
||||
while (iterator.hasNext()) {
|
||||
long key = iterator.next();
|
||||
if (indexIds.contains(key)) {
|
||||
iterator.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user