From 66d3371400207f568c7ff6ff6bf5f4f0da32bd2c Mon Sep 17 00:00:00 2001 From: AKIRA <33112463+Kikyou1997@users.noreply.github.com> Date: Thu, 7 Sep 2023 19:02:44 +0900 Subject: [PATCH] [opt](stats) remove table stats when table has been removed (#23803) --- .../apache/doris/datasource/CatalogIf.java | 4 +- .../apache/doris/datasource/CatalogMgr.java | 4 +- .../doris/datasource/ExternalCatalog.java | 3 +- .../doris/datasource/InternalCatalog.java | 3 +- .../org/apache/doris/persist/EditLog.java | 8 ++++ .../apache/doris/persist/OperationType.java | 2 + .../doris/persist/TableStatsDeletionLog.java | 42 +++++++++++++++++++ .../apache/doris/statistics/AnalysisInfo.java | 2 + .../doris/statistics/AnalysisManager.java | 40 ++++++++++++++++-- .../suites/statistics/analyze_stats.groovy | 4 ++ 10 files changed, 103 insertions(+), 9 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/persist/TableStatsDeletionLog.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java index d135018e75..17874ba0f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java @@ -20,6 +20,7 @@ package org.apache.doris.datasource; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.TableIf; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; import org.apache.doris.common.ErrorCode; @@ -168,8 +169,7 @@ public interface CatalogIf { } // Return a copy of all db collection. - @SuppressWarnings({"rawtypes", "unchecked"}) - public Collection getAllDbs(); + public Collection> getAllDbs(); public boolean enableAutoAnalyze(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java index 253cfa2f69..f8725a2d2a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java @@ -289,7 +289,7 @@ public class CatalogMgr implements Writable, GsonPostProcessable { LOG.warn("Non catalog {} is found.", stmt.getCatalogName()); return; } - CatalogIf catalog = nameToCatalog.get(stmt.getCatalogName()); + CatalogIf> catalog = nameToCatalog.get(stmt.getCatalogName()); if (catalog == null) { throw new DdlException("No catalog found with name: " + stmt.getCatalogName()); } @@ -299,7 +299,7 @@ public class CatalogMgr implements Writable, GsonPostProcessable { lastDBOfCatalog.remove(stmt.getCatalogName()); Env.getCurrentEnv().getQueryStats().clear(catalog.getId()); - + Env.getCurrentEnv().getAnalysisManager().removeExternalTableStats(catalog); } finally { writeUnlock(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index 35d03dfabc..55a0e9a1fd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -21,6 +21,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Resource; +import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.external.DeltaLakeExternalDataBase; import org.apache.doris.catalog.external.EsExternalDatabase; import org.apache.doris.catalog.external.ExternalDatabase; @@ -588,7 +589,7 @@ public abstract class ExternalCatalog } @Override - public Collection getAllDbs() { + public Collection> getAllDbs() { makeSureInitialized(); return new HashSet<>(idToDb.values()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index 4114386680..e6f4f65e02 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -919,6 +919,7 @@ public class InternalCatalog implements CatalogIf { Env.getCurrentEnv().getEditLog().logDropTable(info); Env.getCurrentEnv().getQueryStats().clear(Env.getCurrentEnv().getCurrentCatalog().getId(), db.getId(), table.getId()); + Env.getCurrentEnv().getAnalysisManager().removeTableStats(table.getId()); } finally { db.writeUnlock(); } @@ -3138,7 +3139,7 @@ public class InternalCatalog implements CatalogIf { } @Override - public Collection getAllDbs() { + public Collection> getAllDbs() { return new HashSet<>(idToDb.values()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java index 8cdf5eb978..8d5022a74e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java @@ -1107,6 +1107,10 @@ public class EditLog { env.getAnalysisManager().replayUpdateTableStatsStatus((TableStats) journal.getData()); break; } + case OperationType.OP_DELETE_TABLE_STATS: { + env.getAnalysisManager().replayTableStatsDeletion((TableStatsDeletionLog) journal.getData()); + break; + } default: { IOException e = new IOException(); LOG.error("UNKNOWN Operation Type {}", opCode, e); @@ -1944,4 +1948,8 @@ public class EditLog { public void logCreateTableStats(TableStats tableStats) { logEdit(OperationType.OP_UPDATE_TABLE_STATS, tableStats); } + + public void logDeleteTableStats(TableStatsDeletionLog log) { + logEdit(OperationType.OP_DELETE_ANALYSIS_JOB, log); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java index ccfa283177..3d1052b36c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java @@ -333,6 +333,8 @@ public class OperationType { public static final short OP_UPDATE_TABLE_STATS = 455; + public static final short OP_DELETE_TABLE_STATS = 456; + /** * Get opcode name by op code. **/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/TableStatsDeletionLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/TableStatsDeletionLog.java new file mode 100644 index 0000000000..d111ba0c78 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/TableStatsDeletionLog.java @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.persist; + +import org.apache.doris.common.io.Writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +public class TableStatsDeletionLog implements Writable { + + public final long id; + + public TableStatsDeletionLog(long id) { + this.id = id; + } + + @Override + public void write(DataOutput out) throws IOException { + out.writeLong(id); + } + + public static TableStatsDeletionLog read(DataInput dataInput) throws IOException { + return new TableStatsDeletionLog(dataInput.readLong()); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index c20bad6396..8c420ffce6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -71,6 +71,8 @@ public class AnalysisInfo implements Writable { } public enum ScheduleType { + // Job created by AutoCollector is also `ONCE` type, this is because it runs once only and should be removed + // when its information is expired ONCE, PERIOD, AUTOMATIC diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index fd2d844ce1..0a4704a509 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -43,8 +43,10 @@ import org.apache.doris.common.ThreadPoolManager.BlockedPolicy; import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.Daemon; import org.apache.doris.common.util.Util; +import org.apache.doris.datasource.CatalogIf; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.persist.AnalyzeDeletionLog; +import org.apache.doris.persist.TableStatsDeletionLog; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ShowResultSet; import org.apache.doris.qe.ShowResultSetMetaData; @@ -220,7 +222,7 @@ public class AnalysisManager extends Daemon implements Writable { } private void clear() { - clearMeta(analysisJobInfoMap, (a) -> + clearExpiredAnalysisInfo(analysisJobInfoMap, (a) -> a.scheduleType.equals(ScheduleType.ONCE) && System.currentTimeMillis() - a.lastExecTimeInMs > TimeUnit.DAYS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS), @@ -228,7 +230,7 @@ public class AnalysisManager extends Daemon implements Writable { Env.getCurrentEnv().getEditLog().logDeleteAnalysisJob(new AnalyzeDeletionLog(id)); return null; }); - clearMeta(analysisTaskInfoMap, (a) -> System.currentTimeMillis() - a.lastExecTimeInMs + clearExpiredAnalysisInfo(analysisTaskInfoMap, (a) -> System.currentTimeMillis() - a.lastExecTimeInMs > TimeUnit.DAYS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS), (id) -> { Env.getCurrentEnv().getEditLog().logDeleteAnalysisTask(new AnalyzeDeletionLog(id)); @@ -236,7 +238,7 @@ public class AnalysisManager extends Daemon implements Writable { }); } - private void clearMeta(Map infoMap, Predicate isExpired, + private void clearExpiredAnalysisInfo(Map infoMap, Predicate isExpired, Function writeLog) { synchronized (infoMap) { List expired = new ArrayList<>(); @@ -968,4 +970,36 @@ public class AnalysisManager extends Daemon implements Writable { .collect(Collectors.toSet()); } + public void removeExternalTableStats(CatalogIf> catalogIf) { + if (FeConstants.runningUnitTest) { + return; + } + Set tblSet = catalogIf.getAllDbs().stream() + .map(DatabaseIf::getTables) + .flatMap(Collection::stream) + .map(t -> ((TableIf) t).getId()) + .collect(Collectors.toSet()); + List expiredTblIds = new ArrayList<>(); + for (Map.Entry entry : idToTblStatsStatus.entrySet()) { + if (tblSet.contains(entry.getKey())) { + expiredTblIds.add(entry.getKey()); + } + } + for (Long tblId : expiredTblIds) { + removeTableStats(tblId); + } + } + + public void removeTableStats(long tblId) { + if (!idToTblStatsStatus.containsKey(tblId)) { + return; + } + TableStatsDeletionLog log = new TableStatsDeletionLog(tblId); + Env.getCurrentEnv().getEditLog().logDeleteTableStats(log); + replayTableStatsDeletion(log); + } + + public void replayTableStatsDeletion(TableStatsDeletionLog log) { + idToTblStatsStatus.remove(log.id); + } } diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 3220a34ee5..f985e02337 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -912,4 +912,8 @@ PARTITION `p599` VALUES IN (599) SHOW COLUMN CACHED STATS increment_analyze_test(id) """ expected_id_col_stats(inc_res, 6, 1) + + sql """ + DROP TABLE regression_test_statistics.increment_analyze_test; + """ } \ No newline at end of file