From ffad945dd1e4c1a3581ac60207e8f5500e94da8e Mon Sep 17 00:00:00 2001 From: AKIRA <33112463+Kikyou1997@users.noreply.github.com> Date: Sat, 7 Oct 2023 12:31:45 +0900 Subject: [PATCH] [opt](optimizer) Recycle expired table stats #24777 Remove table stats when olap table is dropped --- .../apache/doris/datasource/CatalogIf.java | 4 +- .../apache/doris/datasource/CatalogMgr.java | 3 +- .../doris/datasource/ExternalCatalog.java | 3 +- .../doris/datasource/InternalCatalog.java | 3 +- .../apache/doris/journal/JournalEntity.java | 6 +++ .../org/apache/doris/persist/EditLog.java | 7 +++ .../apache/doris/persist/OperationType.java | 2 + .../doris/persist/TableStatsDeletionLog.java | 47 +++++++++++++++++++ .../apache/doris/statistics/AnalysisInfo.java | 2 + .../doris/statistics/AnalysisManager.java | 19 ++++++-- .../suites/statistics/analyze_stats.groovy | 4 +- 11 files changed, 89 insertions(+), 11 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/persist/TableStatsDeletionLog.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java index 22d7cf2b15..63718419ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java @@ -19,6 +19,7 @@ package org.apache.doris.datasource; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.TableIf; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; import org.apache.doris.common.ErrorCode; @@ -168,8 +169,7 @@ public interface CatalogIf { } // Return a copy of all db collection. - @SuppressWarnings({"rawtypes", "unchecked"}) - public Collection getAllDbs(); + public Collection> getAllDbs(); public boolean enableAutoAnalyze(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java index 253cfa2f69..f6777b809f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java @@ -289,7 +289,7 @@ public class CatalogMgr implements Writable, GsonPostProcessable { LOG.warn("Non catalog {} is found.", stmt.getCatalogName()); return; } - CatalogIf catalog = nameToCatalog.get(stmt.getCatalogName()); + CatalogIf> catalog = nameToCatalog.get(stmt.getCatalogName()); if (catalog == null) { throw new DdlException("No catalog found with name: " + stmt.getCatalogName()); } @@ -299,7 +299,6 @@ public class CatalogMgr implements Writable, GsonPostProcessable { lastDBOfCatalog.remove(stmt.getCatalogName()); Env.getCurrentEnv().getQueryStats().clear(catalog.getId()); - } finally { writeUnlock(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index 986c63aa96..16d4996861 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -21,6 +21,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Resource; +import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.external.DeltaLakeExternalDataBase; import org.apache.doris.catalog.external.EsExternalDatabase; import org.apache.doris.catalog.external.ExternalDatabase; @@ -600,7 +601,7 @@ public abstract class ExternalCatalog } @Override - public Collection getAllDbs() { + public Collection> getAllDbs() { makeSureInitialized(); return new HashSet<>(idToDb.values()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index 8c710adc80..896572dfad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -929,6 +929,7 @@ public class InternalCatalog implements CatalogIf { Env.getCurrentEnv().getEditLog().logDropTable(info); Env.getCurrentEnv().getQueryStats().clear(Env.getCurrentEnv().getCurrentCatalog().getId(), db.getId(), table.getId()); + Env.getCurrentEnv().getAnalysisManager().removeTableStats(table.getId()); } finally { db.writeUnlock(); } @@ -3197,7 +3198,7 @@ public class InternalCatalog implements CatalogIf { } @Override - public Collection getAllDbs() { + public Collection> getAllDbs() { return new HashSet<>(idToDb.values()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java index 80ee1a19aa..c1fd20ceeb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java @@ -116,6 +116,7 @@ import org.apache.doris.persist.TableAddOrDropInvertedIndicesInfo; import org.apache.doris.persist.TableInfo; import org.apache.doris.persist.TablePropertyInfo; import org.apache.doris.persist.TableRenameColumnInfo; +import org.apache.doris.persist.TableStatsDeletionLog; import org.apache.doris.persist.TruncateTableInfo; import org.apache.doris.plugin.PluginInfo; import org.apache.doris.policy.DropPolicyLog; @@ -897,6 +898,11 @@ public class JournalEntity implements Writable { isRead = true; break; } + case OperationType.OP_DELETE_TABLE_STATS: { + data = TableStatsDeletionLog.read(in); + isRead = true; + break; + } default: { IOException e = new IOException(); LOG.error("UNKNOWN Operation Type {}", opCode, e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java index 6e714eb3bf..ba38d5ac4c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java @@ -1131,6 +1131,10 @@ public class EditLog { env.getAnalysisManager().replayPersistSysJob((AnalysisInfo) journal.getData()); break; } + case OperationType.OP_DELETE_TABLE_STATS: { + env.getAnalysisManager().replayTableStatsDeletion((TableStatsDeletionLog) journal.getData()); + break; + } default: { IOException e = new IOException(); LOG.error("UNKNOWN Operation Type {}", opCode, e); @@ -1985,4 +1989,7 @@ public class EditLog { logEdit(OperationType.OP_PERSIST_AUTO_JOB, analysisInfo); } + public void logDeleteTableStats(TableStatsDeletionLog log) { + logEdit(OperationType.OP_DELETE_TABLE_STATS, log); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java index c5e784cb9d..c2d82bdabe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java @@ -339,6 +339,8 @@ public class OperationType { public static final short OP_PERSIST_AUTO_JOB = 456; + public static final short OP_DELETE_TABLE_STATS = 457; + /** * Get opcode name by op code. **/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/TableStatsDeletionLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/TableStatsDeletionLog.java new file mode 100644 index 0000000000..4016ff0139 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/TableStatsDeletionLog.java @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.persist; + +import org.apache.doris.common.io.Text; +import org.apache.doris.common.io.Writable; +import org.apache.doris.persist.gson.GsonUtils; + +import com.google.gson.annotations.SerializedName; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +public class TableStatsDeletionLog implements Writable { + + @SerializedName("id") + public final long id; + + public TableStatsDeletionLog(long id) { + this.id = id; + } + + @Override + public void write(DataOutput out) throws IOException { + Text.writeString(out, GsonUtils.GSON.toJson(this)); + } + + public static TableStatsDeletionLog read(DataInput dataInput) throws IOException { + return GsonUtils.GSON.fromJson(Text.readString(dataInput), TableStatsDeletionLog.class); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 5f3b0fcacf..e3d5c8a91b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -73,6 +73,8 @@ public class AnalysisInfo implements Writable { } public enum ScheduleType { + // Job created by AutoCollector is also `ONCE` type, this is because it runs once only and should be removed + // when its information is expired ONCE, PERIOD, AUTOMATIC diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index a45a8715d7..884cb49730 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -46,6 +46,7 @@ import org.apache.doris.common.util.Daemon; import org.apache.doris.common.util.Util; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.persist.AnalyzeDeletionLog; +import org.apache.doris.persist.TableStatsDeletionLog; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ShowResultSet; @@ -244,7 +245,7 @@ public class AnalysisManager extends Daemon implements Writable { } private void clear() { - clearMeta(analysisJobInfoMap, (a) -> + clearExpiredAnalysisInfo(analysisJobInfoMap, (a) -> a.scheduleType.equals(ScheduleType.ONCE) && System.currentTimeMillis() - a.lastExecTimeInMs > TimeUnit.DAYS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS), @@ -252,7 +253,7 @@ public class AnalysisManager extends Daemon implements Writable { Env.getCurrentEnv().getEditLog().logDeleteAnalysisJob(new AnalyzeDeletionLog(id)); return null; }); - clearMeta(analysisTaskInfoMap, (a) -> System.currentTimeMillis() - a.lastExecTimeInMs + clearExpiredAnalysisInfo(analysisTaskInfoMap, (a) -> System.currentTimeMillis() - a.lastExecTimeInMs > TimeUnit.DAYS.toMillis(StatisticConstants.ANALYSIS_JOB_INFO_EXPIRATION_TIME_IN_DAYS), (id) -> { Env.getCurrentEnv().getEditLog().logDeleteAnalysisTask(new AnalyzeDeletionLog(id)); @@ -260,7 +261,7 @@ public class AnalysisManager extends Daemon implements Writable { }); } - private void clearMeta(Map infoMap, Predicate isExpired, + private void clearExpiredAnalysisInfo(Map infoMap, Predicate isExpired, Function writeLog) { synchronized (infoMap) { List expired = new ArrayList<>(); @@ -1076,4 +1077,16 @@ public class AnalysisManager extends Daemon implements Writable { } } + public void removeTableStats(long tblId) { + if (!idToTblStats.containsKey(tblId)) { + return; + } + TableStatsDeletionLog log = new TableStatsDeletionLog(tblId); + Env.getCurrentEnv().getEditLog().logDeleteTableStats(log); + replayTableStatsDeletion(log); + } + + public void replayTableStatsDeletion(TableStatsDeletionLog log) { + idToTblStats.remove(log.id); + } } diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 4ee9cc2b2f..7ad939dcb2 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -184,14 +184,14 @@ suite("test_analyze") { assert contains_expected_table(show_result) sql """ - DROP ANALYZE JOB ${a_result_3[0][4]} + DROP ANALYZE JOB ${a_result_3[0][0]} """ show_result = sql """ SHOW ANALYZE """ - assert stats_job_removed(show_result, a_result_3[0][4]) + assert stats_job_removed(show_result, a_result_3[0][0]) sql """ ANALYZE DATABASE ${db} WITH SAMPLE ROWS 5 WITH PERIOD 100000