From ba893a4e604d3696af6b03582cce6aba806e3714 Mon Sep 17 00:00:00 2001 From: yujun Date: Sun, 3 Dec 2023 23:34:21 +0800 Subject: [PATCH] [log](table) add table lock failed log (#27659) --- .../java/org/apache/doris/common/Config.java | 4 ++ .../java/org/apache/doris/catalog/Table.java | 45 ++++++++++++++++--- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index d1f47bfe6f..231d652559 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -126,6 +126,10 @@ public class Config extends ConfigBase { @ConfField(description = {"是否压缩 FE 的 Audit 日志", "enable compression for FE audit log file"}) public static boolean audit_log_enable_compress = false; + @ConfField(mutable = false, masterOnly = false, + description = {"是否检查table锁泄漏", "Whether to check table lock leaky"}) + public static boolean check_table_lock_leaky = false; + @ConfField(description = {"插件的安装目录", "The installation directory of the plugin"}) public static String plugin_dir = System.getenv("DORIS_HOME") + "/plugins"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java index a1dd3b915e..bb9a9cd28e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java @@ -20,6 +20,7 @@ package org.apache.doris.catalog; import org.apache.doris.alter.AlterCancelException; import org.apache.doris.analysis.CreateTableStmt; import org.apache.doris.catalog.constraint.Constraint; +import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.io.Text; @@ -120,11 +121,17 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf { @SerializedName(value = "constraints") private HashMap constraintsMap = new HashMap<>(); + // check read lock leaky + private Map readLockThreads = null; + public Table(TableType type) { this.type = type; this.fullSchema = Lists.newArrayList(); this.nameToColumn = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); this.rwLock = new QueryableReentrantReadWriteLock(true); + if (Config.check_table_lock_leaky) { + this.readLockThreads = Maps.newConcurrentMap(); + } } public Table(long id, String tableName, TableType type, List fullSchema) { @@ -146,6 +153,9 @@ public abstract class Table extends MetaObject implements Writable, TableIf { } this.rwLock = new QueryableReentrantReadWriteLock(true); this.createTime = Instant.now().getEpochSecond(); + if (Config.check_table_lock_leaky) { + this.readLockThreads = Maps.newConcurrentMap(); + } } public void markDropped() { @@ -158,14 +168,27 @@ public abstract class Table extends MetaObject implements Writable, TableIf { public void readLock() { this.rwLock.readLock().lock(); + if (this.readLockThreads != null && this.rwLock.getReadHoldCount() == 1) { + Thread thread = Thread.currentThread(); + this.readLockThreads.put(thread.getId(), + "(" + thread.toString() + ", time " + System.currentTimeMillis() + ")"); + } } public boolean tryReadLock(long timeout, TimeUnit unit) { try { boolean res = this.rwLock.readLock().tryLock(timeout, unit); - if (!res && unit.toSeconds(timeout) >= 1) { - LOG.warn("Failed to try table {}'s read lock. timeout {} {}. Current owner: {}", - name, timeout, unit.name(), rwLock.getOwner()); + if (res) { + if (this.readLockThreads != null && this.rwLock.getReadHoldCount() == 1) { + Thread thread = Thread.currentThread(); + this.readLockThreads.put(thread.getId(), + "(" + thread.toString() + ", time " + System.currentTimeMillis() + ")"); + } + } else { + if (unit.toSeconds(timeout) >= 1) { + LOG.warn("Failed to try table {}'s read lock. timeout {} {}. 
Current owner: {}", + name, timeout, unit.name(), rwLock.getOwner()); + } } return res; } catch (InterruptedException e) { @@ -176,6 +199,9 @@ public abstract class Table extends MetaObject implements Writable, TableIf { public void readUnlock() { this.rwLock.readLock().unlock(); + if (this.readLockThreads != null && this.rwLock.getReadHoldCount() == 0) { + this.readLockThreads.remove(Thread.currentThread().getId()); + } } public void writeLock() { @@ -191,12 +217,21 @@ public abstract class Table extends MetaObject implements Writable, TableIf { return true; } + // TabletStatMgr invokes tryWriteLock on every OLAP table once a minute, + // so we can set Config.check_table_lock_leaky = true + // and check the log to find out whether a table has a lock leak. public boolean tryWriteLock(long timeout, TimeUnit unit) { try { boolean res = this.rwLock.writeLock().tryLock(timeout, unit); if (!res && unit.toSeconds(timeout) >= 1) { - LOG.warn("Failed to try table {}'s write lock. timeout {} {}. Current owner: {}", - name, timeout, unit.name(), rwLock.getOwner()); + if (readLockThreads == null) { + LOG.warn("Failed to try table {}'s write lock. timeout {} {}. Current owner: {}", + name, timeout, unit.name(), rwLock.getOwner()); + } else { + LOG.warn("Failed to try table {}'s write lock. timeout {} {}. Current owner: {}, " + + "current reader: {}", + name, timeout, unit.name(), rwLock.getOwner(), readLockThreads); + } } return res; } catch (InterruptedException e) {