[feature](diagnose) support "ADMIN DIAGNOSE TABLET" stmt (#8839)
`ADMIN DIAGNOSE TABLET tablet_id`
This statement makes it easier to quickly diagnose the status of a tablet.
See "ADMIN-DIAGNOSE-TABLET.md" for details.
```
mysql> admin diagnose tablet 10196;
+----------------------------------+------------------------------+------------+
| Item | Info | Suggestion |
+----------------------------------+------------------------------+------------+
| TabletExist | Yes | |
| TabletId | 10196 | |
| Database | default_cluster:db1: 10192 | |
| Table | tbl1: 10194 | |
| Partition | tbl1: 10193 | |
| MaterializedIndex | tbl1: 10195 | |
| Replicas(ReplicaId -> BackendId) | {"10197":10002} | |
| ReplicasNum | OK | |
| ReplicaBackendStatus | Backend 10002 is not alive. | |
| ReplicaVersionStatus | OK | |
| ReplicaStatus | OK | |
| ReplicaCompactionStatus | OK | |
+----------------------------------+------------------------------+------------+
```
This commit is contained in:
@ -241,7 +241,7 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALIAS, KW_ALL, KW_A
|
||||
KW_COLLATE, KW_COLLATION, KW_COLUMN, KW_COLUMNS, KW_COMMENT, KW_COMMIT, KW_COMMITTED, KW_COMPACT,
|
||||
KW_CONFIG, KW_CONNECTION, KW_CONNECTION_ID, KW_CONSISTENT, KW_CONVERT, KW_COUNT, KW_CREATE, KW_CREATION, KW_CROSS, KW_CUBE, KW_CURRENT, KW_CURRENT_USER,
|
||||
KW_DATA, KW_DATABASE, KW_DATABASES, KW_DATE, KW_DATETIME, KW_DAY, KW_DECIMAL, KW_DECOMMISSION, KW_DEFAULT, KW_DESC, KW_DESCRIBE,
|
||||
KW_DELETE, KW_UPDATE, KW_DISK, KW_DISTINCT, KW_DISTINCTPC, KW_DISTINCTPCSA, KW_DISTRIBUTED, KW_DISTRIBUTION, KW_DYNAMIC, KW_BUCKETS, KW_DIV, KW_DOUBLE, KW_DROP, KW_DROPP, KW_DUPLICATE,
|
||||
KW_DELETE, KW_UPDATE, KW_DIAGNOSE, KW_DISK, KW_DISTINCT, KW_DISTINCTPC, KW_DISTINCTPCSA, KW_DISTRIBUTED, KW_DISTRIBUTION, KW_DYNAMIC, KW_BUCKETS, KW_DIV, KW_DOUBLE, KW_DROP, KW_DROPP, KW_DUPLICATE,
|
||||
KW_ELSE, KW_ENABLE, KW_ENCRYPTKEY, KW_ENCRYPTKEYS, KW_END, KW_ENGINE, KW_ENGINES, KW_ENTER, KW_ERRORS, KW_EVENTS, KW_EXCEPT, KW_EXCLUDE,
|
||||
KW_EXISTS, KW_EXPORT, KW_EXTENDED, KW_EXTERNAL, KW_EXTRACT,
|
||||
KW_FALSE, KW_FEATURE, KW_FOLLOWER, KW_FOLLOWING, KW_FREE, KW_FROM, KW_FIELDS, KW_FILE, KW_FILTER, KW_FIRST, KW_FLOAT, KW_FOR, KW_FORCE, KW_FORMAT, KW_FRONTEND, KW_FRONTENDS, KW_FULL, KW_FUNCTION, KW_FUNCTIONS,
|
||||
@ -5321,6 +5321,10 @@ admin_stmt ::=
|
||||
{:
|
||||
RESULT = new AdminCleanTrashStmt(null);
|
||||
:}
|
||||
| KW_ADMIN KW_DIAGNOSE KW_TABLET INTEGER_LITERAL:tabletId
|
||||
{:
|
||||
RESULT = new AdminDiagnoseTabletStmt(tabletId);
|
||||
:}
|
||||
;
|
||||
|
||||
truncate_stmt ::=
|
||||
@ -5500,6 +5504,8 @@ keyword ::=
|
||||
{: RESULT = id; :}
|
||||
| KW_DECIMAL:id
|
||||
{: RESULT = id; :}
|
||||
| KW_DIAGNOSE:id
|
||||
{: RESULT = id; :}
|
||||
| KW_DISTINCTPC:id
|
||||
{: RESULT = id; :}
|
||||
| KW_DISTINCTPCSA:id
|
||||
|
||||
@ -0,0 +1,76 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.analysis;
|
||||
|
||||
import org.apache.doris.catalog.Catalog;
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.ScalarType;
|
||||
import org.apache.doris.common.ErrorCode;
|
||||
import org.apache.doris.common.ErrorReport;
|
||||
import org.apache.doris.common.UserException;
|
||||
import org.apache.doris.mysql.privilege.PrivPredicate;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
import org.apache.doris.qe.ShowResultSetMetaData;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
|
||||
// ADMIN DIAGNOSE TABLET tablet_id
|
||||
public class AdminDiagnoseTabletStmt extends ShowStmt {
|
||||
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
|
||||
.add("Item").add("Info").add("Suggestion")
|
||||
.build();
|
||||
|
||||
private long tabletId;
|
||||
|
||||
public AdminDiagnoseTabletStmt(long tabletId) {
|
||||
this.tabletId = tabletId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void analyze(Analyzer analyzer) throws UserException {
|
||||
super.analyze(analyzer);
|
||||
|
||||
// check auth
|
||||
if (!Catalog.getCurrentCatalog().getAuth().checkGlobalPriv(ConnectContext.get(), PrivPredicate.ADMIN)) {
|
||||
ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "ADMIN");
|
||||
}
|
||||
}
|
||||
|
||||
public long getTabletId() {
|
||||
return tabletId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toSql() {
|
||||
return "ADMIN DIAGNOSE TABLET " + tabletId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ShowResultSetMetaData getMetaData() {
|
||||
ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder();
|
||||
for (String title : TITLE_NAMES) {
|
||||
builder.addColumn(new Column(title, ScalarType.createVarchar(1024)));
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RedirectStatus getRedirectStatus() {
|
||||
return RedirectStatus.FORWARD_NO_SYNC;
|
||||
}
|
||||
}
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
package org.apache.doris.qe;
|
||||
|
||||
import org.apache.doris.analysis.AdminDiagnoseTabletStmt;
|
||||
import org.apache.doris.analysis.AdminShowConfigStmt;
|
||||
import org.apache.doris.analysis.AdminShowReplicaDistributionStmt;
|
||||
import org.apache.doris.analysis.AdminShowReplicaStatusStmt;
|
||||
@ -155,6 +156,7 @@ import org.apache.doris.load.LoadJob.JobState;
|
||||
import org.apache.doris.load.routineload.RoutineLoadJob;
|
||||
import org.apache.doris.mysql.privilege.PrivPredicate;
|
||||
import org.apache.doris.system.Backend;
|
||||
import org.apache.doris.system.Diagnoser;
|
||||
import org.apache.doris.system.SystemInfoService;
|
||||
import org.apache.doris.thrift.TUnit;
|
||||
import org.apache.doris.transaction.GlobalTransactionMgr;
|
||||
@ -333,6 +335,8 @@ public class ShowExecutor {
|
||||
handleShowTableCreation();
|
||||
} else if (stmt instanceof ShowLastInsertStmt) {
|
||||
handleShowLastInsert();
|
||||
} else if (stmt instanceof AdminDiagnoseTabletStmt) {
|
||||
handleAdminDiagnoseTablet();
|
||||
} else {
|
||||
handleEmtpy();
|
||||
}
|
||||
@ -2128,4 +2132,11 @@ public class ShowExecutor {
|
||||
resultSet = new ShowResultSet(showMetaData, resultRowSet);
|
||||
}
|
||||
|
||||
private void handleAdminDiagnoseTablet() {
|
||||
AdminDiagnoseTabletStmt showStmt = (AdminDiagnoseTabletStmt) stmt;
|
||||
List<List<String>> resultRowSet = Diagnoser.diagnoseTablet(showStmt.getTabletId());
|
||||
ShowResultSetMetaData showMetaData = showStmt.getMetaData();
|
||||
resultSet = new ShowResultSet(showMetaData, resultRowSet);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
163
fe/fe-core/src/main/java/org/apache/doris/system/Diagnoser.java
Normal file
163
fe/fe-core/src/main/java/org/apache/doris/system/Diagnoser.java
Normal file
@ -0,0 +1,163 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.system;
|
||||
|
||||
import org.apache.doris.catalog.Catalog;
|
||||
import org.apache.doris.catalog.Database;
|
||||
import org.apache.doris.catalog.MaterializedIndex;
|
||||
import org.apache.doris.catalog.OlapTable;
|
||||
import org.apache.doris.catalog.Partition;
|
||||
import org.apache.doris.catalog.Replica;
|
||||
import org.apache.doris.catalog.Tablet;
|
||||
import org.apache.doris.catalog.TabletInvertedIndex;
|
||||
import org.apache.doris.catalog.TabletMeta;
|
||||
import org.apache.doris.common.Config;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import org.json.simple.JSONObject;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
// This is a util class to diagnose the Doris system
|
||||
public class Diagnoser {
|
||||
// To diagnose a given tablet and return the info and issues about it
|
||||
// - tablet exist:
|
||||
// - tablet id
|
||||
// - database
|
||||
// - table
|
||||
// - partition
|
||||
// - materialized view
|
||||
// - replica info: {"replica_id" : "backend id"}
|
||||
// - replica num
|
||||
// - ReplicaBackendStatus
|
||||
// - ReplicaVersionStatus
|
||||
// - ReplicaStatus
|
||||
// - ReplicaCompactionStatus
|
||||
//
|
||||
public static List<List<String>> diagnoseTablet(long tabletId) {
|
||||
List<List<String>> results = Lists.newArrayList();
|
||||
TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex();
|
||||
TabletMeta tabletMeta = invertedIndex.getTabletMeta(tabletId);
|
||||
if (tabletMeta == null) {
|
||||
results.add(Lists.newArrayList("TabletExist", "No", ""));
|
||||
return results;
|
||||
}
|
||||
results.add(Lists.newArrayList("TabletExist", "Yes", ""));
|
||||
results.add(Lists.newArrayList("TabletId", String.valueOf(tabletId), ""));
|
||||
// database
|
||||
Database db = Catalog.getCurrentCatalog().getDbNullable(tabletMeta.getDbId());
|
||||
if (db == null) {
|
||||
results.add(Lists.newArrayList("Database", "Not exist", ""));
|
||||
return results;
|
||||
}
|
||||
results.add(Lists.newArrayList("Database", db.getFullName() + ": " + db.getId(), ""));
|
||||
// table
|
||||
OlapTable tbl = (OlapTable) db.getTableNullable(tabletMeta.getTableId());
|
||||
if (tbl == null) {
|
||||
results.add(Lists.newArrayList("Table", "Not exist", ""));
|
||||
return results;
|
||||
}
|
||||
results.add(Lists.newArrayList("Table", tbl.getName() + ": " + tbl.getId(), ""));
|
||||
// partition
|
||||
Partition partition = tbl.getPartition(tabletMeta.getPartitionId());
|
||||
if (partition == null) {
|
||||
results.add(Lists.newArrayList("Partition", "Not exist", ""));
|
||||
return results;
|
||||
}
|
||||
results.add(Lists.newArrayList("Partition", partition.getName() + ": " + partition.getId(), ""));
|
||||
// materialized index
|
||||
MaterializedIndex mIndex = partition.getIndex(tabletMeta.getIndexId());
|
||||
if (mIndex == null) {
|
||||
results.add(Lists.newArrayList("MaterializedIndex", "Not exist", ""));
|
||||
return results;
|
||||
}
|
||||
results.add(Lists.newArrayList("MaterializedIndex", tbl.getIndexNameById(mIndex.getId()) + ": " + mIndex.getId(), ""));
|
||||
// replica info
|
||||
Tablet tablet = mIndex.getTablet(tabletId);
|
||||
List<Replica> replicas = tablet.getReplicas();
|
||||
JSONObject jobj = new JSONObject();
|
||||
for (Replica replica : replicas) {
|
||||
jobj.put(replica.getId(), replica.getBackendId());
|
||||
}
|
||||
results.add(Lists.newArrayList("Replicas(ReplicaId -> BackendId)", jobj.toJSONString(), ""));
|
||||
// replica
|
||||
short replicaNum = tbl.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum();
|
||||
if (replicas.size() != replicaNum) {
|
||||
results.add(Lists.newArrayList("ReplicasNum", "Replica num is " + replicas.size() + ", expected: " + replicaNum, ""));
|
||||
} else {
|
||||
results.add(Lists.newArrayList("ReplicasNum", "OK", ""));
|
||||
}
|
||||
|
||||
SystemInfoService infoService = Catalog.getCurrentSystemInfo();
|
||||
StringBuilder backendErr = new StringBuilder();
|
||||
StringBuilder versionErr = new StringBuilder();
|
||||
StringBuilder statusErr = new StringBuilder();
|
||||
StringBuilder compactionErr = new StringBuilder();
|
||||
for (Replica replica : replicas) {
|
||||
// backend
|
||||
do {
|
||||
Backend be = infoService.getBackend(replica.getBackendId());
|
||||
if (be == null) {
|
||||
backendErr.append("Backend " + replica.getBackendId() + " does not exist. ");
|
||||
break;
|
||||
}
|
||||
if (!be.isAlive()) {
|
||||
backendErr.append("Backend " + replica.getBackendId() + " is not alive. ");
|
||||
break;
|
||||
}
|
||||
if (be.isDecommissioned()) {
|
||||
backendErr.append("Backend " + replica.getBackendId() + " is decommission. ");
|
||||
break;
|
||||
}
|
||||
if (!be.isLoadAvailable()) {
|
||||
backendErr.append("Backend " + replica.getBackendId() + " is not load available. ");
|
||||
break;
|
||||
}
|
||||
if (!be.isQueryAvailable()) {
|
||||
backendErr.append("Backend " + replica.getBackendId() + " is not query available. ");
|
||||
break;
|
||||
}
|
||||
if (be.diskExceedLimit()) {
|
||||
backendErr.append("Backend " + replica.getBackendId() + " has no space left. ");
|
||||
break;
|
||||
}
|
||||
} while (false);
|
||||
// version
|
||||
if (replica.getVersion() != partition.getVisibleVersion()) {
|
||||
versionErr.append("Replica on backend " + replica.getBackendId() + "'s version (" +
|
||||
replica.getVersion() + ") does not equal" +
|
||||
" to partition visible version (" + partition.getVisibleVersion() + ")");
|
||||
}
|
||||
// status
|
||||
if (!replica.isAlive()) {
|
||||
statusErr.append("Replica on backend " + replica.getBackendId() + "'s state is " + replica.getState()
|
||||
+ ", and is bad: " + (replica.isBad() ? "Yes" : "No"));
|
||||
}
|
||||
if (replica.getVersionCount() > Config.min_version_count_indicate_replica_compaction_too_slow) {
|
||||
compactionErr.append("Replica on backend " + replica.getBackendId() + "'s version count is too high: "
|
||||
+ replica.getVersionCount());
|
||||
}
|
||||
}
|
||||
results.add(Lists.newArrayList("ReplicaBackendStatus", (backendErr.length() == 0 ? "OK" : backendErr.toString()), ""));
|
||||
results.add(Lists.newArrayList("ReplicaVersionStatus", (versionErr.length() == 0 ? "OK" : versionErr.toString()), ""));
|
||||
results.add(Lists.newArrayList("ReplicaStatus", (statusErr.length() == 0 ? "OK" : statusErr.toString()), ""));
|
||||
results.add(Lists.newArrayList("ReplicaCompactionStatus", (compactionErr.length() == 0 ? "OK" : compactionErr.toString()), ""));
|
||||
return results;
|
||||
}
|
||||
}
|
||||
@ -164,6 +164,7 @@ import org.apache.doris.qe.SqlModeHelper;
|
||||
keywordMap.put("delete", new Integer(SqlParserSymbols.KW_DELETE));
|
||||
keywordMap.put("desc", new Integer(SqlParserSymbols.KW_DESC));
|
||||
keywordMap.put("describe", new Integer(SqlParserSymbols.KW_DESCRIBE));
|
||||
keywordMap.put("diagnose", new Integer(SqlParserSymbols.KW_DIAGNOSE));
|
||||
keywordMap.put("distinct", new Integer(SqlParserSymbols.KW_DISTINCT));
|
||||
keywordMap.put("distinctpc", new Integer(SqlParserSymbols.KW_DISTINCTPC));
|
||||
keywordMap.put("distinctpc", new Integer(SqlParserSymbols.KW_DISTINCTPC));
|
||||
|
||||
@ -29,6 +29,7 @@ import org.apache.doris.common.FeConstants;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
import org.apache.doris.resource.Tag;
|
||||
import org.apache.doris.system.Backend;
|
||||
import org.apache.doris.system.Diagnoser;
|
||||
import org.apache.doris.system.SystemInfoService;
|
||||
import org.apache.doris.thrift.TDisk;
|
||||
import org.apache.doris.thrift.TStorageMedium;
|
||||
@ -132,7 +133,9 @@ public class TabletReplicaTooSlowTest {
|
||||
private static void updateReplicaVersionCount() {
|
||||
Table<Long, Long, Replica> replicaMetaTable = Catalog.getCurrentInvertedIndex().getReplicaMetaTable();
|
||||
int versionCount = 1;
|
||||
long tabletId = -1;
|
||||
for (Table.Cell<Long, Long, Replica> cell : replicaMetaTable.cellSet()) {
|
||||
tabletId = cell.getRowKey();
|
||||
long beId = cell.getColumnKey();
|
||||
Backend be = Catalog.getCurrentSystemInfo().getBackend(beId);
|
||||
List<Long> pathHashes = be.getDisks().values().stream().map(DiskInfo::getPathHash).collect(Collectors.toList());
|
||||
@ -145,6 +148,10 @@ public class TabletReplicaTooSlowTest {
|
||||
|
||||
replica.setPathHash(pathHashes.get(0));
|
||||
}
|
||||
|
||||
List<List<String>> result = Diagnoser.diagnoseTablet(tabletId);
|
||||
Assert.assertEquals(12, result.size());
|
||||
Assert.assertTrue(result.get(11).get(1).contains("version count is too high"));
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -158,6 +165,7 @@ public class TabletReplicaTooSlowTest {
|
||||
" \"replication_num\" = \"3\"\n" +
|
||||
")";
|
||||
ExceptionChecker.expectThrowsNoException(() -> createTable(createStr));
|
||||
|
||||
int maxLoop = 300;
|
||||
boolean delete = false;
|
||||
while (maxLoop-- > 0) {
|
||||
|
||||
Reference in New Issue
Block a user