diff --git a/fe/src/main/java/org/apache/doris/alter/SchemaChangeJob.java b/fe/src/main/java/org/apache/doris/alter/SchemaChangeJob.java index 7a07790821..e11575b526 100644 --- a/fe/src/main/java/org/apache/doris/alter/SchemaChangeJob.java +++ b/fe/src/main/java/org/apache/doris/alter/SchemaChangeJob.java @@ -1086,7 +1086,7 @@ public class SchemaChangeJob extends AlterJob { @Override public void getJobInfo(List> jobInfos, OlapTable tbl) { - if (changedIndexIdToSchema == null) { + if (changedIndexIdToSchemaVersion == null) { // for compatibility if (state == JobState.FINISHED || state == JobState.CANCELLED) { List jobInfo = new ArrayList(); diff --git a/fe/src/main/java/org/apache/doris/backup/BackupHandler.java b/fe/src/main/java/org/apache/doris/backup/BackupHandler.java index 9a65da3ec2..806e521a20 100644 --- a/fe/src/main/java/org/apache/doris/backup/BackupHandler.java +++ b/fe/src/main/java/org/apache/doris/backup/BackupHandler.java @@ -298,7 +298,7 @@ public class BackupHandler extends Daemon implements Writable { } // copy a table with selected partitions for calculating the signature - OlapTable copiedTbl = olapTbl.selectiveCopy(tblRef.getPartitions()); + OlapTable copiedTbl = olapTbl.selectiveCopy(tblRef.getPartitions(), true); if (copiedTbl == null) { ErrorReport.reportDdlException(ErrorCode.ERR_COMMON_ERROR, "Failed to copy table " + tblName + " with selected partitions"); diff --git a/fe/src/main/java/org/apache/doris/backup/BackupJob.java b/fe/src/main/java/org/apache/doris/backup/BackupJob.java index 55b2b4b158..ee2cf15d9c 100644 --- a/fe/src/main/java/org/apache/doris/backup/BackupJob.java +++ b/fe/src/main/java/org/apache/doris/backup/BackupJob.java @@ -418,7 +418,7 @@ public class BackupJob extends AbstractJob { for (TableRef tableRef : tableRefs) { String tblName = tableRef.getName().getTbl(); OlapTable tbl = (OlapTable) db.getTable(tblName); - OlapTable copiedTbl = tbl.selectiveCopy(tableRef.getPartitions()); + OlapTable copiedTbl = tbl.selectiveCopy(tableRef.getPartitions(), true); if (copiedTbl == null) { status = new Status(ErrCode.COMMON_ERROR, "faild to copy table: " + tblName); return; diff --git a/fe/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/src/main/java/org/apache/doris/catalog/Catalog.java index d5c7973b93..e538f1cd08 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/src/main/java/org/apache/doris/catalog/Catalog.java @@ -5812,7 +5812,7 @@ public class Catalog { } } - copiedTbl = olapTable.selectiveCopy(origPartitions.keySet()); + copiedTbl = olapTable.selectiveCopy(origPartitions.keySet(), true); } finally { db.readUnlock(); diff --git a/fe/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/src/main/java/org/apache/doris/catalog/OlapTable.java index 2b96889d59..4529e8427e 100644 --- a/fe/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -30,6 +30,8 @@ import org.apache.doris.analysis.TableName; import org.apache.doris.backup.Status; import org.apache.doris.backup.Status.ErrCode; import org.apache.doris.catalog.DistributionInfo.DistributionInfoType; +import org.apache.doris.catalog.MaterializedIndex.IndexState; +import org.apache.doris.catalog.Partition.PartitionState; import org.apache.doris.catalog.Replica.ReplicaState; import org.apache.doris.catalog.Tablet.TabletStatus; import org.apache.doris.clone.TabletSchedCtx; @@ -893,13 +895,28 @@ public class OlapTable extends Table { return true; } - public OlapTable selectiveCopy(Collection reservedPartNames) { + public OlapTable selectiveCopy(Collection reservedPartNames, boolean resetState) { OlapTable copied = new OlapTable(); if (!DeepCopy.copy(this, copied)) { LOG.warn("failed to copy olap table: " + getName()); return null; } + if (resetState) { + copied.setState(OlapTableState.NORMAL); + for (Partition partition : copied.getPartitions()) { + partition.setState(PartitionState.NORMAL); + for (MaterializedIndex idx : partition.getMaterializedIndices()) { + idx.setState(IndexState.NORMAL); + for (Tablet tablet : idx.getTablets()) { + for (Replica replica : tablet.getReplicas()) { + replica.setState(ReplicaState.NORMAL); + } + } + } + } + } + if (reservedPartNames == null || reservedPartNames.isEmpty()) { // reserve all return copied; diff --git a/fe/src/main/java/org/apache/doris/catalog/Replica.java b/fe/src/main/java/org/apache/doris/catalog/Replica.java index 40aea95263..2ef2233125 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Replica.java +++ b/fe/src/main/java/org/apache/doris/catalog/Replica.java @@ -203,7 +203,7 @@ public class Replica implements Writable { LOG.warn("update replica {} on backend {}'s version for recovery. version: {}-{}:{}-{}." + " last failed version: {}-{}:{}-{}, last success version: {}-{}:{}-{}", - this.version, this.versionHash, newVersion, newVersionHash, + this.id, this.backendId, this.version, this.versionHash, newVersion, newVersionHash, this.lastFailedVersion, this.lastFailedVersionHash, lastFailedVersion, lastFailedVersionHash, this.lastSuccessVersion, this.lastSuccessVersionHash, lastSuccessVersion, lastSuccessVersionHash); @@ -315,17 +315,17 @@ public class Replica implements Writable { this.lastSuccessVersion, this.lastSuccessVersionHash, dataSize, rowCount); } - public boolean checkVersionCatchUp(long committedVersion, long committedVersionHash) { - if (committedVersion == Partition.PARTITION_INIT_VERSION - && committedVersionHash == Partition.PARTITION_INIT_VERSION_HASH) { + public boolean checkVersionCatchUp(long expectedVersion, long expectedVersionHash) { + if (expectedVersion == Partition.PARTITION_INIT_VERSION + && expectedVersionHash == Partition.PARTITION_INIT_VERSION_HASH) { // no data is loaded into this replica, just return true return true; } - if (this.version < committedVersion - || (this.version == committedVersion && this.versionHash != committedVersionHash)) { + if (this.version < expectedVersion + || (this.version == expectedVersion && this.versionHash != expectedVersionHash)) { LOG.debug("replica version does not catch up with version: {}-{}. replica: {}", - committedVersion, committedVersionHash, this); + expectedVersion, expectedVersionHash, this); return false; } return true; @@ -371,6 +371,8 @@ public class Replica implements Writable { strBuffer.append(lastSuccessVersionHash); strBuffer.append(", lastFailedTimestamp="); strBuffer.append(lastFailedTimestamp); + strBuffer.append(", schemaHash"); + strBuffer.append(schemaHash); return strBuffer.toString(); } diff --git a/fe/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/src/main/java/org/apache/doris/catalog/Tablet.java index 589be15811..082943aded 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Tablet.java +++ b/fe/src/main/java/org/apache/doris/catalog/Tablet.java @@ -170,12 +170,12 @@ public class Tablet extends MetaObject implements Writable { // for query public void getQueryableReplicas(List allQuerableReplica, List localReplicas, - long committedVersion, long committedVersionHash, long localBeId, int schemaHash) { + long visibleVersion, long visibleVersionHash, long localBeId, int schemaHash) { for (Replica replica : replicas) { ReplicaState state = replica.getState(); if (state == ReplicaState.NORMAL || state == ReplicaState.SCHEMA_CHANGE) { // replica.getSchemaHash() == -1 is for compatibility - if (replica.checkVersionCatchUp(committedVersion, committedVersionHash) + if (replica.checkVersionCatchUp(visibleVersion, visibleVersionHash) && (replica.getSchemaHash() == -1 || replica.getSchemaHash() == schemaHash)) { allQuerableReplica.add(replica); if (localBeId != -1 && replica.getBackendId() == localBeId) { diff --git a/fe/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java b/fe/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java index d7bdd6128e..9fd28986f7 100644 --- a/fe/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java +++ b/fe/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java @@ -318,9 +318,23 @@ public class TabletInvertedIndex { * if be's report version < fe's meta version, it means some version is missing in BE * because of some unrecoverable failure. */ - private boolean checkNeedRecover(Replica replicaMeta, long backendVersion, long backendVersionHash) { - long metaVersion = replicaMeta.getVersion(); - if (metaVersion > backendVersion) { + private boolean checkNeedRecover(Replica replicaInFe, long backendVersion, long backendVersionHash) { + if (replicaInFe.getVersion() == 2 && replicaInFe.getVersionHash() == 0 + && backendVersion == 1 && backendVersionHash == 0) { + /* + * This is very tricky: + * 1. The newly created replica in FE is with version 1-0, but the new replica is BE is 2-0 + * 2. After the first tablet report, replica in FE with be sync with BE, update its version to 2-0 + * 3. A snapshot of replica with version 2-0 on BE is 1-0 (Because we send snapshot task with + * partition's version, which is 1-0) + * 4. And BE will report version 1-0, but in FE, its 2-0, so we fall into here. + * + * So here we ignore this kind of report + */ + return false; + } + + if (backendVersion < replicaInFe.getVersion()) { return true; } return false; diff --git a/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java index 40bd54a203..44d82f5ce3 100644 --- a/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -1119,18 +1119,20 @@ public class TabletScheduler extends Daemon { return num; } - public synchronized List getSlotInfo(long beId) { - List result = Lists.newArrayList(); + public synchronized List> getSlotInfo(long beId) { + List> results = Lists.newArrayList(); pathSlots.entrySet().stream().forEach(t -> { t.getValue().rectify(); + List result = Lists.newArrayList(); result.add(String.valueOf(beId)); result.add(String.valueOf(t.getKey())); result.add(String.valueOf(t.getValue().available)); result.add(String.valueOf(t.getValue().total)); result.add(String.valueOf(t.getValue().balanceSlot)); result.add(String.valueOf(t.getValue().getAvgRate())); + results.add(result); }); - return result; + return results; } public synchronized long takeBalanceSlot(long pathHash) { @@ -1173,7 +1175,7 @@ public class TabletScheduler extends Daemon { List> result = Lists.newArrayList(); for (long beId : backendsWorkingSlots.keySet()) { PathSlot slot = backendsWorkingSlots.get(beId); - result.add(slot.getSlotInfo(beId)); + result.addAll(slot.getSlotInfo(beId)); } return result; } diff --git a/fe/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/src/main/java/org/apache/doris/planner/OlapScanNode.java index a1a73237ca..2efb62ef15 100644 --- a/fe/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -441,11 +441,15 @@ public class OlapScanNode extends ScanNode { List allQueryableReplicas = Lists.newArrayList(); List localReplicas = Lists.newArrayList(); tablet.getQueryableReplicas(allQueryableReplicas, localReplicas, - visibleVersion, visibleVersionHash, - localBeId, schemaHash); + visibleVersion, visibleVersionHash, localBeId, schemaHash); if (allQueryableReplicas.isEmpty()) { - LOG.error("no queryable replica found in tablet[{}]. committed version[{}], committed version hash[{}]", + LOG.error("no queryable replica found in tablet {}. visible version {}-{}", tabletId, visibleVersion, visibleVersionHash); + if (LOG.isDebugEnabled()) { + for (Replica replica : tablet.getReplicas()) { + LOG.debug("tablet {}, replica: {}", tabletId, replica.toString()); + } + } throw new UserException("Failed to get scan range, no queryable replica found in tablet: " + tabletId); } diff --git a/fe/src/test/java/org/apache/doris/catalog/ReplicaTest.java b/fe/src/test/java/org/apache/doris/catalog/ReplicaTest.java index 50cc42d1dd..4f0e210d6c 100644 --- a/fe/src/test/java/org/apache/doris/catalog/ReplicaTest.java +++ b/fe/src/test/java/org/apache/doris/catalog/ReplicaTest.java @@ -140,7 +140,6 @@ public class ReplicaTest { // 3. Check equal for (int i = 0; i < 11; i++) { Assert.assertTrue(list1.get(i).equals(list2.get(i))); - Assert.assertTrue(list1.get(i).toString().equals(list2.get(i).toString())); } Assert.assertTrue(list1.get(1).equals(list1.get(1)));