From 9c896efe0b319aa599cde69f11bb61944f9a9509 Mon Sep 17 00:00:00 2001 From: yujun Date: Wed, 19 Jun 2024 15:38:50 +0800 Subject: [PATCH] [fix](race) fix access colocate group ids race #36444 (#36501) cherry pick from #36444 --- .../doris/catalog/ColocateTableIndex.java | 2 +- .../clone/ColocateTableCheckerAndBalancer.java | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java index fcefcff132..470464407d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java @@ -429,7 +429,7 @@ public class ColocateTableIndex implements Writable { public Set getAllGroupIds() { readLock(); try { - return group2Tables.keySet(); + return Sets.newHashSet(group2Tables.keySet()); } finally { readUnlock(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java index 740acd331c..292013ec05 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java @@ -378,7 +378,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { * A B C D */ private void relocateAndBalanceGroups() { - Set groupIds = Sets.newHashSet(Env.getCurrentEnv().getColocateTableIndex().getAllGroupIds()); + Set groupIds = Env.getCurrentEnv().getColocateTableIndex().getAllGroupIds(); // balance only inside each group, excluded balance between all groups Set changeGroups = relocateAndBalanceGroup(groupIds, false); @@ -410,6 +410,10 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { } ColocateGroupSchema groupSchema = colocateIndex.getGroupSchema(groupId); + if (groupSchema == null) { + LOG.info("Not found colocate group {}, maybe delete", groupId); + continue; + } ReplicaAllocation replicaAlloc = groupSchema.getReplicaAlloc(); try { Env.getCurrentSystemInfo().checkReplicaAllocation(replicaAlloc); @@ -480,13 +484,18 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { // check each group Set groupIds = colocateIndex.getAllGroupIds(); for (GroupId groupId : groupIds) { + ColocateGroupSchema groupSchema = colocateIndex.getGroupSchema(groupId); + if (groupSchema == null) { + LOG.info("Not found colocate group {}, maybe delete", groupId); + continue; + } + List tableIds = colocateIndex.getAllTableIds(groupId); List> backendBucketsSeq = colocateIndex.getBackendsPerBucketSeqSet(groupId); if (backendBucketsSeq.isEmpty()) { continue; } - ColocateGroupSchema groupSchema = colocateIndex.getGroupSchema(groupId); ReplicaAllocation replicaAlloc = groupSchema.getReplicaAlloc(); String unstableReason = null; OUT: @@ -588,6 +597,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { for (GroupId groupId : groupIds) { ColocateGroupSchema groupSchema = colocateIndex.getGroupSchema(groupId); if (groupSchema == null) { + LOG.info("Not found colocate group {}, maybe delete", groupId); continue; } ReplicaAllocation replicaAlloc = groupSchema.getReplicaAlloc(); @@ -718,6 +728,10 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { GlobalColocateStatistic globalColocateStatistic, List> balancedBackendsPerBucketSeq, boolean balanceBetweenGroups) { ColocateGroupSchema groupSchema = colocateIndex.getGroupSchema(groupId); + if (groupSchema == null) { + LOG.info("Not found colocate group {}, maybe delete", groupId); + return false; + } short replicaNum = groupSchema.getReplicaAlloc().getReplicaNumByTag(tag); List> backendsPerBucketSeq = Lists.newArrayList( colocateIndex.getBackendsPerBucketSeqByTag(groupId, tag));