[improvement](statistics)Use min row count of all replicas as tablet/table row count. (#41894) (#41978)

backport: https://github.com/apache/doris/pull/41894
This commit is contained in:
Jibing-Li
2024-10-16 21:45:37 +08:00
committed by GitHub
parent 7d99d5fcc4
commit d04082f685
4 changed files with 66 additions and 4 deletions

View File

@ -509,6 +509,23 @@ public class Tablet extends MetaObject implements Writable {
return singleReplica ? Double.valueOf(s.average().orElse(0)).longValue() : s.sum();
}
/**
 * Returns the row count of the most trustworthy replica of this tablet.
 *
 * <p>Only replicas that are alive and whose version has caught up with
 * {@code version} are considered. Among those, the replicas with the highest
 * version win; if several share that highest version, the one with the fewest
 * rows is chosen, on the rationale that the replica with the fewest rows has
 * performed the most compaction and is therefore the most accurate.
 *
 * @param version the version a replica must have caught up with to be considered
 * @return the selected replica's row count, or 0 when no replica qualifies
 */
public long getMinReplicaRowCount(long version) {
    // Long.MAX_VALUE doubles as the "nothing selected yet" sentinel.
    long bestRowCount = Long.MAX_VALUE;
    long newestVersion = 0;
    for (Replica candidate : replicas) {
        // Skip replicas that are not alive or lag behind the requested version.
        if (!candidate.isAlive() || !candidate.checkVersionCatchUp(version, false)) {
            continue;
        }
        long candidateVersion = candidate.getVersion();
        long candidateRows = candidate.getRowCount();
        boolean strictlyNewer = candidateVersion > newestVersion;
        boolean sameVersionFewerRows = candidateVersion == newestVersion && candidateRows < bestRowCount;
        if (strictlyNewer || sameVersionFewerRows) {
            // A strictly newer replica replaces the current pick even if it has more rows.
            bestRowCount = candidateRows;
            newestVersion = candidateVersion;
        }
    }
    if (bestRowCount == Long.MAX_VALUE) {
        return 0;
    }
    return bestRowCount;
}
/**
* A replica is healthy only if
* 1. the backend is available

View File

@ -120,14 +120,17 @@ public class TabletStatMgr extends MasterDaemon {
long indexRowCount = 0L;
boolean indexReported = true;
for (Tablet tablet : index.getTablets()) {
long tabletRowCount = 0L;
long tabletRowCount = Long.MAX_VALUE;
boolean tabletReported = false;
for (Replica replica : tablet.getReplicas()) {
LOG.debug("Table {} replica {} current version {}, report version {}",
olapTable.getName(), replica.getId(),
replica.getVersion(), replica.getLastReportVersion());
// Among replicas whose versions are identical, the one with the smaller row count
// is more accurate than the others, because a smaller row count means
// that replica has done more compaction.
if (replica.checkVersionCatchUp(version, false)
&& replica.getRowCount() >= tabletRowCount) {
&& replica.getRowCount() < tabletRowCount) {
// 1. If replica version and reported replica version are all equal to
// PARTITION_INIT_VERSION, set tabletReported to true, which indicates this
// tablet is empty for sure when previous report.
@ -144,6 +147,11 @@ public class TabletStatMgr extends MasterDaemon {
tabletRowCount = replica.getRowCount();
}
}
// When all BEs are down, avoid adding Long.MAX_VALUE to the index and table row counts; use 0 instead.
if (tabletRowCount == Long.MAX_VALUE) {
tabletRowCount = 0L;
}
indexRowCount += tabletRowCount;
// Only when all tablets of this index are reported, we set indexReported to true.
indexReported = indexReported && tabletReported;

View File

@ -194,7 +194,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
params.put("index", getIndex());
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
String sql = stringSubstitutor.replace(BASIC_STATS_TEMPLATE);
ResultRow resultRow = null;
ResultRow resultRow;
try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext(false)) {
stmtExecutor = new StmtExecutor(r.connectContext, sql);
resultRow = stmtExecutor.executeInternalQuery().get(0);
@ -287,7 +287,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
int seekTid = (int) ((i + seek) % ids.size());
long tabletId = ids.get(seekTid);
sampleTabletIds.add(tabletId);
actualSampledRowCount += materializedIndex.getTablet(tabletId).getRowCount(true);
actualSampledRowCount += materializedIndex.getTablet(tabletId)
.getMinReplicaRowCount(p.getVisibleVersion());
if (actualSampledRowCount >= sampleRows && !forPartitionColumn) {
enough = true;
break;

View File

@ -213,4 +213,40 @@ public class TabletTest {
Pair.of(1L, false), Pair.of(2L, false), Pair.of(3L, false), Pair.of(4L, true)
);
}
/**
 * Exercises Tablet#getMinReplicaRowCount with zero, one and several replicas,
 * a replica marked bad, and a replica that moves ahead in version.
 */
@Test
public void testGetMinReplicaRowCount() {
    // A tablet without any replica reports 0 rows.
    Tablet tablet = new Tablet(1);
    Assert.assertEquals(0, tablet.getMinReplicaRowCount(1));

    // Single replica: counted when asked for version 10, ignored for version 11.
    Replica first = new Replica(1, 1, 10, 0, 0, 0, 100, ReplicaState.NORMAL, 0, 10);
    tablet.addReplica(first);
    Assert.assertEquals(100, tablet.getMinReplicaRowCount(10));
    Assert.assertEquals(0, tablet.getMinReplicaRowCount(11));

    // Two more replicas built with the same arguments except id and row count (110 and 90).
    Replica second = new Replica(2, 2, 10, 0, 0, 0, 110, ReplicaState.NORMAL, 0, 10);
    Replica third = new Replica(3, 3, 10, 0, 0, 0, 90, ReplicaState.NORMAL, 0, 10);
    tablet.addReplica(second);
    tablet.addReplica(third);
    // Still nothing has caught up with version 11.
    Assert.assertEquals(0, tablet.getMinReplicaRowCount(11));
    // All three qualify for version 9; the smallest row count wins.
    Assert.assertEquals(90, tablet.getMinReplicaRowCount(9));

    // Marking the smallest replica bad removes it from consideration ...
    third.setBad(true);
    Assert.assertEquals(100, tablet.getMinReplicaRowCount(9));
    // ... and clearing the flag brings it back.
    third.setBad(false);
    Assert.assertEquals(90, tablet.getMinReplicaRowCount(9));

    // A replica at a newer version takes precedence even though it has the most rows.
    second.updateVersion(11);
    Assert.assertEquals(110, tablet.getMinReplicaRowCount(9));
}
}