From f1362e0d1eae56c006c142879fc5fe3f7d9087c5 Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Thu, 2 Nov 2023 15:52:48 +0800 Subject: [PATCH] [fix](planner) Fix sample partition table (#25912) In the past, two conditions needed to be met when sampling a partitioned table: 1. Data is evenly distributed between partitions; 2. Data is evenly distributed between buckets. Finally, the number of sampled rows in each partition and each bucket is the same. Now, sampling will be proportional to the number of partitioned and bucketed rows. --- .../translator/PhysicalPlanTranslator.java | 2 +- .../apache/doris/planner/OlapScanNode.java | 129 ++++++++++-------- .../apache/doris/analysis/SelectStmtTest.java | 9 +- 3 files changed, 82 insertions(+), 58 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index ebf7a8dcd8..d4c6fe07c3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -624,7 +624,7 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor replicaOptional = replicas.stream() - .filter(r -> r.getId() == coolDownReplicaId).findAny(); + .filter(r -> r.getId() == coolDownReplicaId).findAny(); replicaOptional.ifPresent( r -> { Backend backend = Env.getCurrentSystemInfo() @@ -930,75 +933,91 @@ public class OlapScanNode extends ScanNode { } /** - * First, determine how many rows to sample from each partition according to the number of partitions. - * Then determine the number of Tablets to be selected for each partition according to the average number - * of rows of Tablet, - * If seek is not specified, the specified number of Tablets are pseudo-randomly selected from each partition. - * If seek is specified, it will be selected sequentially from the seek tablet of the partition. - * And add the manually specified Tablet id to the selected Tablet. - * simpleTabletNums = simpleRows / partitionNums / (partitionRows / partitionTabletNums) + * Sample some tablets in the selected partition. + * If Seek is specified, the tablets sampled each time are the same. */ public void computeSampleTabletIds() { if (tableSample == null) { return; } OlapTable olapTable = (OlapTable) desc.getTable(); - long sampleRows; // The total number of sample rows - long hitRows = 1; // The total number of rows hit by the tablet - long totalRows = 0; // The total number of partition rows hit - long totalTablet = 0; // The total number of tablets in the hit partition + + // 1. Calculate the total number of rows in the selected partition, and sort partition list. + long selectedRows = 0; + long totalSampleRows = 0; + List selectedPartitionList = new ArrayList<>(); + if (FeConstants.runningUnitTest && selectedIndexId == -1) { + selectedIndexId = olapTable.getBaseIndexId(); + } + for (Long partitionId : selectedPartitionIds) { + final Partition partition = olapTable.getPartition(partitionId); + final MaterializedIndex selectedTable = partition.getIndex(selectedIndexId); + selectedRows += selectedTable.getRowCount(); + selectedPartitionList.add(partitionId); + } + selectedPartitionList.sort(Comparator.naturalOrder()); + + // 2.Sampling is not required in some cases, will not take effect after clear sampleTabletIds. if (tableSample.isPercent()) { - sampleRows = (long) Math.max(olapTable.getRowCount() * (tableSample.getSampleValue() / 100.0), 1); + if (tableSample.getSampleValue() >= 100) { + return; + } + totalSampleRows = (long) Math.max(selectedRows * (tableSample.getSampleValue() / 100.0), 1); } else { - sampleRows = Math.max(tableSample.getSampleValue(), 1); + if (tableSample.getSampleValue() > selectedRows) { + return; + } + totalSampleRows = tableSample.getSampleValue(); } - // calculate the number of tablets by each partition - long avgRowsPerPartition = sampleRows / Math.max(olapTable.getPartitions().size(), 1); - - for (Partition p : olapTable.getPartitions()) { - List ids = p.getBaseIndex().getTabletIdsInOrder(); - - if (ids.isEmpty()) { + // 3. Sampling partition. If Seek is specified, the partition will be the same for each sampling. + long hitRows = 0; // The number of rows hit by the tablet + long partitionSeek = tableSample.getSeek() != -1 + ? tableSample.getSeek() : (long) (new SecureRandom().nextDouble() * selectedPartitionIds.size()); + for (int i = 0; i < selectedPartitionList.size(); i++) { + int seekPid = (int) ((i + partitionSeek) % selectedPartitionList.size()); + final Partition partition = olapTable.getPartition(selectedPartitionList.get(seekPid)); + final MaterializedIndex selectedTable = partition.getIndex(selectedIndexId); + List tablets = selectedTable.getTablets(); + if (tablets.isEmpty()) { continue; } - // Skip partitions with row count < row count / 2 expected to be sampled per partition. - // It can be expected to sample a smaller number of partitions to avoid uneven distribution - // of sampling results. - if (p.getBaseIndex().getRowCount() < (avgRowsPerPartition / 2)) { - continue; + // 4. Calculate the number of rows that need to be sampled in the current partition. + long sampleRows = 0; // The number of sample rows in partition + if (tableSample.isPercent()) { + sampleRows = (long) Math.max(selectedTable.getRowCount() * (tableSample.getSampleValue() / 100.0), 1); + } else { + sampleRows = (long) Math.max( + tableSample.getSampleValue() * (selectedTable.getRowCount() / selectedRows), 1); } - // It is assumed here that all tablets row count is uniformly distributed - // TODO Use `p.getBaseIndex().getTablet(n).getRowCount()` to get each tablet row count to compute sample. - long avgRowsPerTablet = Math.max(p.getBaseIndex().getRowCount() / ids.size(), 1); - long tabletCounts = Math.max( - avgRowsPerPartition / avgRowsPerTablet + (avgRowsPerPartition % avgRowsPerTablet != 0 ? 1 : 0), 1); - tabletCounts = Math.min(tabletCounts, ids.size()); - - long seek = tableSample.getSeek() != -1 - ? tableSample.getSeek() : (long) (new SecureRandom().nextDouble() * ids.size()); - for (int i = 0; i < tabletCounts; i++) { - int seekTid = (int) ((i + seek) % ids.size()); - sampleTabletIds.add(ids.get(seekTid)); + // 5. Sampling tablets. If Seek is specified, the same tablet will be sampled each time. + long tabletSeek = tableSample.getSeek() != -1 + ? tableSample.getSeek() : (long) (new SecureRandom().nextDouble() * tablets.size()); + for (int j = 0; j < tablets.size(); j++) { + int seekTid = (int) ((j + tabletSeek) % tablets.size()); + long tabletRowCount; + if (!FeConstants.runningUnitTest) { + tabletRowCount = tablets.get(seekTid).getRowCount(true); + } else { + tabletRowCount = selectedTable.getRowCount() / tablets.size(); + } + if (tabletRowCount == 0) { + continue; + } + sampleTabletIds.add(tablets.get(seekTid).getId()); + sampleRows -= tabletRowCount; + hitRows += tabletRowCount; + if (sampleRows <= 0) { + break; + } + } + if (hitRows > totalSampleRows) { + break; } - - hitRows += avgRowsPerTablet * tabletCounts; - totalRows += p.getBaseIndex().getRowCount(); - totalTablet += ids.size(); - } - - // all hit, direct full - if (totalRows < sampleRows) { - // can't fill full sample rows - sampleTabletIds.clear(); - } else if (sampleTabletIds.size() == totalTablet) { - // TODO add limit - sampleTabletIds.clear(); - } else if (!sampleTabletIds.isEmpty()) { - // TODO add limit } + LOG.debug("after computeSampleTabletIds, hitRows {}, selectedRows {}", hitRows, selectedRows); } public boolean isFromPrepareStmt() { diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java index f65843f511..f829457516 100755 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java @@ -26,6 +26,7 @@ import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Tablet; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; +import org.apache.doris.common.FeConstants; import org.apache.doris.common.util.Util; import org.apache.doris.planner.OlapScanNode; import org.apache.doris.planner.OriginalPlanner; @@ -858,6 +859,7 @@ public class SelectStmtTest { @Test public void testSelectSampleHashBucketTable() throws Exception { + FeConstants.runningUnitTest = true; Database db = Env.getCurrentInternalCatalog().getDbOrMetaException("default_cluster:db1"); OlapTable tbl = (OlapTable) db.getTableOrMetaException("table1"); long tabletId = 10031L; @@ -890,7 +892,7 @@ public class SelectStmtTest { String sql4 = "SELECT * FROM db1.table1 TABLESAMPLE(9500 ROWS)"; OriginalPlanner planner4 = (OriginalPlanner) dorisAssert.query(sql4).internalExecuteOneAndGetPlan(); Set sampleTabletIds4 = ((OlapScanNode) planner4.getScanNodes().get(0)).getSampleTabletIds(); - Assert.assertEquals(0, sampleTabletIds4.size()); // no sample, all tablet + Assert.assertEquals(10, sampleTabletIds4.size()); String sql5 = "SELECT * FROM db1.table1 TABLESAMPLE(11000 ROWS)"; OriginalPlanner planner5 = (OriginalPlanner) dorisAssert.query(sql5).internalExecuteOneAndGetPlan(); @@ -959,10 +961,12 @@ public class SelectStmtTest { OriginalPlanner planner16 = (OriginalPlanner) dorisAssert.query(sql16).internalExecuteOneAndGetPlan(); Set sampleTabletIds16 = ((OlapScanNode) planner16.getScanNodes().get(0)).getSampleTabletIds(); Assert.assertEquals(1, sampleTabletIds16.size()); + FeConstants.runningUnitTest = false; } @Test public void testSelectSampleRandomBucketTable() throws Exception { + FeConstants.runningUnitTest = true; Database db = Env.getCurrentInternalCatalog().getDbOrMetaException("default_cluster:db1"); OlapTable tbl = (OlapTable) db.getTableOrMetaException("table3"); long tabletId = 10031L; @@ -995,7 +999,7 @@ public class SelectStmtTest { String sql4 = "SELECT * FROM db1.table3 TABLESAMPLE(9500 ROWS)"; OriginalPlanner planner4 = (OriginalPlanner) dorisAssert.query(sql4).internalExecuteOneAndGetPlan(); Set sampleTabletIds4 = ((OlapScanNode) planner4.getScanNodes().get(0)).getSampleTabletIds(); - Assert.assertEquals(0, sampleTabletIds4.size()); // no sample, all tablet + Assert.assertEquals(10, sampleTabletIds4.size()); String sql5 = "SELECT * FROM db1.table3 TABLESAMPLE(11000 ROWS)"; OriginalPlanner planner5 = (OriginalPlanner) dorisAssert.query(sql5).internalExecuteOneAndGetPlan(); @@ -1064,6 +1068,7 @@ public class SelectStmtTest { OriginalPlanner planner16 = (OriginalPlanner) dorisAssert.query(sql16).internalExecuteOneAndGetPlan(); Set sampleTabletIds16 = ((OlapScanNode) planner16.getScanNodes().get(0)).getSampleTabletIds(); Assert.assertEquals(1, sampleTabletIds16.size()); + FeConstants.runningUnitTest = false; }