From 445f72b39514569a0d3dae4bee1539832046f3d6 Mon Sep 17 00:00:00 2001
From: Nitin-Kashyap <66766227+Nitin-Kashyap@users.noreply.github.com>
Date: Fri, 29 Dec 2023 23:09:50 +0700
Subject: [PATCH] [ut](stats) Added tests for HMS analysis tasks (#28583)

---
 .../doris/statistics/HMSAnalysisTask.java     |   2 +-
 .../doris/statistics/HMSAnalysisTaskTest.java | 164 ++++++++++++++++++
 2 files changed, 165 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index efd99d1eca..5053fc62a2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -88,7 +88,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
     /**
      * Get column statistics and insert the result to __internal_schema.column_statistics
      */
-    private void getTableColumnStats() throws Exception {
+    protected void getTableColumnStats() throws Exception {
         if (!info.usingSqlForPartitionColumn && isPartitionColumn()) {
             try {
                 getPartitionColumnStats();
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
index 12a1a9c046..fb0a3b3c2c 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java
@@ -19,12 +19,17 @@ package org.apache.doris.statistics;
 
 import org.apache.doris.analysis.TableSample;
 import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.catalog.external.HMSExternalTable;
 import org.apache.doris.common.Pair;
+import org.apache.doris.datasource.CatalogIf;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
+import mockit.Expectations;
 import mockit.Mock;
 import mockit.MockUp;
 import mockit.Mocked;
@@ -32,7 +37,9 @@ import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
+import java.util.Set;
 
 public class HMSAnalysisTaskTest {
 
@@ -160,4 +167,161 @@ public class HMSAnalysisTaskTest {
         Assertions.assertEquals(0, info2.second);
     }
 
+    /**
+     * Checks that {@code getSampleInfo()} returns the pair (1.5, 2048) for a
+     * {@code TableSample(true, 10L)} when the faked table reports chunk sizes 1024 and 2048.
+     */
+    @Test
+    public void testGetSampleInfoPercent(@Mocked HMSExternalTable tableIf)
+            throws Exception {
+        new MockUp<HMSExternalTable>() {
+            @Mock
+            public List<Long> getChunkSizes() {
+                return Arrays.asList(1024L, 2048L);
+            }
+        };
+        HMSAnalysisTask task = new HMSAnalysisTask();
+        task.setTable(tableIf);
+        AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
+        analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL);
+        analysisInfoBuilder.setAnalysisMethod(AnalysisInfo.AnalysisMethod.SAMPLE);
+        analysisInfoBuilder.setSamplePercent(10);
+        task.info = analysisInfoBuilder.build();
+
+        task.tableSample = new TableSample(true, 10L);
+        Pair<Double, Long> info = task.getSampleInfo();
+        Assertions.assertEquals(1.5, info.first);
+        Assertions.assertEquals(2048, info.second);
+    }
+
+    /**
+     * Checks the SQL passed to {@code runQuery} by {@code getTableColumnStats()} for column
+     * {@code hour} when {@code usingSqlForPartitionColumn} is set to {@code true}.
+     */
+    @Test
+    public void testOrdinaryStats(@Mocked CatalogIf catalogIf, @Mocked DatabaseIf databaseIf, @Mocked HMSExternalTable tableIf)
+            throws Exception {
+
+        new Expectations() {
+            {
+                tableIf.getId();
+                result = 30001;
+                tableIf.getName();
+                result = "test";
+                catalogIf.getId();
+                result = 10001;
+                catalogIf.getName();
+                result = "hms";
+                databaseIf.getId();
+                result = 20001;
+                databaseIf.getFullName();
+                result = "default";
+            }
+        };
+
+        new MockUp<HMSExternalTable>() {
+            @Mock
+            public Set<String> getPartitionNames() {
+                return ImmutableSet.of("date=20230101/hour=12");
+            }
+        };
+
+        new MockUp<HMSAnalysisTask>() {
+            @Mock
+            public void runQuery(String sql) {
+                Assertions.assertEquals("SELECT CONCAT(30001, '-', -1, '-', 'hour') AS `id`,"
+                        + " 10001 AS `catalog_id`,"
+                        + " 20001 AS `db_id`,"
+                        + " 30001 AS `tbl_id`,"
+                        + " -1 AS `idx_id`,"
+                        + " 'hour' AS `col_id`,"
+                        + " NULL AS `part_id`,"
+                        + " COUNT(1) AS `row_count`,"
+                        + " NDV(`hour`) AS `ndv`,"
+                        + " COUNT(1) - COUNT(`hour`) AS `null_count`,"
+                        + " SUBSTRING(CAST(MIN(`hour`) AS STRING), 1, 1024) AS `min`,"
+                        + " SUBSTRING(CAST(MAX(`hour`) AS STRING), 1, 1024) AS `max`,"
+                        + " COUNT(1) * 4 AS `data_size`,"
+                        + " NOW() AS `update_time`"
+                        + " FROM `hms`.`default`.`test`", sql);
+            }
+        };
+
+        HMSAnalysisTask task = new HMSAnalysisTask();
+        task.col = new Column("hour", PrimitiveType.INT);
+        task.tbl = tableIf;
+        task.catalog = catalogIf;
+        task.db = databaseIf;
+        task.setTable(tableIf);
+
+        AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
+        analysisInfoBuilder.setColName("hour");
+        analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL);
+        analysisInfoBuilder.setUsingSqlForPartitionColumn(true);
+        task.info = analysisInfoBuilder.build();
+
+        task.getTableColumnStats();
+    }
+
+
+    /**
+     * Checks the SQL passed to {@code runQuery} by {@code getTableColumnStats()} for column
+     * {@code hour} when {@code usingSqlForPartitionColumn} is set to {@code false} and the
+     * faked table lists {@code hour} among its partition columns.
+     */
+    @Test
+    public void testPartitionHMSStats(@Mocked CatalogIf catalogIf, @Mocked DatabaseIf databaseIf, @Mocked HMSExternalTable tableIf)
+            throws Exception {
+
+        new Expectations() {
+            {
+                tableIf.getId();
+                result = 30001;
+                catalogIf.getId();
+                result = 10001;
+                catalogIf.getName();
+                result = "hms";
+                databaseIf.getId();
+                result = 20001;
+            }
+        };
+
+        new MockUp<HMSExternalTable>() {
+            @Mock
+            public Set<String> getPartitionNames() {
+                return ImmutableSet.of("date=20230101/hour=12");
+            }
+
+            @Mock
+            public List<Column> getPartitionColumns() {
+                return ImmutableList.of(new Column("hour", PrimitiveType.INT));
+            }
+        };
+
+        new MockUp<HMSAnalysisTask>() {
+            @Mock
+            public void runQuery(String sql) {
+                Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-', 'hour') AS `id`, "
+                        + "10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, "
+                        + "'hour' AS `col_id`, NULL AS `part_id`, 0 AS `row_count`, 1 AS `ndv`, "
+                        + "0 AS `null_count`, SUBSTRING(CAST('12' AS STRING), 1, 1024) AS `min`, "
+                        + "SUBSTRING(CAST('12' AS STRING), 1, 1024) AS `max`, 0 AS `data_size`, NOW() ", sql);
+            }
+        };
+
+        HMSAnalysisTask task = new HMSAnalysisTask();
+        task.col = new Column("hour", PrimitiveType.INT);
+        task.tbl = tableIf;
+        task.catalog = catalogIf;
+        task.db = databaseIf;
+        task.setTable(tableIf);
+
+        AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
+        analysisInfoBuilder.setColName("hour");
+        analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL);
+        analysisInfoBuilder.setUsingSqlForPartitionColumn(false);
+        task.info = analysisInfoBuilder.build();
+
+        task.getTableColumnStats();
+    }
 }