From d8e53da764e3ba2810e8fbd5ca1c6dfce65dde5b Mon Sep 17 00:00:00 2001 From: ElvinWei Date: Tue, 18 Oct 2022 16:34:01 +0800 Subject: [PATCH] [feature-wip](statistics) collect statistics by sampling sql-tasks (#13399) 1. Collect statistics by sampling sql-tasks. 2. Consolidate statistics SQL statements and remove redundant statements. --- .../doris/statistics/SQLStatisticsTask.java | 26 +- .../statistics/SampleSQLStatisticsTask.java | 25 +- .../statistics/util/InternalSqlTemplate.java | 235 +++------ .../SampleSQLStatisticsTaskTest.java | 205 ++++++++ .../util/InternalSqlTemplateTest.java | 463 +++++++----------- 5 files changed, 490 insertions(+), 464 deletions(-) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/SampleSQLStatisticsTaskTest.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/SQLStatisticsTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/SQLStatisticsTask.java index 741803ca88..e12a04b3f2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/SQLStatisticsTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/SQLStatisticsTask.java @@ -28,6 +28,7 @@ import org.apache.doris.statistics.util.InternalQuery; import org.apache.doris.statistics.util.InternalQueryResult; import org.apache.doris.statistics.util.InternalQueryResult.ResultRow; import org.apache.doris.statistics.util.InternalSqlTemplate; +import org.apache.doris.statistics.util.InternalSqlTemplate.QueryType; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -40,7 +41,9 @@ import java.util.Map; * The results of the query will be returned as @StatisticsTaskResult. */ public class SQLStatisticsTask extends StatisticsTask { - private String statement; + protected QueryType queryType = QueryType.FULL; + + protected String statement; public SQLStatisticsTask(long jobId, List statsDescs) { super(jobId, statsDescs); @@ -55,6 +58,7 @@ public class SQLStatisticsTask extends StatisticsTask { statement = constructQuery(statsDesc); TaskResult taskResult = executeQuery(statsDesc); taskResults.add(taskResult); + LOG.info("Collected statistics successfully by SQL: {}", statement); } return new StatisticsTaskResult(taskResults); @@ -63,6 +67,7 @@ public class SQLStatisticsTask extends StatisticsTask { protected String constructQuery(StatisticsDesc statsDesc) throws DdlException, InvalidFormatException { Map params = getQueryParams(statsDesc); + List statsTypes = statsDesc.getStatsTypes(); StatsType type = statsTypes.get(0); @@ -72,20 +77,20 @@ public class SQLStatisticsTask extends StatisticsTask { switch (type) { case ROW_COUNT: - return nonPartitioned ? InternalSqlTemplate.buildStatsRowCountSql(params) - : InternalSqlTemplate.buildStatsPartitionRowCountSql(params); + return nonPartitioned ? InternalSqlTemplate.buildStatsRowCountSql(params, queryType) + : InternalSqlTemplate.buildStatsPartitionRowCountSql(params, queryType); case NUM_NULLS: - return nonPartitioned ? InternalSqlTemplate.buildStatsNumNullsSql(params) - : InternalSqlTemplate.buildStatsPartitionNumNullsSql(params); + return nonPartitioned ? InternalSqlTemplate.buildStatsNumNullsSql(params, queryType) + : InternalSqlTemplate.buildStatsPartitionNumNullsSql(params, queryType); case MAX_SIZE: case AVG_SIZE: - return nonPartitioned ? InternalSqlTemplate.buildStatsMaxAvgSizeSql(params) - : InternalSqlTemplate.buildStatsPartitionMaxAvgSizeSql(params); + return nonPartitioned ? InternalSqlTemplate.buildStatsMaxAvgSizeSql(params, queryType) + : InternalSqlTemplate.buildStatsPartitionMaxAvgSizeSql(params, queryType); case NDV: case MAX_VALUE: case MIN_VALUE: - return nonPartitioned ? InternalSqlTemplate.buildStatsMinMaxNdvValueSql(params) - : InternalSqlTemplate.buildStatsPartitionMinMaxNdvValueSql(params); + return nonPartitioned ? InternalSqlTemplate.buildStatsMinMaxNdvValueSql(params, queryType) + : InternalSqlTemplate.buildStatsPartitionMinMaxNdvValueSql(params, queryType); case DATA_SIZE: default: throw new DdlException("Unsupported statistics type: " + type); @@ -122,7 +127,7 @@ public class SQLStatisticsTask extends StatisticsTask { + statement + " queryResult: " + queryResult); } - private Map getQueryParams(StatisticsDesc statsDesc) throws DdlException { + protected Map getQueryParams(StatisticsDesc statsDesc) throws DdlException { StatsCategory category = statsDesc.getStatsCategory(); Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(category.getDbId()); Table table = db.getTableOrDdlException(category.getTableId()); @@ -131,6 +136,7 @@ public class SQLStatisticsTask extends StatisticsTask { params.put(InternalSqlTemplate.TABLE, table.getName()); params.put(InternalSqlTemplate.PARTITION, category.getPartitionName()); params.put(InternalSqlTemplate.COLUMN, category.getColumnName()); + return params; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/SampleSQLStatisticsTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/SampleSQLStatisticsTask.java index 89ac522927..9cf3dd365d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/SampleSQLStatisticsTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/SampleSQLStatisticsTask.java @@ -18,19 +18,32 @@ package org.apache.doris.statistics; import org.apache.doris.common.Config; +import org.apache.doris.common.DdlException; +import org.apache.doris.statistics.util.InternalSqlTemplate; +import org.apache.doris.statistics.util.InternalSqlTemplate.QueryType; import java.util.List; +import java.util.Map; -/* -The @SampleSQLStatisticsTask is also a statistical task that executes a query -and uses the query result as a statistical value (same as @SQLStatisticsTask). -The only difference from the SQLStatisticsTask is that the query is a sampling table query. +/** + * The @SampleSQLStatisticsTask is also a statistical task that executes a query + * and uses the query result as a statistical value (same as @SQLStatisticsTask). + * The only difference from the SQLStatisticsTask is that the query is a sampling table query. */ public class SampleSQLStatisticsTask extends SQLStatisticsTask { - private float samplePercentage = Config.cbo_default_sample_percentage; + // TODO(wzt): If the job configuration has percentage value, obtain from the job, + // if not, use the default value. + private int samplePercentage = Config.cbo_default_sample_percentage; public SampleSQLStatisticsTask(long jobId, List statsDescs) { - // TODO(wzt): implement sql sampling to collect statistics super(jobId, statsDescs); + queryType = QueryType.SAMPLE; + } + + @Override + protected Map getQueryParams(StatisticsDesc statsDesc) throws DdlException { + Map params = super.getQueryParams(statsDesc); + params.put(InternalSqlTemplate.PERCENT, String.valueOf(samplePercentage)); + return params; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalSqlTemplate.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalSqlTemplate.java index 33a26c4ce1..207b560bb3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalSqlTemplate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/InternalSqlTemplate.java @@ -35,50 +35,62 @@ import java.util.regex.Pattern; * - ${column} and ${table} will be replaced with the actual executed table and column. */ public class InternalSqlTemplate { + /** Sample query or full query of statistics. */ + public enum QueryType { + FULL, + SAMPLE + } + /** common parameters: tableName, columnName, partitionName */ public static final String TABLE = "table"; public static final String PARTITION = "partition"; public static final String COLUMN = "column"; + public static final String PERCENT = "percent"; /** -------------------------- for statistics begin -------------------------- */ - public static final String MIN_VALUE_SQL = "SELECT MIN(${column}) AS min_value FROM ${table};"; - public static final String PARTITION_MIN_VALUE_SQL = "SELECT MIN(${column}) AS min_value" - + " FROM ${table} PARTITION (${partition});"; - - public static final String MAX_VALUE_SQL = "SELECT MAX(${column}) AS max_value FROM ${table};"; - public static final String PARTITION_MAX_VALUE_SQL = "SELECT MAX(${column}) AS max_value FROM" - + " ${table} PARTITION (${partition});"; - - public static final String NDV_VALUE_SQL = "SELECT NDV(${column}) AS ndv FROM ${table};"; - public static final String PARTITION_NDV_VALUE_SQL = "SELECT NDV(${column}) AS ndv FROM" - + " ${table} PARTITION (${partition});"; - public static final String MIN_MAX_NDV_VALUE_SQL = "SELECT MIN(${column}) AS min_value, MAX(${column})" + " AS max_value, NDV(${column}) AS ndv FROM ${table};"; public static final String PARTITION_MIN_MAX_NDV_VALUE_SQL = "SELECT MIN(${column}) AS min_value," - + " MAX(${column}) AS max_value, NDV(${column}) AS ndv FROM ${table} PARTITION (${partition});"; + + " MAX(${column}) AS max_value, NDV(${column}) AS ndv FROM ${table} PARTITIONS (${partition});"; public static final String ROW_COUNT_SQL = "SELECT COUNT(1) AS row_count FROM ${table};"; public static final String PARTITION_ROW_COUNT_SQL = "SELECT COUNT(1) AS row_count FROM ${table} PARTITION" + " (${partition});"; - public static final String MAX_SIZE_SQL = "SELECT MAX(LENGTH(${column})) AS max_size FROM ${table};"; - public static final String PARTITION_MAX_SIZE_SQL = "SELECT MAX(LENGTH(${column})) AS max_size FROM" - + " ${table} PARTITION (${partition});"; - - public static final String AVG_SIZE_SQL = "SELECT AVG(LENGTH(${column})) AS avg_size FROM ${table};"; - public static final String PARTITION_AVG_SIZE_SQL = "SELECT AVG(LENGTH(${column})) AS avg_size" - + " FROM ${table} PARTITION (${partition});"; - public static final String MAX_AVG_SIZE_SQL = "SELECT MAX(LENGTH(${column})) AS max_size," + " AVG(LENGTH(${column})) AS avg_size FROM ${table};"; public static final String PARTITION_MAX_AVG_SIZE_SQL = "SELECT MAX(LENGTH(${column}))" - + " AS max_size, AVG(LENGTH(${column})) AS avg_size FROM ${table} PARTITION (${partition});"; + + " AS max_size, AVG(LENGTH(${column})) AS avg_size FROM ${table} PARTITIONS (${partition});"; public static final String NUM_NULLS_SQL = "SELECT COUNT(1) AS num_nulls FROM ${table}" + " WHERE ${column} IS NULL;"; public static final String PARTITION_NUM_NULLS_SQL = "SELECT COUNT(1) AS num_nulls FROM" - + " ${table} PARTITION (${partition}) WHERE ${column} IS NULL;"; + + " ${table} PARTITIONS (${partition}) WHERE ${column} IS NULL;"; + + // Sample SQL + public static final String SAMPLE_MIN_MAX_NDV_VALUE_SQL = "SELECT MIN(${column}) AS min_value, MAX(${column})" + + " AS max_value, NDV(${column}) AS ndv FROM ${table} TABLESAMPLE(${percent} PERCENT);"; + public static final String SAMPLE_PARTITION_MIN_MAX_NDV_VALUE_SQL = "SELECT MIN(${column}) AS min_value," + + " MAX(${column}) AS max_value, NDV(${column}) AS ndv FROM ${table} PARTITIONS (${partition})" + + " TABLESAMPLE(${percent} PERCENT);"; + + public static final String SAMPLE_ROW_COUNT_SQL = "SELECT COUNT(1) AS row_count FROM ${table}" + + " TABLESAMPLE(${percent} PERCENT);"; + public static final String SAMPLE_PARTITION_ROW_COUNT_SQL = "SELECT COUNT(1) AS row_count FROM ${table}" + + " PARTITIONS (${partition}) TABLESAMPLE(${percent} PERCENT);"; + + public static final String SAMPLE_MAX_AVG_SIZE_SQL = "SELECT MAX(LENGTH(${column})) AS max_size," + + " AVG(LENGTH(${column})) AS avg_size FROM ${table} TABLESAMPLE(${percent} PERCENT);"; + public static final String SAMPLE_PARTITION_MAX_AVG_SIZE_SQL = "SELECT MAX(LENGTH(${column}))" + + " AS max_size, AVG(LENGTH(${column})) AS avg_size FROM ${table} PARTITIONS (${partition})" + + " TABLESAMPLE(${percent} PERCENT);"; + + + public static final String SAMPLE_NUM_NULLS_SQL = "SELECT COUNT(1) AS num_nulls FROM ${table}" + + " TABLESAMPLE(${percent} PERCENT) WHERE ${column} IS NULL;"; + public static final String SAMPLE_PARTITION_NUM_NULLS_SQL = "SELECT COUNT(1) AS num_nulls FROM" + + " ${table} PARTITIONS (${partition}) TABLESAMPLE(${percent} PERCENT) WHERE ${column} IS NULL;"; + /** ---------------------------- for statistics end ---------------------------- */ private static final Logger LOG = LogManager.getLogger(InternalSqlTemplate.class); @@ -97,7 +109,12 @@ public class InternalSqlTemplate { * @param params k,v parameter, if without parameter, params should be null * @return SQL statement with parameters concatenated */ - public static String processTemplate(String template, Map params) { + public static String processTemplate(String template, Map params) throws InvalidFormatException { + Set requiredParams = getTemplateParams(template); + if (!checkParams(requiredParams, params)) { + throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); + } + Matcher matcher = PATTERN.matcher(template); StringBuffer sb = new StringBuffer(); @@ -113,167 +130,69 @@ public class InternalSqlTemplate { return sb.toString(); } - public static String buildStatsMinValueSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(MIN_VALUE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(MIN_VALUE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsPartitionMinValueSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(PARTITION_MIN_VALUE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(PARTITION_MIN_VALUE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsMaxValueSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(MAX_VALUE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(MAX_VALUE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsPartitionMaxValueSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(PARTITION_MAX_VALUE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(PARTITION_MAX_VALUE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsNdvValueSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(NDV_VALUE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(NDV_VALUE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsPartitionNdvValueSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(PARTITION_NDV_VALUE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(PARTITION_NDV_VALUE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsMinMaxNdvValueSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(MIN_MAX_NDV_VALUE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(MIN_MAX_NDV_VALUE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsPartitionMinMaxNdvValueSql(Map params) + public static String buildStatsMinMaxNdvValueSql(Map params, QueryType queryType) throws InvalidFormatException { - Set requiredParams = getTemplateParams(PARTITION_MIN_MAX_NDV_VALUE_SQL); - if (checkParams(requiredParams, params)) { + if (queryType == QueryType.FULL) { + return processTemplate(MIN_MAX_NDV_VALUE_SQL, params); + } + return processTemplate(SAMPLE_MIN_MAX_NDV_VALUE_SQL, params); + } + + public static String buildStatsPartitionMinMaxNdvValueSql(Map params, QueryType queryType) + throws InvalidFormatException { + if (queryType == QueryType.FULL) { return processTemplate(PARTITION_MIN_MAX_NDV_VALUE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); } + return processTemplate(SAMPLE_PARTITION_MIN_MAX_NDV_VALUE_SQL, params); } - public static String buildStatsRowCountSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(ROW_COUNT_SQL); - if (checkParams(requiredParams, params)) { + public static String buildStatsRowCountSql(Map params, QueryType queryType) + throws InvalidFormatException { + if (queryType == QueryType.FULL) { return processTemplate(ROW_COUNT_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); } + return processTemplate(SAMPLE_ROW_COUNT_SQL, params); } - public static String buildStatsPartitionRowCountSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(PARTITION_ROW_COUNT_SQL); - if (checkParams(requiredParams, params)) { + public static String buildStatsPartitionRowCountSql(Map params, QueryType queryType) + throws InvalidFormatException { + if (queryType == QueryType.FULL) { return processTemplate(PARTITION_ROW_COUNT_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); } + return processTemplate(SAMPLE_PARTITION_ROW_COUNT_SQL, params); } - public static String buildStatsMaxSizeSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(MAX_SIZE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(MAX_SIZE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsPartitionMaxSizeSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(PARTITION_MAX_SIZE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(PARTITION_MAX_SIZE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsAvgSizeSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(AVG_SIZE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(AVG_SIZE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsPartitionAvgSizeSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(PARTITION_AVG_SIZE_SQL); - if (checkParams(requiredParams, params)) { - return processTemplate(PARTITION_AVG_SIZE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); - } - } - - public static String buildStatsMaxAvgSizeSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(MAX_AVG_SIZE_SQL); - if (checkParams(requiredParams, params)) { + public static String buildStatsMaxAvgSizeSql(Map params, QueryType queryType) + throws InvalidFormatException { + if (queryType == QueryType.FULL) { return processTemplate(MAX_AVG_SIZE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); } + return processTemplate(SAMPLE_MAX_AVG_SIZE_SQL, params); } - public static String buildStatsPartitionMaxAvgSizeSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(PARTITION_MAX_AVG_SIZE_SQL); - if (checkParams(requiredParams, params)) { + // SAMPLE_PARTITION_MAX_AVG_SIZE_SQL + public static String buildStatsPartitionMaxAvgSizeSql(Map params, QueryType queryType) + throws InvalidFormatException { + if (queryType == QueryType.FULL) { return processTemplate(PARTITION_MAX_AVG_SIZE_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); } + return processTemplate(SAMPLE_PARTITION_MAX_AVG_SIZE_SQL, params); } - public static String buildStatsNumNullsSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(NUM_NULLS_SQL); - if (checkParams(requiredParams, params)) { + public static String buildStatsNumNullsSql(Map params, QueryType queryType) + throws InvalidFormatException { + if (queryType == QueryType.FULL) { return processTemplate(NUM_NULLS_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); } + return processTemplate(SAMPLE_NUM_NULLS_SQL, params); } - public static String buildStatsPartitionNumNullsSql(Map params) throws InvalidFormatException { - Set requiredParams = getTemplateParams(PARTITION_NUM_NULLS_SQL); - if (checkParams(requiredParams, params)) { + public static String buildStatsPartitionNumNullsSql(Map params, QueryType queryType) + throws InvalidFormatException { + if (queryType == QueryType.FULL) { return processTemplate(PARTITION_NUM_NULLS_SQL, params); - } else { - throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams); } + return processTemplate(SAMPLE_PARTITION_NUM_NULLS_SQL, params); } private static Set getTemplateParams(String template) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/SampleSQLStatisticsTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/SampleSQLStatisticsTaskTest.java new file mode 100644 index 0000000000..176d5eee0d --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/SampleSQLStatisticsTaskTest.java @@ -0,0 +1,205 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.HashDistributionInfo; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PartitionInfo; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.common.DdlException; +import org.apache.doris.common.jmockit.Deencapsulation; +import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.statistics.StatsCategory.Category; +import org.apache.doris.statistics.StatsGranularity.Granularity; +import org.apache.doris.statistics.util.InternalQuery; +import org.apache.doris.statistics.util.InternalQueryResult; + +import mockit.Mock; +import mockit.MockUp; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class SampleSQLStatisticsTaskTest { + + private SampleSQLStatisticsTask sampleSQLStatisticsTaskUnderTest; + + @Before + public void setUp() throws Exception { + InternalCatalog catalog = Env.getCurrentInternalCatalog(); + Column column = new Column("columnName", PrimitiveType.STRING); + OlapTable tableName = new OlapTable(0L, "tableName", + Collections.singletonList(column), KeysType.AGG_KEYS, + new PartitionInfo(), new HashDistributionInfo()); + Database database = new Database(0L, "db"); + database.createTable(tableName); + + ConcurrentHashMap fullNameToDb = new ConcurrentHashMap<>(); + fullNameToDb.put("cluster:db", database); + Deencapsulation.setField(catalog, "fullNameToDb", fullNameToDb); + + ConcurrentHashMap idToDb = new ConcurrentHashMap<>(); + idToDb.put(0L, database); + Deencapsulation.setField(catalog, "idToDb", idToDb); + + List columns = Collections.singletonList("row_count"); + List types = Arrays.asList(PrimitiveType.STRING, + PrimitiveType.INT, PrimitiveType.FLOAT, + PrimitiveType.DOUBLE, PrimitiveType.BIGINT); + InternalQueryResult queryResult = new InternalQueryResult(); + InternalQueryResult.ResultRow resultRow = + new InternalQueryResult.ResultRow(columns, types, Collections.singletonList("1000")); + queryResult.getResultRows().add(resultRow); + + StatsCategory statsCategory = new StatsCategory(); + StatsGranularity statsGranularity = new StatsGranularity(); + List statsTypes = Collections.singletonList(StatsType.ROW_COUNT); + sampleSQLStatisticsTaskUnderTest = new SampleSQLStatisticsTask(0L, + Collections.singletonList(new StatisticsDesc(statsCategory, statsGranularity, statsTypes))); + + new MockUp(InternalQuery.class) { + @Mock + public InternalQueryResult query() { + return queryResult; + } + }; + } + + @Test + public void testGetQueryParams() throws Exception { + // Setup + Map expectedResult = new HashMap<>(); + expectedResult.put("table", "tableName"); + expectedResult.put("partition", "partitionName"); + expectedResult.put("column", "columnName"); + expectedResult.put("percent", "10"); + + StatsCategory category = new StatsCategory(); + category.setCategory(Category.TABLE); + category.setDbId(0L); + category.setTableId(0L); + category.setPartitionName("partitionName"); + category.setColumnName("columnName"); + category.setStatsValue("statsValue"); + + StatsGranularity statsGranularity = new StatsGranularity(); + statsGranularity.setGranularity(Granularity.TABLE); + statsGranularity.setTableId(0L); + statsGranularity.setPartitionId(0L); + statsGranularity.setTabletId(0L); + + StatisticsDesc statsDesc = new StatisticsDesc(category, statsGranularity, + Collections.singletonList(StatsType.ROW_COUNT)); + + // Run the test + Map result = sampleSQLStatisticsTaskUnderTest.getQueryParams(statsDesc); + + // Verify the results + Assert.assertEquals(expectedResult, result); + } + + @Test + public void testGetQueryParams_ThrowsDdlException() { + // Setup + StatsCategory category = new StatsCategory(); + category.setCategory(Category.TABLE); + category.setDbId(-1L); + category.setTableId(0L); + category.setPartitionName("partitionName"); + category.setColumnName("columnName"); + category.setStatsValue("statsValue"); + + StatsGranularity statsGranularity = new StatsGranularity(); + statsGranularity.setGranularity(Granularity.PARTITION); + statsGranularity.setTableId(0L); + statsGranularity.setPartitionId(0L); + statsGranularity.setTabletId(0L); + + StatisticsDesc statsDesc = new StatisticsDesc(category, statsGranularity, + Collections.singletonList(StatsType.ROW_COUNT)); + + // Run the test + Assert.assertThrows(DdlException.class, + () -> sampleSQLStatisticsTaskUnderTest.getQueryParams(statsDesc)); + } + + @Test + public void testConstructQuery() throws Exception { + // Setup + String expectedSQL = "SELECT COUNT(1) AS row_count FROM tableName TABLESAMPLE(10 PERCENT);"; + + StatsCategory statsCategory = new StatsCategory(); + statsCategory.setCategory(StatsCategory.Category.TABLE); + statsCategory.setDbId(0L); + statsCategory.setTableId(0L); + statsCategory.setPartitionName("partitionName"); + statsCategory.setColumnName("columnName"); + statsCategory.setStatsValue("statsValue"); + + StatsGranularity statsGranularity = new StatsGranularity(); + statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE); + statsGranularity.setTableId(0L); + statsGranularity.setPartitionId(0L); + statsGranularity.setTabletId(0L); + + StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity, + Collections.singletonList(StatsType.ROW_COUNT)); + + // Run the test + String result = sampleSQLStatisticsTaskUnderTest.constructQuery(statsDesc); + + // Verify the results + Assert.assertEquals(expectedSQL, result); + } + + @Test + public void testExecuteQuery_ThrowsException() { + // Setup + StatsGranularity statsGranularity = new StatsGranularity(); + statsGranularity.setGranularity(StatsGranularity.Granularity.TABLE); + statsGranularity.setTableId(0L); + statsGranularity.setPartitionId(0L); + statsGranularity.setTabletId(0L); + + StatsCategory statsCategory = new StatsCategory(); + statsCategory.setCategory(StatsCategory.Category.TABLE); + statsCategory.setDbId(0L); + statsCategory.setTableId(0L); + statsCategory.setPartitionName("partitionName"); + statsCategory.setColumnName("columnName"); + statsCategory.setStatsValue("statsValue"); + + StatisticsDesc statsDesc = new StatisticsDesc(statsCategory, statsGranularity, + Arrays.asList(StatsType.NDV, StatsType.MAX_VALUE, StatsType.MIN_VALUE)); + + // Run the test + Assert.assertThrows(Exception.class, + () -> sampleSQLStatisticsTaskUnderTest.executeQuery(statsDesc)); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalSqlTemplateTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalSqlTemplateTest.java index 4923986caa..5e7fa281dc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalSqlTemplateTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/InternalSqlTemplateTest.java @@ -18,6 +18,7 @@ package org.apache.doris.statistics.util; import org.apache.doris.common.InvalidFormatException; +import org.apache.doris.statistics.util.InternalSqlTemplate.QueryType; import org.junit.Assert; import org.junit.Test; @@ -25,10 +26,11 @@ import org.junit.Test; import java.util.HashMap; import java.util.Map; + public class InternalSqlTemplateTest { @Test - public void testProcessTemplate() { + public void testProcessTemplate() throws InvalidFormatException { // Setup String template = "SELECT * FROM ${table} WHERE id = ${id};"; String expectSQL = "SELECT * FROM table0 WHERE id = 123;"; @@ -45,170 +47,16 @@ public class InternalSqlTemplateTest { } @Test - public void testBuildStatsMinValueSql() throws Exception { + public void testProcessTemplate_ThrowsInvalidFormatException() { // Setup - String expectSQL = "SELECT MIN(column0) AS min_value FROM table0;"; + String template = "SELECT * FROM ${table} WHERE id = ${id};"; Map params = new HashMap<>(); params.put("table", "table0"); - params.put("column", "column0"); - - // Run the test - String result = InternalSqlTemplate.buildStatsMinValueSql(params); - - // Verify the results - Assert.assertEquals(expectSQL, result); - } - - @Test - public void testBuildStatsMinValueSql_ThrowsInvalidFormatException() { - // Setup - Map params = new HashMap<>(); - params.put("xxx", "table0"); // Run the test Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsMinValueSql(params)); - } - - @Test - public void testBuildStatsPartitionMinValueSql() throws Exception { - // Setup - String expectSQL = "SELECT MIN(column0) AS min_value" - + " FROM table0 PARTITION (partition0);"; - - Map params = new HashMap<>(); - params.put("table", "table0"); - params.put("column", "column0"); - params.put("partition", "partition0"); - - // Run the test - String result = InternalSqlTemplate.buildStatsPartitionMinValueSql(params); - - // Verify the results - Assert.assertEquals(expectSQL, result); - } - - @Test - public void testBuildStatsPartitionMinValueSql_ThrowsInvalidFormatException() { - // Setup - Map params = new HashMap<>(); - params.put("xxx", "table0"); - - // Run the test - Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsPartitionMinValueSql(params)); - } - - @Test - public void testBuildStatsMaxValueSql() throws Exception { - // Setup - String expectSQL = "SELECT MAX(column0) AS max_value FROM table0;"; - - Map params = new HashMap<>(); - params.put("table", "table0"); - params.put("column", "column0"); - - // Run the test - String result = InternalSqlTemplate.buildStatsMaxValueSql(params); - - // Verify the results - Assert.assertEquals(expectSQL, result); - } - - @Test - public void testBuildStatsMaxValueSql_ThrowsInvalidFormatException() { - // Setup - Map params = new HashMap<>(); - params.put("xxx", "table0"); - - // Run the test - Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsMaxValueSql(params)); - } - - @Test - public void testBuildStatsPartitionMaxValueSql() throws Exception { - // Setup - String expectSQL = "SELECT MAX(column0) AS max_value FROM" - + " table0 PARTITION (partition0);"; - - Map params = new HashMap<>(); - params.put("table", "table0"); - params.put("column", "column0"); - params.put("partition", "partition0"); - - // Run the test - String result = InternalSqlTemplate.buildStatsPartitionMaxValueSql(params); - - // Verify the results - Assert.assertEquals(expectSQL, result); - } - - @Test - public void testBuildStatsPartitionMaxValueSql_ThrowsInvalidFormatException() { - // Setup - Map params = new HashMap<>(); - params.put("xxx", "table0"); - - // Run the test - Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsPartitionMaxValueSql(params)); - } - - @Test - public void testBuildStatsNdvValueSql() throws Exception { - // Setup - String expectSQL = "SELECT NDV(column0) AS ndv FROM table0;"; - - Map params = new HashMap<>(); - params.put("table", "table0"); - params.put("column", "column0"); - - // Run the test - String result = InternalSqlTemplate.buildStatsNdvValueSql(params); - - // Verify the results - Assert.assertEquals(expectSQL, result); - } - - @Test - public void testBuildStatsNdvValueSql_ThrowsInvalidFormatException() { - // Setup - Map params = new HashMap<>(); - params.put("xxx", "table0"); - - // Run the test - Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsNdvValueSql(params)); - } - - @Test - public void testBuildStatsPartitionNdvValueSql() throws Exception { - // Setup - String expectSQL = "SELECT NDV(column0) AS ndv FROM table0 PARTITION (partition0);"; - - Map params = new HashMap<>(); - params.put("table", "table0"); - params.put("column", "column0"); - params.put("partition", "partition0"); - - // Run the test - String result = InternalSqlTemplate.buildStatsPartitionNdvValueSql(params); - - // Verify the results - Assert.assertEquals(expectSQL, result); - } - - @Test - public void testBuildStatsPartitionNdvValueSql_ThrowsInvalidFormatException() { - // Setup - Map params = new HashMap<>(); - params.put("xxx", "table0"); - - // Run the test - Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsPartitionNdvValueSql(params)); + () -> InternalSqlTemplate.processTemplate(template, params)); } @Test @@ -222,7 +70,25 @@ public class InternalSqlTemplateTest { params.put("column", "column0"); // Run the test - String result = InternalSqlTemplate.buildStatsMinMaxNdvValueSql(params); + String result = InternalSqlTemplate.buildStatsMinMaxNdvValueSql(params, QueryType.FULL); + + // Verify the results + Assert.assertEquals(expectSQL, result); + } + + @Test + public void testBuildStatsMinMaxNdvValueSqlBySample() throws Exception { + // Setup + String expectSQL = "SELECT MIN(column0) AS min_value, MAX(column0) AS max_value, NDV(column0) AS ndv" + + " FROM table0 TABLESAMPLE(20 PERCENT);"; + + Map params = new HashMap<>(); + params.put("table", "table0"); + params.put("column", "column0"); + params.put("percent", "20"); + + // Run the test + String result = InternalSqlTemplate.buildStatsMinMaxNdvValueSql(params, QueryType.SAMPLE); // Verify the results Assert.assertEquals(expectSQL, result); @@ -236,14 +102,14 @@ public class InternalSqlTemplateTest { // Run the test Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsMinMaxNdvValueSql(params)); + () -> InternalSqlTemplate.buildStatsMinMaxNdvValueSql(params, QueryType.FULL)); } @Test public void testBuildStatsPartitionMinMaxNdvValueSql() throws Exception { // Setup - String expectSQL = "SELECT MIN(column0) AS min_value, MAX(column0) AS max_value, " - + "NDV(column0) AS ndv FROM table0 PARTITION (partition0);"; + String expectSQL = "SELECT MIN(column0) AS min_value, MAX(column0) AS max_value, NDV(column0) AS ndv" + + " FROM table0 PARTITIONS (partition0);"; Map params = new HashMap<>(); params.put("table", "table0"); @@ -251,7 +117,26 @@ public class InternalSqlTemplateTest { params.put("partition", "partition0"); // Run the test - String result = InternalSqlTemplate.buildStatsPartitionMinMaxNdvValueSql(params); + String result = InternalSqlTemplate.buildStatsPartitionMinMaxNdvValueSql(params, QueryType.FULL); + + // Verify the results + Assert.assertEquals(expectSQL, result); + } + + @Test + public void testBuildStatsPartitionMinMaxNdvValueSqlBySample() throws Exception { + // Setup + String expectSQL = "SELECT MIN(column0) AS min_value, MAX(column0) AS max_value, NDV(column0) AS ndv" + + " FROM table0 PARTITIONS (partition0) TABLESAMPLE(20 PERCENT);"; + + Map params = new HashMap<>(); + params.put("table", "table0"); + params.put("column", "column0"); + params.put("partition", "partition0"); + params.put("percent", "20"); + + // Run the test + String result = InternalSqlTemplate.buildStatsPartitionMinMaxNdvValueSql(params, QueryType.SAMPLE); // Verify the results Assert.assertEquals(expectSQL, result); @@ -265,7 +150,7 @@ public class InternalSqlTemplateTest { // Run the test Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsPartitionMinMaxNdvValueSql(params)); + () -> InternalSqlTemplate.buildStatsPartitionMinMaxNdvValueSql(params, QueryType.FULL)); } @Test @@ -275,10 +160,25 @@ public class InternalSqlTemplateTest { Map params = new HashMap<>(); params.put("table", "table0"); - params.put("column", "column0"); // Run the test - String result = InternalSqlTemplate.buildStatsRowCountSql(params); + String result = InternalSqlTemplate.buildStatsRowCountSql(params, QueryType.FULL); + + // Verify the results + Assert.assertEquals(expectSQL, result); + } + + @Test + public void testBuildStatsRowCountSqlBySample() throws Exception { + // Setup + String expectSQL = "SELECT COUNT(1) AS row_count FROM table0 TABLESAMPLE(20 PERCENT);"; + + Map params = new HashMap<>(); + params.put("table", "table0"); + params.put("percent", "20"); + + // Run the test + String result = InternalSqlTemplate.buildStatsRowCountSql(params, QueryType.SAMPLE); // Verify the results Assert.assertEquals(expectSQL, result); @@ -292,7 +192,7 @@ public class InternalSqlTemplateTest { // Run the test Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsRowCountSql(params)); + () -> InternalSqlTemplate.buildStatsRowCountSql(params, QueryType.FULL)); } @Test @@ -305,7 +205,25 @@ public class InternalSqlTemplateTest { params.put("partition", "partition0"); // Run the test - String result = InternalSqlTemplate.buildStatsPartitionRowCountSql(params); + String result = InternalSqlTemplate.buildStatsPartitionRowCountSql(params, QueryType.FULL); + + // Verify the results + Assert.assertEquals(expectSQL, result); + } + + @Test + public void testBuildStatsPartitionRowCountSqlBySample() throws Exception { + // Setup + String expectSQL = "SELECT COUNT(1) AS row_count FROM table0" + + " PARTITIONS (partition0) TABLESAMPLE(20 PERCENT);"; + + Map params = new HashMap<>(); + params.put("table", "table0"); + params.put("partition", "partition0"); + params.put("percent", "20"); + + // Run the test + String result = InternalSqlTemplate.buildStatsPartitionRowCountSql(params, QueryType.SAMPLE); // Verify the results Assert.assertEquals(expectSQL, result); @@ -319,117 +237,7 @@ public class InternalSqlTemplateTest { // Run the test Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsPartitionRowCountSql(params)); - } - - @Test - public void testBuildStatsMaxSizeSql() throws Exception { - // Setup - String expectSQL = "SELECT MAX(LENGTH(column0)) AS max_size FROM table0;"; - - Map params = new HashMap<>(); - params.put("table", "table0"); - params.put("column", "column0"); - - // Run the test - String result = InternalSqlTemplate.buildStatsMaxSizeSql(params); - - // Verify the results - Assert.assertEquals(expectSQL, result); - } - - @Test - public void testBuildStatsMaxSizeSql_ThrowsInvalidFormatException() { - // Setup - Map params = new HashMap<>(); - params.put("xxx", "table0"); - - // Run the test - Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsMaxSizeSql(params)); - } - - @Test - public void testBuildStatsPartitionMaxSizeSql() throws Exception { - // Setup - String expectSQL = "SELECT MAX(LENGTH(column0)) AS max_size FROM table0 PARTITION (partition0);"; - - Map params = new HashMap<>(); - params.put("table", "table0"); - params.put("column", "column0"); - params.put("partition", "partition0"); - - // Run the test - String result = InternalSqlTemplate.buildStatsPartitionMaxSizeSql(params); - - // Verify the results - Assert.assertEquals(expectSQL, result); - } - - @Test - public void testBuildStatsPartitionMaxSizeSql_ThrowsInvalidFormatException() { - // Setup - Map params = new HashMap<>(); - params.put("xxx", "table0"); - - // Run the test - Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsPartitionMaxSizeSql(params)); - } - - @Test - public void testBuildStatsAvgSizeSql() throws Exception { - // Setup - String expectSQL = "SELECT AVG(LENGTH(column0)) AS avg_size FROM table0;"; - - Map params = new HashMap<>(); - params.put("table", "table0"); - params.put("column", "column0"); - - // Run the test - String result = InternalSqlTemplate.buildStatsAvgSizeSql(params); - - // Verify the results - Assert.assertEquals(expectSQL, result); - } - - @Test - public void testBuildStatsAvgSizeSql_ThrowsInvalidFormatException() { - // Setup - Map params = new HashMap<>(); - params.put("xxx", "table0"); - - // Run the test - Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsAvgSizeSql(params)); - } - - @Test - public void testBuildStatsPartitionAvgSizeSql() throws Exception { - // Setup - String expectSQL = "SELECT AVG(LENGTH(column0)) AS avg_size FROM table0 PARTITION (partition0);"; - - Map params = new HashMap<>(); - params.put("table", "table0"); - params.put("column", "column0"); - params.put("partition", "partition0"); - - // Run the test - String result = InternalSqlTemplate.buildStatsPartitionAvgSizeSql(params); - - // Verify the results - Assert.assertEquals(expectSQL, result); - } - - @Test - public void testBuildStatsPartitionAvgSizeSql_ThrowsInvalidFormatException() { - // Setup - Map params = new HashMap<>(); - params.put("xxx", "table0"); - - // Run the test - Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsPartitionAvgSizeSql(params)); + () -> InternalSqlTemplate.buildStatsPartitionRowCountSql(params, QueryType.FULL)); } @Test @@ -443,7 +251,25 @@ public class InternalSqlTemplateTest { params.put("column", "column0"); // Run the test - String result = InternalSqlTemplate.buildStatsMaxAvgSizeSql(params); + String result = InternalSqlTemplate.buildStatsMaxAvgSizeSql(params, QueryType.FULL); + + // Verify the results + Assert.assertEquals(expectSQL, result); + } + + @Test + public void testBuildStatsMaxAvgSizeSqlBySample() throws Exception { + // Setup + String expectSQL = "SELECT MAX(LENGTH(column0)) AS max_size, AVG(LENGTH(column0)) AS avg_size" + + " FROM table0 TABLESAMPLE(20 PERCENT);"; + + Map params = new HashMap<>(); + params.put("table", "table0"); + params.put("column", "column0"); + params.put("percent", "20"); + + // Run the test + String result = InternalSqlTemplate.buildStatsMaxAvgSizeSql(params, QueryType.SAMPLE); // Verify the results Assert.assertEquals(expectSQL, result); @@ -457,14 +283,14 @@ public class InternalSqlTemplateTest { // Run the test Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsMaxAvgSizeSql(params)); + () -> InternalSqlTemplate.buildStatsMaxAvgSizeSql(params, QueryType.FULL)); } @Test public void testBuildStatsPartitionMaxAvgSizeSql() throws Exception { // Setup - String expectSQL = "SELECT MAX(LENGTH(column0)) AS max_size, " - + "AVG(LENGTH(column0)) AS avg_size FROM table0 PARTITION (partition0);"; + String expectSQL = "SELECT MAX(LENGTH(column0)) AS max_size, AVG(LENGTH(column0)) AS avg_size" + + " FROM table0 PARTITIONS (partition0);"; Map params = new HashMap<>(); params.put("table", "table0"); @@ -472,7 +298,26 @@ public class InternalSqlTemplateTest { params.put("partition", "partition0"); // Run the test - String result = InternalSqlTemplate.buildStatsPartitionMaxAvgSizeSql(params); + String result = InternalSqlTemplate.buildStatsPartitionMaxAvgSizeSql(params, QueryType.FULL); + + // Verify the results + Assert.assertEquals(expectSQL, result); + } + + @Test + public void testBuildStatsPartitionMaxAvgSizeSqlBySample() throws Exception { + // Setup + String expectSQL = "SELECT MAX(LENGTH(column0)) AS max_size, AVG(LENGTH(column0)) AS avg_size" + + " FROM table0 PARTITIONS (partition0) TABLESAMPLE(20 PERCENT);"; + + Map params = new HashMap<>(); + params.put("table", "table0"); + params.put("column", "column0"); + params.put("partition", "partition0"); + params.put("percent", "20"); + + // Run the test + String result = InternalSqlTemplate.buildStatsPartitionMaxAvgSizeSql(params, QueryType.SAMPLE); // Verify the results Assert.assertEquals(expectSQL, result); @@ -486,7 +331,7 @@ public class InternalSqlTemplateTest { // Run the test Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsPartitionMaxAvgSizeSql(params)); + () -> InternalSqlTemplate.buildStatsPartitionMaxAvgSizeSql(params, QueryType.FULL)); } @Test @@ -499,7 +344,25 @@ public class InternalSqlTemplateTest { params.put("column", "column0"); // Run the test - String result = InternalSqlTemplate.buildStatsNumNullsSql(params); + String result = InternalSqlTemplate.buildStatsNumNullsSql(params, QueryType.FULL); + + // Verify the results + Assert.assertEquals(expectSQL, result); + } + + @Test + public void testBuildStatsNumNullsSqlBySample() throws Exception { + // Setup + String expectSQL = "SELECT COUNT(1) AS num_nulls FROM table0" + + " TABLESAMPLE(20 PERCENT) WHERE column0 IS NULL;"; + + Map params = new HashMap<>(); + params.put("table", "table0"); + params.put("column", "column0"); + params.put("percent", "20"); + + // Run the test + String result = InternalSqlTemplate.buildStatsNumNullsSql(params, QueryType.SAMPLE); // Verify the results Assert.assertEquals(expectSQL, result); @@ -513,13 +376,14 @@ public class InternalSqlTemplateTest { // Run the test Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsNumNullsSql(params)); + () -> InternalSqlTemplate.buildStatsNumNullsSql(params, QueryType.FULL)); } @Test public void testBuildStatsPartitionNumNullsSql() throws Exception { // Setup - String expectSQL = "SELECT COUNT(1) AS num_nulls FROM table0 PARTITION (partition0) WHERE column0 IS NULL;"; + String expectSQL = "SELECT COUNT(1) AS num_nulls FROM table0" + + " PARTITIONS (partition0) WHERE column0 IS NULL;"; Map params = new HashMap<>(); params.put("table", "table0"); @@ -527,7 +391,26 @@ public class InternalSqlTemplateTest { params.put("partition", "partition0"); // Run the test - String result = InternalSqlTemplate.buildStatsPartitionNumNullsSql(params); + String result = InternalSqlTemplate.buildStatsPartitionNumNullsSql(params, QueryType.FULL); + + // Verify the results + Assert.assertEquals(expectSQL, result); + } + + @Test + public void testBuildStatsPartitionNumNullsSqlBySample() throws Exception { + // Setup + String expectSQL = "SELECT COUNT(1) AS num_nulls" + + " FROM table0 PARTITIONS (partition0) TABLESAMPLE(20 PERCENT) WHERE column0 IS NULL;"; + + Map params = new HashMap<>(); + params.put("table", "table0"); + params.put("column", "column0"); + params.put("partition", "partition0"); + params.put("percent", "20"); + + // Run the test + String result = InternalSqlTemplate.buildStatsPartitionNumNullsSql(params, QueryType.SAMPLE); // Verify the results Assert.assertEquals(expectSQL, result); @@ -541,6 +424,6 @@ public class InternalSqlTemplateTest { // Run the test Assert.assertThrows(InvalidFormatException.class, - () -> InternalSqlTemplate.buildStatsPartitionNumNullsSql(params)); + () -> InternalSqlTemplate.buildStatsPartitionNumNullsSql(params, QueryType.FULL)); } }