[enhancement](statistics) support sampling collection of statistics (#18880)
1. Support sampling when collecting statistics
2. Improve the syntax for collecting statistics
3. Support specifying the number of buckets when collecting histograms
4. Tweak some code structure
---
The statement supports both WITH clauses and PROPERTIES; the PROPERTIES form keeps the same syntax as before.
Column Statistics Collection Syntax:
```SQL
ANALYZE [ SYNC ] TABLE table_name
[ (column_name [, ...]) ]
[ [WITH SYNC] | [WITH INCREMENTAL] | [WITH SAMPLE PERCENT | ROWS ] ]
[ PROPERTIES ('key' = 'value', ...) ];
```
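For example (using a hypothetical table `t1` with columns `c1` and `c2`; these names are placeholders, not from this PR), sampling can be given either as a percentage or as a row count:
```SQL
-- Collect column statistics synchronously, scanning roughly 10% of the data.
ANALYZE TABLE t1(c1, c2) WITH SYNC WITH SAMPLE PERCENT 10;

-- Alternatively, sample a fixed number of rows instead of a percentage.
ANALYZE SYNC TABLE t1(c1, c2) WITH SAMPLE ROWS 1000;
```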
Column histogram collection syntax:
```SQL
ANALYZE [ SYNC ] TABLE table_name
[ (column_name [, ...]) ]
UPDATE HISTOGRAM
[ [ WITH SYNC ][ WITH INCREMENTAL ][ WITH SAMPLE PERCENT | ROWS ][ WITH BUCKETS ] ]
[ PROPERTIES ('key' = 'value', ...) ];
```
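For example, a sampled histogram build capped at 16 buckets (again with the hypothetical `t1`):
```SQL
-- Build a histogram on c1 from a 10% sample, with at most 16 buckets.
ANALYZE TABLE t1(c1) UPDATE HISTOGRAM WITH SYNC WITH SAMPLE PERCENT 10 WITH BUCKETS 16;
```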
Notes:
- sync: Collect statistics synchronously; the statement returns only after collection finishes.
- incremental: Collect statistics incrementally. Incremental collection of histogram statistics is not supported.
- sample percent | rows: Collect statistics by sampling, specified either as a percentage of the data or as a number of rows.
- buckets: The maximum number of buckets generated when collecting histogram statistics.
- table_name: The target table for collecting statistics. Can be of the form `db_name.table_name`.
- column_name: The specified target columns must exist in `table_name`; multiple column names are separated by commas.
- properties: Properties of the statistics task. Currently only the following keys are supported (each is equivalent to the corresponding WITH clause; see the example after this list):
  - 'sync' = 'true'
  - 'incremental' = 'true'
  - 'sample.percent' = '50'
  - 'sample.rows' = '1000'
  - 'num.buckets' = '10'
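As a sketch of the PROPERTIES form (equivalent to the WITH clauses above; table and column names are hypothetical again):
```SQL
-- Same as: ANALYZE TABLE t1(c1) UPDATE HISTOGRAM WITH SAMPLE PERCENT 50 WITH BUCKETS 10
ANALYZE TABLE t1(c1) UPDATE HISTOGRAM
PROPERTIES('sample.percent' = '50', 'num.buckets' = '10');
```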
---
TODO:
- Add the complete p0 regression tests
- `Incremental` statistics: see #18653
@@ -540,6 +540,7 @@ terminal String
    KW_ROW,
    KW_ROWS,
    KW_S3,
    KW_SAMPLE,
    KW_SCHEMA,
    KW_SCHEMAS,
    KW_SECOND,
@@ -890,6 +891,10 @@ nonterminal PauseSyncJobStmt pause_sync_job_stmt;
nonterminal StopSyncJobStmt stop_sync_job_stmt;
nonterminal JobName job_name;

// analyze
nonterminal Map<String, String> with_analysis_properties;
nonterminal List<Map<String, String>> opt_with_analysis_properties;

nonterminal String opt_db, procedure_or_function, opt_comment, opt_engine;
nonterminal ColumnDef.DefaultValue opt_default_value;
nonterminal Boolean opt_if_exists, opt_if_not_exists;
@@ -2812,30 +2817,45 @@ show_create_reporitory_stmt ::=

// analyze statement
analyze_stmt ::=
    KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols opt_properties:properties
    // statistics
    KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols
    opt_with_analysis_properties:withAnalysisProperties opt_properties:properties
    {:
        boolean is_whole_tbl = (cols == null);
        boolean is_histogram = false;
        boolean is_increment = false;
        RESULT = new AnalyzeStmt(tbl, sync, cols, properties, is_whole_tbl, is_histogram, is_increment);
        if (properties == null) {
            properties = Maps.newHashMap();
        }
        for (Map<String, String> property : withAnalysisProperties) {
            properties.putAll(property);
        }
        if (!properties.containsKey("sync")) {
            properties.put("sync", String.valueOf(sync));
        }
        // Rule: If no type is specified, see if there is a specified column
        if (!properties.containsKey("analysis.type")) {
            if ((cols == null)) {
                properties.put("analysis.type", "INDEX");
            } else {
                properties.put("analysis.type", "COLUMN");
            }
        }
        RESULT = new AnalyzeStmt(tbl, cols, properties);
    :}
    | KW_ANALYZE opt_sync:sync KW_INCREMENTAL KW_TABLE table_name:tbl opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties
    // histogram
    | KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl opt_col_list:cols KW_UPDATE KW_HISTOGRAM
    opt_with_analysis_properties:withAnalysisProperties opt_properties:properties
    {:
        boolean is_whole_tbl = (cols == null);
        boolean is_histogram = false;
        boolean is_increment = true;
        RESULT = new AnalyzeStmt(tbl, sync, cols, properties, is_whole_tbl, is_histogram, is_increment);
    :}
    | KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM KW_ON ident_list:cols opt_partition_names:partitionNames opt_properties:properties
    {:
        boolean is_whole_tbl = false;
        boolean is_histogram = true;
        boolean is_increment = false;
        RESULT = new AnalyzeStmt(tbl, sync, cols, properties, is_whole_tbl, is_histogram, is_increment);
    :}
    | KW_ANALYZE opt_sync:sync KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM
    {:
        RESULT = new AnalyzeStmt(tbl, sync, null, new HashMap<>(), true, true, false);
        if (properties == null) {
            properties = Maps.newHashMap();
        }
        for (Map<String, String> property : withAnalysisProperties) {
            properties.putAll(property);
        }
        if (!properties.containsKey("sync")) {
            properties.put("sync", String.valueOf(sync));
        }
        // TODO: Support materialized view
        properties.put("analysis.type", "HISTOGRAM");
        RESULT = new AnalyzeStmt(tbl, cols, properties);
    :}
    ;

@@ -5730,6 +5750,53 @@ ident_list ::=
    :}
    ;

with_analysis_properties ::=
    KW_SYNC
    {:
        RESULT = new HashMap<String, String>() {{
            put("sync", "true");
        }};
    :}
    | KW_INCREMENTAL
    {:
        RESULT = new HashMap<String, String>() {{
            put("incremental", "true");
        }};
    :}
    | KW_SAMPLE KW_PERCENT INTEGER_LITERAL:samplePercent
    {:
        RESULT = new HashMap<String, String>() {{
            put("sample.percent", String.valueOf(samplePercent.intValue()));
        }};
    :}
    | KW_SAMPLE KW_ROWS INTEGER_LITERAL:sampleRows
    {:
        RESULT = new HashMap<String, String>() {{
            put("sample.rows", String.valueOf(sampleRows.intValue()));
        }};
    :}
    | KW_BUCKETS INTEGER_LITERAL:numBuckets
    {:
        RESULT = new HashMap<String, String>() {{
            put("num.buckets", String.valueOf(numBuckets.intValue()));
        }};
    :}
    ;

opt_with_analysis_properties ::=
    /* empty */
    {:
        RESULT = Lists.newArrayList();
    :}
    | opt_with_analysis_properties:withAnalysisProperties
    KW_WITH with_analysis_properties:property
    {:
        withAnalysisProperties.add(property);
        RESULT = withAnalysisProperties;
    :}
    ;


expr_list ::=
    expr:e
    {:
@@ -7368,6 +7435,8 @@ keyword ::=
    {: RESULT = id; :}
    | KW_EXPIRED:id
    {: RESULT = id; :}
    | KW_SAMPLE:id
    {: RESULT = id; :}
    ;

// Identifier that contain keyword

@@ -21,22 +21,22 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.View;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.PrintableMap;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;

@@ -44,35 +44,57 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;

/**
 * Collect statistics.
 *
 * syntax:
 * ANALYZE [[ db_name.tb_name ] [( column_name [, ...] )], ...] [ PROPERTIES(...) ]
 * db_name.tb_name: collect table and column statistics from tb_name
 * column_name: collect column statistics from column_name
 * properties: properties of statistics jobs
 * Column Statistics Collection Syntax:
 * ANALYZE [ SYNC ] TABLE table_name
 *     [ (column_name [, ...]) ]
 *     [ [WITH SYNC] | [WITH INCREMENTAL] | [WITH SAMPLE PERCENT | ROWS ] ]
 *     [ PROPERTIES ('key' = 'value', ...) ];
 *
 * Column histogram collection syntax:
 * ANALYZE [ SYNC ] TABLE table_name
 *     [ (column_name [, ...]) ]
 *     UPDATE HISTOGRAM
 *     [ [ WITH SYNC ][ WITH INCREMENTAL ][ WITH SAMPLE PERCENT | ROWS ][ WITH BUCKETS ] ]
 *     [ PROPERTIES ('key' = 'value', ...) ];
 *
 * Notes:
 * - sync: Collect statistics synchronously; return only after collection finishes.
 * - incremental: Collect statistics incrementally. Incremental collection of histogram statistics is not supported.
 * - sample percent | rows: Collect statistics by sampling, as either a percentage of the data or a number of rows.
 * - buckets: The maximum number of buckets generated when collecting histogram statistics.
 * - table_name: The target table for collecting statistics. Can be of the form `db_name.table_name`.
 * - column_name: The specified target columns must exist in `table_name`,
 *   and multiple column names are separated by commas.
 * - properties: Properties of the statistics task. Currently only the following configurations
 *   are supported (equivalent to the WITH clauses):
 *   - 'sync' = 'true'
 *   - 'incremental' = 'true'
 *   - 'sample.percent' = '50'
 *   - 'sample.rows' = '1000'
 *   - 'num.buckets' = '10'
 */
public class AnalyzeStmt extends DdlStmt {
    // time to wait for collect statistics
    public static final String CBO_STATISTICS_TASK_TIMEOUT_SEC = "cbo_statistics_task_timeout_sec";
    // The properties passed in by the user through "with" or "properties('K', 'V')"
    public static final String PROPERTY_SYNC = "sync";
    public static final String PROPERTY_INCREMENTAL = "incremental";
    public static final String PROPERTY_SAMPLE_PERCENT = "sample.percent";
    public static final String PROPERTY_SAMPLE_ROWS = "sample.rows";
    public static final String PROPERTY_NUM_BUCKETS = "num.buckets";
    public static final String PROPERTY_ANALYSIS_TYPE = "analysis.type";

    private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>()
            .add(CBO_STATISTICS_TASK_TIMEOUT_SEC)
            .add(PROPERTY_SYNC)
            .add(PROPERTY_INCREMENTAL)
            .add(PROPERTY_SAMPLE_PERCENT)
            .add(PROPERTY_SAMPLE_ROWS)
            .add(PROPERTY_NUM_BUCKETS)
            .add(PROPERTY_ANALYSIS_TYPE)
            .build();

    private static final Predicate<Long> DESIRED_TASK_TIMEOUT_SEC = (v) -> v > 0L;

    public boolean isWholeTbl;
    public boolean isHistogram;
    public boolean isIncrement;

    private final TableName tableName;

    private final boolean sync;
    private final List<String> columnNames;
    private final Map<String, String> properties;

@@ -81,19 +103,11 @@ public class AnalyzeStmt extends DdlStmt {
    private TableIf table;

    public AnalyzeStmt(TableName tableName,
                       boolean sync,
                       List<String> columnNames,
                       Map<String, String> properties,
                       Boolean isWholeTbl,
                       Boolean isHistogram,
                       Boolean isIncrement) {
                       List<String> columnNames,
                       Map<String, String> properties) {
        this.tableName = tableName;
        this.sync = sync;
        this.columnNames = columnNames;
        this.properties = properties;
        this.isWholeTbl = isWholeTbl;
        this.isHistogram = isHistogram;
        this.isIncrement = isIncrement;
    }

    @Override
@@ -133,6 +147,15 @@ public class AnalyzeStmt extends DdlStmt {
        }

        checkProperties();

        // TODO support external table
        if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)
                || properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
            if (!(table instanceof OlapTable)) {
                throw new AnalysisException("Sampling statistics "
                        + "collection of external tables is not supported");
            }
        }
    }

    @Override
@@ -153,18 +176,88 @@ public class AnalyzeStmt extends DdlStmt {
    }

    private void checkProperties() throws UserException {
        if (properties != null) {
            Optional<String> optional = properties.keySet().stream().filter(
                    entity -> !PROPERTIES_SET.contains(entity)).findFirst();
            if (optional.isPresent()) {
                throw new AnalysisException(optional.get() + " is invalid property");
            }
        if (properties == null || properties.isEmpty()) {
            throw new AnalysisException("analysis properties should not be empty");
        }

        long taskTimeout = ((Long) Util.getLongPropertyOrDefault(
                properties.get(CBO_STATISTICS_TASK_TIMEOUT_SEC),
                Config.max_cbo_statistics_task_timeout_sec, DESIRED_TASK_TIMEOUT_SEC,
                CBO_STATISTICS_TASK_TIMEOUT_SEC + " should > 0")).intValue();
        properties.put(CBO_STATISTICS_TASK_TIMEOUT_SEC, String.valueOf(taskTimeout));
        String msgTemplate = "%s = %s is invalid property";
        Optional<String> optional = properties.keySet().stream().filter(
                entity -> !PROPERTIES_SET.contains(entity)).findFirst();

        if (optional.isPresent()) {
            String msg = String.format(msgTemplate, optional.get(), properties.get(optional.get()));
            throw new AnalysisException(msg);
        }

        if (properties.containsKey(PROPERTY_SYNC)) {
            try {
                Boolean.valueOf(properties.get(PROPERTY_SYNC));
            } catch (NumberFormatException e) {
                String msg = String.format(msgTemplate, PROPERTY_SYNC, properties.get(PROPERTY_SYNC));
                throw new AnalysisException(msg);
            }
        }

        if (properties.containsKey(PROPERTY_INCREMENTAL)) {
            try {
                Boolean.valueOf(properties.get(PROPERTY_INCREMENTAL));
            } catch (NumberFormatException e) {
                String msg = String.format(msgTemplate, PROPERTY_INCREMENTAL, properties.get(PROPERTY_INCREMENTAL));
                throw new AnalysisException(msg);
            }
        }

        if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)
                && properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
            throw new AnalysisException("only one sampling parameter can be specified simultaneously");
        }

        if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)) {
            checkNumericProperty(PROPERTY_SAMPLE_PERCENT, properties.get(PROPERTY_SAMPLE_PERCENT),
                    0, 100, false, "should be > 0 and < 100");
        }

        if (properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
            checkNumericProperty(PROPERTY_SAMPLE_ROWS, properties.get(PROPERTY_SAMPLE_ROWS),
                    0, Integer.MAX_VALUE, false, "needs at least 1 row");
        }

        if (properties.containsKey(PROPERTY_NUM_BUCKETS)) {
            checkNumericProperty(PROPERTY_NUM_BUCKETS, properties.get(PROPERTY_NUM_BUCKETS),
                    1, Integer.MAX_VALUE, true, "needs at least 1 buckets");
        }

        if (properties.containsKey(PROPERTY_ANALYSIS_TYPE)) {
            try {
                AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE));
            } catch (NumberFormatException e) {
                String msg = String.format(msgTemplate, PROPERTY_ANALYSIS_TYPE, properties.get(PROPERTY_ANALYSIS_TYPE));
                throw new AnalysisException(msg);
            }
        }

        if (properties.containsKey(PROPERTY_INCREMENTAL)
                && AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)) == AnalysisType.HISTOGRAM) {
            throw new AnalysisException(PROPERTY_INCREMENTAL + " collection of histograms is not supported");
        }

        if (properties.containsKey(PROPERTY_NUM_BUCKETS)
                && AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE)) != AnalysisType.HISTOGRAM) {
            throw new AnalysisException(PROPERTY_NUM_BUCKETS + " can only be specified when collecting histograms");
        }
    }

    private void checkNumericProperty(String key, String value, int lowerBound, int upperBound,
            boolean includeBoundary, String errorMsg) throws AnalysisException {
        if (!StringUtils.isNumeric(value)) {
            String msg = String.format("%s = %s is an invalid property.", key, value);
            throw new AnalysisException(msg);
        }
        int intValue = Integer.parseInt(value);
        boolean isOutOfBounds = (includeBoundary && (intValue < lowerBound || intValue > upperBound))
                || (!includeBoundary && (intValue <= lowerBound || intValue >= upperBound));
        if (isOutOfBounds) {
            throw new AnalysisException(key + " " + errorMsg);
        }
    }

@@ -198,8 +291,47 @@ public class AnalyzeStmt extends DdlStmt {
    }

    public Map<String, String> getProperties() {
        // TODO add default properties
        return properties != null ? properties : Maps.newHashMap();
        return properties;
    }

    public boolean isSync() {
        return Boolean.parseBoolean(properties.get(PROPERTY_SYNC));
    }

    public boolean isIncremental() {
        return Boolean.parseBoolean(properties.get(PROPERTY_INCREMENTAL));
    }

    public int getSamplePercent() {
        if (!properties.containsKey(PROPERTY_SAMPLE_PERCENT)) {
            return 0;
        }
        return Integer.parseInt(properties.get(PROPERTY_SAMPLE_PERCENT));
    }

    public int getSampleRows() {
        if (!properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
            return 0;
        }
        return Integer.parseInt(properties.get(PROPERTY_SAMPLE_ROWS));
    }

    public int getNumBuckets() {
        if (!properties.containsKey(PROPERTY_NUM_BUCKETS)) {
            return 0;
        }
        return Integer.parseInt(properties.get(PROPERTY_NUM_BUCKETS));
    }

    public AnalysisType getAnalysisType() {
        return AnalysisType.valueOf(properties.get(PROPERTY_ANALYSIS_TYPE));
    }

    public AnalysisMethod getAnalysisMethod() {
        if (getSamplePercent() > 0 || getSampleRows() > 0) {
            return AnalysisMethod.SAMPLE;
        }
        return AnalysisMethod.FULL;
    }

    @Override
@@ -207,27 +339,22 @@ public class AnalyzeStmt extends DdlStmt {
        StringBuilder sb = new StringBuilder();
        sb.append("ANALYZE");

        if (isIncrement) {
            sb.append(" ");
            sb.append("INCREMENTAL");
        }

        if (tableName != null) {
            sb.append(" ");
            sb.append(tableName.toSql());
        }

        if (isHistogram) {
            sb.append(" ");
            sb.append("UPDATE HISTOGRAM ON");
            sb.append(" ");
            sb.append(StringUtils.join(columnNames, ","));
        } else if (columnNames != null) {
        if (columnNames != null) {
            sb.append("(");
            sb.append(StringUtils.join(columnNames, ","));
            sb.append(")");
        }

        if (getAnalysisType().equals(AnalysisType.HISTOGRAM)) {
            sb.append(" ");
            sb.append("UPDATE HISTOGRAM");
        }

        if (properties != null) {
            sb.append(" ");
            sb.append("PROPERTIES(");
@@ -239,8 +366,4 @@ public class AnalyzeStmt extends DdlStmt {

        return sb.toString();
    }

    public boolean isSync() {
        return sync;
    }
}

@@ -90,19 +90,17 @@ public class AnalysisManager {
    }

    // Each analyze stmt corresponds to an analysis job.
    public void createAnalysisJob(AnalyzeStmt analyzeStmt) throws DdlException {
        String catalogName = analyzeStmt.getCatalogName();
        String db = analyzeStmt.getDBName();
        TableName tbl = analyzeStmt.getTblName();
        StatisticsUtil.convertTableNameToObjects(tbl);
        Set<String> colNames = analyzeStmt.getColumnNames();
        Map<Long, AnalysisTaskInfo> analysisTaskInfos = new HashMap<>();
    public void createAnalysisJob(AnalyzeStmt stmt) throws DdlException {
        long jobId = Env.getCurrentEnv().getNextId();
        createTaskForEachColumns(analyzeStmt, catalogName, db, tbl, colNames, analysisTaskInfos, jobId);
        createTaskForMVIdx(analyzeStmt, catalogName, db, tbl, analysisTaskInfos, jobId);
        persistAnalysisJob(catalogName, db, tbl, jobId);
        AnalysisTaskInfoBuilder taskInfoBuilder = buildCommonTaskInfo(stmt, jobId);
        Map<Long, AnalysisTaskInfo> analysisTaskInfos = new HashMap<>();

        if (analyzeStmt.isSync()) {
        // start build analysis tasks
        createTaskForEachColumns(stmt.getColumnNames(), taskInfoBuilder, analysisTaskInfos);
        createTaskForMVIdx(stmt.getTable(), taskInfoBuilder, analysisTaskInfos, stmt.getAnalysisType());
        persistAnalysisJob(taskInfoBuilder);

        if (stmt.isSync()) {
            syncExecute(analysisTaskInfos.values());
            return;
        }
@@ -111,42 +109,79 @@ public class AnalysisManager {
        analysisTaskInfos.values().forEach(taskScheduler::schedule);
    }

    private void persistAnalysisJob(String catalogName, String db, TableName tbl,
            long jobId) throws DdlException {
    private AnalysisTaskInfoBuilder buildCommonTaskInfo(AnalyzeStmt stmt, long jobId) {
        AnalysisTaskInfoBuilder taskInfoBuilder = new AnalysisTaskInfoBuilder();
        String catalogName = stmt.getCatalogName();
        String db = stmt.getDBName();
        TableName tbl = stmt.getTblName();
        StatisticsUtil.convertTableNameToObjects(tbl);
        String tblName = tbl.getTbl();
        int samplePercent = stmt.getSamplePercent();
        int sampleRows = stmt.getSampleRows();
        AnalysisType analysisType = stmt.getAnalysisType();
        AnalysisMethod analysisMethod = stmt.getAnalysisMethod();

        taskInfoBuilder.setJobId(jobId);
        taskInfoBuilder.setCatalogName(catalogName);
        taskInfoBuilder.setDbName(db);
        taskInfoBuilder.setTblName(tblName);
        taskInfoBuilder.setJobType(JobType.MANUAL);
        taskInfoBuilder.setState(AnalysisState.PENDING);
        taskInfoBuilder.setScheduleType(ScheduleType.ONCE);

        if (analysisMethod == AnalysisMethod.SAMPLE) {
            taskInfoBuilder.setAnalysisMethod(AnalysisMethod.SAMPLE);
            taskInfoBuilder.setSamplePercent(samplePercent);
            taskInfoBuilder.setSampleRows(sampleRows);
        } else {
            taskInfoBuilder.setAnalysisMethod(AnalysisMethod.FULL);
        }

        if (analysisType == AnalysisType.HISTOGRAM) {
            taskInfoBuilder.setAnalysisType(AnalysisType.HISTOGRAM);
            int numBuckets = stmt.getNumBuckets();
            int maxBucketNum = numBuckets > 0 ? numBuckets
                    : StatisticConstants.HISTOGRAM_MAX_BUCKET_NUM;
            taskInfoBuilder.setMaxBucketNum(maxBucketNum);
        } else {
            taskInfoBuilder.setAnalysisType(AnalysisType.COLUMN);
        }

        return taskInfoBuilder;
    }

    private void persistAnalysisJob(AnalysisTaskInfoBuilder taskInfoBuilder) throws DdlException {
        try {
            AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(
                    jobId).setTaskId(-1)
                    .setCatalogName(catalogName).setDbName(db)
                    .setTblName(tbl.getTbl())
                    .setJobType(JobType.MANUAL)
                    .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.INDEX)
                    .setScheduleType(ScheduleType.ONCE).build();
            AnalysisTaskInfoBuilder jobInfoBuilder = taskInfoBuilder.copy();
            AnalysisTaskInfo analysisTaskInfo = jobInfoBuilder.setTaskId(-1).build();
            StatisticsRepository.persistAnalysisTask(analysisTaskInfo);
        } catch (Throwable t) {
            throw new DdlException(t.getMessage(), t);
        }
    }

    private void createTaskForMVIdx(AnalyzeStmt analyzeStmt, String catalogName, String db, TableName tbl,
            Map<Long, AnalysisTaskInfo> analysisTaskInfos, long jobId) throws DdlException {
        if (!(analyzeStmt.isWholeTbl && analyzeStmt.getTable().getType().equals(TableType.OLAP))) {
    private void createTaskForMVIdx(TableIf table, AnalysisTaskInfoBuilder taskInfoBuilder,
            Map<Long, AnalysisTaskInfo> analysisTaskInfos, AnalysisType analysisType) throws DdlException {
        TableType type = table.getType();
        if (analysisType != AnalysisType.INDEX || !type.equals(TableType.OLAP)) {
            // not need to collect statistics for materialized view
            return;
        }
        OlapTable olapTable = (OlapTable) analyzeStmt.getTable();

        taskInfoBuilder.setAnalysisType(analysisType);
        OlapTable olapTable = (OlapTable) table;

        try {
            olapTable.readLock();
            for (MaterializedIndexMeta meta : olapTable.getIndexIdToMeta().values()) {
                if (meta.getDefineStmt() == null) {
                    continue;
                }
                AnalysisTaskInfoBuilder indexTaskInfoBuilder = taskInfoBuilder.copy();
                long indexId = meta.getIndexId();
                long taskId = Env.getCurrentEnv().getNextId();
                AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(
                        jobId).setTaskId(taskId)
                        .setCatalogName(catalogName).setDbName(db)
                        .setTblName(tbl.getTbl())
                        .setIndexId(meta.getIndexId()).setJobType(JobType.MANUAL)
                        .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.INDEX)
                        .setScheduleType(ScheduleType.ONCE).build();
                AnalysisTaskInfo analysisTaskInfo = indexTaskInfoBuilder.setIndexId(indexId)
                        .setTaskId(taskId).build();
                try {
                    StatisticsRepository.persistAnalysisTask(analysisTaskInfo);
                } catch (Exception e) {
@@ -159,19 +194,14 @@ public class AnalysisManager {
        }
    }

    private void createTaskForEachColumns(AnalyzeStmt analyzeStmt, String catalogName, String db, TableName tbl,
            Set<String> colNames, Map<Long, AnalysisTaskInfo> analysisTaskInfos,
            long jobId) throws DdlException {
    private void createTaskForEachColumns(Set<String> colNames, AnalysisTaskInfoBuilder taskInfoBuilder,
            Map<Long, AnalysisTaskInfo> analysisTaskInfos) throws DdlException {
        for (String colName : colNames) {
            AnalysisTaskInfoBuilder colTaskInfoBuilder = taskInfoBuilder.copy();
            long indexId = -1;
            long taskId = Env.getCurrentEnv().getNextId();
            AnalysisType analType = analyzeStmt.isHistogram ? AnalysisType.HISTOGRAM : AnalysisType.COLUMN;
            AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(jobId)
                    .setTaskId(taskId).setCatalogName(catalogName).setDbName(db)
                    .setTblName(tbl.getTbl()).setColName(colName)
                    .setJobType(JobType.MANUAL)
                    .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(analType)
                    .setState(AnalysisState.PENDING)
                    .setScheduleType(ScheduleType.ONCE).build();
            AnalysisTaskInfo analysisTaskInfo = colTaskInfoBuilder.setColName(colName)
                    .setIndexId(indexId).setTaskId(taskId).build();
            try {
                StatisticsRepository.persistAnalysisTask(analysisTaskInfo);
            } catch (Exception e) {

@@ -70,9 +70,11 @@ public class AnalysisTaskInfo {

    public final AnalysisType analysisType;

    // TODO: define constants or get them from configuration properties
    public final double sampleRate = 1.0;
    public final int maxBucketNum = 128;
    public final int samplePercent;

    public final int sampleRows;

    public final int maxBucketNum;

    public String message;

@@ -84,9 +86,9 @@ public class AnalysisTaskInfo {
    public final ScheduleType scheduleType;

    public AnalysisTaskInfo(long jobId, long taskId, String catalogName, String dbName, String tblName,
            String colName, Long indexId, JobType jobType,
            AnalysisMethod analysisMethod, AnalysisType analysisType, String message,
            int lastExecTimeInMs, AnalysisState state, ScheduleType scheduleType) {
            String colName, Long indexId, JobType jobType, AnalysisMethod analysisMethod,
            AnalysisType analysisType, int samplePercent, int sampleRows, int maxBucketNum,
            String message, int lastExecTimeInMs, AnalysisState state, ScheduleType scheduleType) {
        this.jobId = jobId;
        this.taskId = taskId;
        this.catalogName = catalogName;
@@ -97,6 +99,9 @@ public class AnalysisTaskInfo {
        this.jobType = jobType;
        this.analysisMethod = analysisMethod;
        this.analysisType = analysisType;
        this.samplePercent = samplePercent;
        this.sampleRows = sampleRows;
        this.maxBucketNum = maxBucketNum;
        this.message = message;
        this.lastExecTimeInMs = lastExecTimeInMs;
        this.state = state;

@@ -33,6 +33,9 @@ public class AnalysisTaskInfoBuilder {
    private JobType jobType;
    private AnalysisMethod analysisMethod;
    private AnalysisType analysisType;
    private int maxBucketNum;
    private int samplePercent;
    private int sampleRows;
    private String message;
    private int lastExecTimeInMs;
    private AnalysisState state;
@@ -88,6 +91,21 @@ public class AnalysisTaskInfoBuilder {
        return this;
    }

    public AnalysisTaskInfoBuilder setMaxBucketNum(int maxBucketNum) {
        this.maxBucketNum = maxBucketNum;
        return this;
    }

    public AnalysisTaskInfoBuilder setSamplePercent(int samplePercent) {
        this.samplePercent = samplePercent;
        return this;
    }

    public AnalysisTaskInfoBuilder setSampleRows(int sampleRows) {
        this.sampleRows = sampleRows;
        return this;
    }

    public AnalysisTaskInfoBuilder setMessage(String message) {
        this.message = message;
        return this;
@@ -109,7 +127,29 @@ public class AnalysisTaskInfoBuilder {
    }

    public AnalysisTaskInfo build() {
        return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName, tblName, colName,
                indexId, jobType, analysisMethod, analysisType, message, lastExecTimeInMs, state, scheduleType);
        return new AnalysisTaskInfo(jobId, taskId, catalogName, dbName, tblName,
                colName, indexId, jobType, analysisMethod, analysisType, samplePercent,
                sampleRows, maxBucketNum, message, lastExecTimeInMs, state, scheduleType);
    }

    public AnalysisTaskInfoBuilder copy() {
        return new AnalysisTaskInfoBuilder()
                .setJobId(jobId)
                .setTaskId(taskId)
                .setCatalogName(catalogName)
                .setDbName(dbName)
                .setTblName(tblName)
                .setColName(colName)
                .setIndexId(indexId)
                .setJobType(jobType)
                .setAnalysisMethod(analysisMethod)
                .setAnalysisType(analysisType)
                .setSamplePercent(samplePercent)
                .setSampleRows(sampleRows)
                .setMaxBucketNum(maxBucketNum)
                .setMessage(message)
                .setLastExecTimeInMs(lastExecTimeInMs)
                .setState(state)
                .setScheduleType(scheduleType);
    }
}

@@ -24,6 +24,7 @@ import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.qe.StmtExecutor;
import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;

import com.google.common.annotations.VisibleForTesting;
@@ -189,4 +190,15 @@ public abstract class BaseAnalysisTask {
        return unsupportedType.contains(type);
    }

    protected String getSampleExpression() {
        if (info.analysisMethod == AnalysisMethod.FULL) {
            return "";
        }
        // TODO Add sampling methods for external tables
        if (info.samplePercent > 0) {
            return String.format("TABLESAMPLE(%d PERCENT)", info.samplePercent);
        } else {
            return String.format("TABLESAMPLE(%d ROWS)", info.sampleRows);
        }
    }
}

@@ -19,8 +19,7 @@ package org.apache.doris.statistics;

import org.apache.doris.catalog.Env;
import org.apache.doris.common.FeConstants;
import org.apache.doris.qe.AutoCloseConnectContext;
import org.apache.doris.qe.StmtExecutor;
import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisMethod;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.annotations.VisibleForTesting;
@@ -28,8 +27,6 @@ import org.apache.commons.text.StringSubstitutor;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Each task analyze one column.
@@ -49,10 +46,7 @@ public class HistogramTask extends BaseAnalysisTask {
        + " HISTOGRAM(`${colName}`, ${maxBucketNum}) AS buckets, "
        + " NOW() AS create_time "
        + "FROM "
        + " `${dbName}`.`${tblName}`";

    private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE_PART = ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE
        + " PARTITION (${partName})";
        + " `${dbName}`.`${tblName}` ${sampleExpr}";

    @VisibleForTesting
    public HistogramTask() {
@@ -71,44 +65,29 @@ public class HistogramTask extends BaseAnalysisTask {
        params.put("catalogId", String.valueOf(catalog.getId()));
        params.put("dbId", String.valueOf(db.getId()));
        params.put("tblId", String.valueOf(tbl.getId()));
        params.put("idxId", "-1");
        params.put("idxId", String.valueOf(info.indexId));
        params.put("colId", String.valueOf(info.colName));
        params.put("dbName", info.dbName);
        params.put("tblName", String.valueOf(info.tblName));
        params.put("colName", String.valueOf(info.colName));
        params.put("sampleRate", String.valueOf(info.sampleRate));
        params.put("sampleRate", getSampleRateFunction());
        params.put("sampleExpr", getSampleExpression());
        params.put("maxBucketNum", String.valueOf(info.maxBucketNum));
        params.put("percentValue", String.valueOf((int) (info.sampleRate * 100)));

        String histogramSql;
        Set<String> partitionNames = tbl.getPartitionNames();

        if (partitionNames.isEmpty()) {
            StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
            histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE);
        } else {
            try {
                tbl.readLock();
                String partNames = partitionNames.stream()
                        .filter(x -> tbl.getPartition(x) != null)
                        .map(partName -> "`" + partName + "`")
                        .collect(Collectors.joining(","));
                params.put("partName", partNames);
                StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
                histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_PART);
            } finally {
                tbl.readUnlock();
            }
        }

        LOG.info("SQL to collect the histogram:\n {}", histogramSql);

        try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
            r.connectContext.getSessionVariable().disableNereidsPlannerOnce();
            this.stmtExecutor = new StmtExecutor(r.connectContext, histogramSql);
            this.stmtExecutor.execute();
        }

        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
        StatisticsUtil.execUpdate(stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE_TABLE));
        Env.getCurrentEnv().getStatisticsCache().refreshHistogramSync(tbl.getId(), -1, col.getName());
    }

    private String getSampleRateFunction() {
        if (info.analysisMethod == AnalysisMethod.FULL) {
            return "0";
        }
        if (info.samplePercent > 0) {
            return String.valueOf(info.samplePercent / 100.0);
        } else {
            double sampRate = (double) info.sampleRows / tbl.getRowCount();
            return sampRate >= 1 ? "1.0" : String.format("%.4f", sampRate);
        }
    }
}

@@ -50,11 +50,11 @@ import java.util.Map;
public class MVAnalysisTask extends BaseAnalysisTask {

    private static final String ANALYZE_MV_PART = INSERT_PART_STATISTICS
            + " FROM (${sql}) mv";
            + " FROM (${sql}) mv ${sampleExpr}";

    private static final String ANALYZE_MV_COL = INSERT_COL_STATISTICS
            + " (SELECT NDV(`${colName}`) AS ndv "
            + " FROM (${sql}) mv) t2\n";
            + " FROM (${sql}) mv) t2";

    private MaterializedIndexMeta meta;

@@ -118,9 +118,11 @@ public class MVAnalysisTask extends BaseAnalysisTask {
            params.put("colName", colName);
            params.put("tblName", String.valueOf(info.tblName));
            params.put("sql", sql);
            params.put("sampleExpr", getSampleExpression());
            StatisticsUtil.execUpdate(ANALYZE_MV_PART, params);
        }
        params.remove("partId");
        params.remove("sampleExpr");
        params.put("type", column.getType().toString());
        StatisticsUtil.execUpdate(ANALYZE_MV_COL, params);
        Env.getCurrentEnv().getStatisticsCache()

@@ -40,7 +40,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {

    private static final String ANALYZE_PARTITION_SQL_TEMPLATE = INSERT_PART_STATISTICS
            + "FROM `${dbName}`.`${tblName}` "
            + "PARTITION ${partName}";
            + "PARTITION ${partName} ${sampleExpr}";

    // TODO Currently, NDV is computed for the full table; in fact,
    // NDV should only be computed for the relevant partition.
@@ -64,12 +64,13 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
        params.put("catalogId", String.valueOf(catalog.getId()));
        params.put("dbId", String.valueOf(db.getId()));
        params.put("tblId", String.valueOf(tbl.getId()));
        params.put("idxId", "-1");
        params.put("idxId", String.valueOf(info.indexId));
        params.put("colId", String.valueOf(info.colName));
        params.put("dataSizeFunction", getDataSizeFunction(col));
        params.put("dbName", info.dbName);
        params.put("colName", String.valueOf(info.colName));
        params.put("tblName", String.valueOf(info.tblName));
        params.put("sampleExpr", getSampleExpression());
        List<String> partitionAnalysisSQLs = new ArrayList<>();
        try {
            tbl.readLock();
@@ -90,6 +91,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
        }
        execSQLs(partitionAnalysisSQLs);
        params.remove("partId");
        params.remove("sampleExpr");
        params.put("type", col.getType().toString());
        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
        String sql = stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE);

@@ -68,4 +68,5 @@ public class StatisticConstants {
    public static final int FETCH_LIMIT = 10000;
    public static final int FETCH_INTERVAL_IN_MS = 500;

    public static final int HISTOGRAM_MAX_BUCKET_NUM = 128;
}

@@ -418,6 +418,7 @@ import org.apache.doris.qe.SqlModeHelper;
        keywordMap.put("soname", new Integer(SqlParserSymbols.KW_SONAME));
        keywordMap.put("split", new Integer(SqlParserSymbols.KW_SPLIT));
        keywordMap.put("sql_block_rule", new Integer(SqlParserSymbols.KW_SQL_BLOCK_RULE));
        keywordMap.put("sample", new Integer(SqlParserSymbols.KW_SAMPLE));
        keywordMap.put("start", new Integer(SqlParserSymbols.KW_START));
        keywordMap.put("stats", new Integer(SqlParserSymbols.KW_STATS));
        keywordMap.put("status", new Integer(SqlParserSymbols.KW_STATUS));

@@ -75,7 +75,7 @@ public class HistogramTaskTest extends TestWithFeService {

        AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
        StmtExecutor executor = getSqlStmtExecutor(
                "ANALYZE TABLE t1 UPDATE HISTOGRAM ON col1 PARTITION (p_201701)");
                "ANALYZE TABLE t1(col1) UPDATE HISTOGRAM");
        Assertions.assertNotNull(executor);

        ConcurrentMap<Long, Map<Long, AnalysisTaskInfo>> taskMap =

regression-test/data/statistics/show_stats_test.out (new file, +7 lines)
@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
city 6.0 4.0 0.0 42.0 6.0 '上海' '深圳'

-- !sql --
city VARCHAR(20) 0.0 4 [{"lower_expr":"上海","upper_expr":"上海","count":1.0,"pre_sum":0.0,"ndv":1.0},{"lower_expr":"北京","upper_expr":"北京","count":2.0,"pre_sum":1.0,"ndv":1.0},{"lower_expr":"广州","upper_expr":"广州","count":1.0,"pre_sum":3.0,"ndv":1.0},{"lower_expr":"深圳","upper_expr":"深圳","count":2.0,"pre_sum":4.0,"ndv":1.0}]

@@ -34,7 +34,7 @@ suite("alter_column_stats") {
    sql """INSERT INTO statistics_test VALUES(3, 'c', '2013-01-01')"""

    sql """ANALYZE TABLE statistics_test"""
    sql """ANALYZE TABLE statistics_test UPDATE HISTOGRAM ON col1,col2"""
    sql """ANALYZE TABLE statistics_test(col1, col2) UPDATE HISTOGRAM"""

    sleep(9000)

@@ -29,6 +29,10 @@ suite("analyze_test") {

    def tblName3 = "${dbName3}.analyze_test_tbl_3"

    def dbName4 = "analyze_test_db_4"

    def tblName4 = "${dbName4}.analyze_test_tbl_4"


    sql """
        DROP DATABASE IF EXISTS ${dbName1};
@@ -55,6 +59,13 @@ suite("analyze_test") {
        CREATE DATABASE ${dbName3};
    """

    sql """
        DROP DATABASE IF EXISTS ${dbName4}
    """

    sql """
        CREATE DATABASE ${dbName4};
    """

    sql """
        DROP TABLE IF EXISTS ${tblName1}
@@ -95,6 +106,18 @@ suite("analyze_test") {
        "enable_unique_key_merge_on_write"="true"
    );"""

    sql """
        DROP TABLE IF EXISTS ${tblName4}
    """

    sql """CREATE TABLE ${tblName4} (analyze_test_col1 varchar(11451) not null, analyze_test_col2 int not null, analyze_test_col3 int not null)
        UNIQUE KEY(analyze_test_col1)
        DISTRIBUTED BY HASH(analyze_test_col1)
        BUCKETS 3
        PROPERTIES(
            "replication_num"="1",
            "enable_unique_key_merge_on_write"="true"
        );"""

    sql """insert into ${tblName1} values(1, 2, 3);"""
    sql """insert into ${tblName1} values(4, 5, 6);"""
@@ -114,6 +137,12 @@ suite("analyze_test") {
    sql """insert into ${tblName3} values(3, 8, 2);"""
    sql """insert into ${tblName3} values(5, 2, 1);"""

    sql """insert into ${tblName4} values(1, 2, 3);"""
    sql """insert into ${tblName4} values(4, 5, 6);"""
    sql """insert into ${tblName4} values(7, 1, 9);"""
    sql """insert into ${tblName4} values(3, 8, 2);"""
    sql """insert into ${tblName4} values(5, 2, 1);"""

    sql """
        delete from __internal_schema.column_statistics where col_id in ('analyze_test_col1', 'analyze_test_col2', 'analyze_test_col3')
    """
@@ -142,6 +171,22 @@ suite("analyze_test") {
        ALTER TABLE ${tblName3} DROP COLUMN analyze_test_col2;
    """

    sql """
        analyze sync table ${tblName4} with sample rows 5;
    """

    sql """
        analyze table ${tblName4} with sync with incremental with sample percent 20;
    """

    sql """
        analyze sync table ${tblName4} update histogram with sample percent 20 with buckets 2;
    """

    sql """
        analyze table ${tblName4} update histogram with sync with sample rows 20 with buckets 2;
    """

    sql """
        DROP TABLE ${tblName2}
    """
@@ -150,6 +195,10 @@ suite("analyze_test") {
        DROP DATABASE ${dbName1}
    """

    sql """
        DROP DATABASE ${dbName4}
    """

    sql """
        DROP EXPIRED STATS
    """

regression-test/suites/statistics/show_stats_test.groovy (new file, +78 lines)
@@ -0,0 +1,78 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_show_stats") {
    def dbName = "stats_test"
    def tblName = "${dbName}.example_tbl"

    sql "DROP DATABASE IF EXISTS ${dbName}"

    sql "CREATE DATABASE IF NOT EXISTS ${dbName};"

    sql "DROP TABLE IF EXISTS ${tblName}"

    sql """
        CREATE TABLE IF NOT EXISTS ${tblName} (
            `user_id` LARGEINT NOT NULL,
            `date` DATE NOT NULL,
            `city` VARCHAR(20),
            `age` SMALLINT,
            `sex` TINYINT,
            `last_visit_date` DATETIME REPLACE,
            `cost` BIGINT SUM,
            `max_dwell_time` INT MAX,
            `min_dwell_time` INT MIN
        ) ENGINE=OLAP
        AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`)
        PARTITION BY LIST(`date`)
        (
            PARTITION `p_201701` VALUES IN ("2017-10-01"),
            PARTITION `p_201702` VALUES IN ("2017-10-02"),
            PARTITION `p_201703` VALUES IN ("2017-10-03"),
            PARTITION `default`
        )
        DISTRIBUTED BY HASH(`user_id`) BUCKETS 1
        PROPERTIES (
            "replication_num" = "1"
        );
    """

    sql """
        INSERT INTO ${tblName} (`user_id`, `date`, `city`, `age`,
                                `sex`, `last_visit_date`, `cost`,
                                `max_dwell_time`, `min_dwell_time`)
        VALUES (10000, "2017-10-01", "北京", 20, 0, "2017-10-01 07:00:00", 15, 2, 2),
            (10000, "2017-10-01", "北京", 20, 0, "2017-10-01 06:00:00", 20, 10, 10),
            (10001, "2017-10-01", "北京", 30, 1, "2017-10-01 17:05:45", 2, 22, 22),
            (10002, "2017-10-02", "上海", 20, 1, "2017-10-02 12:59:12", 200, 5, 5),
            (10003, "2017-10-02", "广州", 32, 0, "2017-10-02 11:20:00", 30, 11, 11),
            (10004, "2017-10-01", "深圳", 35, 0, "2017-10-01 10:00:15", 100, 3, 3),
            (10004, "2017-10-03", "深圳", 35, 0, "2017-10-03 10:20:22", 11, 6, 6);
    """

    sql "ANALYZE sync TABLE ${tblName};"

    sql "ANALYZE sync TABLE ${tblName} UPDATE HISTOGRAM;"

    qt_sql "SHOW COLUMN STATS ${tblName}(city);"

    qt_sql "SHOW COLUMN HISTOGRAM ${tblName}(city);"

    sql "DROP DATABASE IF EXISTS ${dbName}"

    sql "CREATE DATABASE IF NOT EXISTS ${dbName};"
}