Add statistics struct and support manually injecting statistics (#6420)

* Add statistics struct and support manually injecting statistics

This PR mainly adds the data structures used to hold statistical information
and the ability to modify that statistical information manually.
Statistical information is stored in a dedicated statistics package,
and the 'statistics manager' serves as the unified entry point to it.
For the detailed data structures and their explanation, please refer to the comments on each class.

Manual modification of statistics covers both table statistics and column statistics.
The syntax is explained in issue #6370.

* Show table and column statistics

'SHOW TABLE STATS' is used to show the statistics of tables.
'SHOW COLUMN STATS' is used to show the statistics of columns.

Currently, only the tables and columns whose statistics have been set
are displayed in the results.
This commit is contained in:
EmmyMiao87
2021-08-16 17:20:05 +08:00
committed by GitHub
parent 4be06a470f
commit 63a0d9d23a
16 changed files with 1010 additions and 14 deletions

View File

@ -263,7 +263,7 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALIAS, KW_ALL, KW_A
KW_RIGHT, KW_ROLE, KW_ROLES, KW_ROLLBACK, KW_ROLLUP, KW_ROUTINE, KW_ROW, KW_ROWS,
KW_S3, KW_SCHEMA, KW_SCHEMAS, KW_SECOND, KW_SELECT, KW_SEMI, KW_SERIALIZABLE, KW_SESSION, KW_SET, KW_SETS, KW_SET_VAR, KW_SHOW, KW_SIGNED,
KW_SKEW,
KW_SMALLINT, KW_SNAPSHOT, KW_SONAME, KW_SPLIT, KW_START, KW_STATUS, KW_STOP, KW_STORAGE, KW_STREAM, KW_STRING, KW_STRUCT,
KW_SMALLINT, KW_SNAPSHOT, KW_SONAME, KW_SPLIT, KW_START, KW_STATUS, KW_STATS, KW_STOP, KW_STORAGE, KW_STREAM, KW_STRING, KW_STRUCT,
KW_SUM, KW_SUPERUSER, KW_SYNC, KW_SYSTEM,
KW_TABLE, KW_TABLES, KW_TABLET, KW_TASK, KW_TEMPORARY, KW_TERMINATED, KW_THAN, KW_TIME, KW_THEN, KW_TIMESTAMP, KW_TINYINT,KW_TRASH,
KW_TO, KW_TRANSACTION, KW_TRIGGERS, KW_TRIM, KW_TRUE, KW_TRUNCATE, KW_TYPE, KW_TYPES,
@ -351,7 +351,7 @@ nonterminal ArrayList<String> ident_list;
nonterminal PartitionNames opt_partition_names, partition_names;
nonterminal ClusterName cluster_name;
nonterminal ClusterName des_cluster_name;
nonterminal TableName table_name;
nonterminal TableName table_name, opt_table_name;
nonterminal FunctionName function_name;
nonterminal EncryptKeyName encryptkey_name;
nonterminal Expr pre_filter_clause;
@ -415,7 +415,8 @@ nonterminal SetVar option_value, option_value_follow_option_type, option_value_n
nonterminal List<SetVar> option_value_list, option_value_list_continued, start_option_value_list,
start_option_value_list_following_option_type, user_property_list;
nonterminal Map<String, String> key_value_map, opt_key_value_map, opt_properties, opt_ext_properties, opt_enable_feature_properties;
nonterminal Map<String, String> key_value_map, opt_key_value_map, opt_properties,
opt_ext_properties, opt_enable_feature_properties, properties;
nonterminal ColumnDef column_definition;
nonterminal IndexDef index_definition;
nonterminal ArrayList<ColumnDef> column_definition_list;
@ -832,6 +833,20 @@ alter_stmt ::=
{:
RESULT = new AlterSqlBlockRuleStmt(ruleName, properties);
:}
| KW_ALTER KW_TABLE table_name:tbl KW_SET KW_STATS LPAREN key_value_map:map RPAREN
{:
RESULT = new AlterTableStatsStmt(tbl, map);
:}
| KW_ALTER KW_TABLE table_name:tbl KW_MODIFY KW_COLUMN ident:columnName
KW_SET KW_STATS LPAREN key_value_map:map RPAREN
{:
RESULT = new AlterColumnStatsStmt(tbl, columnName, map);
:}
| KW_ALTER KW_TABLE table_name:tbl KW_SET LPAREN key_value_map:properties RPAREN
{:
ModifyTablePropertiesClause clause = new ModifyTablePropertiesClause(properties);
RESULT = new AlterTableStmt(tbl, Lists.newArrayList(clause));
:}
;
opt_datasource_properties ::=
@ -986,10 +1001,6 @@ alter_table_clause ::=
{:
RESULT = new ReorderColumnsClause(cols, rollup, properties);
:}
| KW_SET LPAREN key_value_map:properties RPAREN
{:
RESULT = new ModifyTablePropertiesClause(properties);
:}
| KW_ADD opt_tmp:isTempPartition single_partition_desc:desc opt_distribution:distribution opt_properties:properties
{:
RESULT = new AddPartitionClause(desc, distribution, properties, isTempPartition);
@ -1066,9 +1077,9 @@ opt_enable_feature_properties ::=
{:
RESULT = null;
:}
| KW_WITH KW_PROPERTIES LPAREN key_value_map:map RPAREN
| KW_WITH properties:properties
{:
RESULT = map;
RESULT = properties;
:}
;
@ -2271,9 +2282,9 @@ opt_properties ::=
{:
RESULT = null;
:}
| KW_PROPERTIES LPAREN key_value_map:map RPAREN
| properties:properties
{:
RESULT = map;
RESULT = properties;
:}
;
@ -2281,7 +2292,14 @@ opt_ext_properties ::=
{:
RESULT = null;
:}
| KW_BROKER KW_PROPERTIES LPAREN key_value_map:map RPAREN
| KW_BROKER properties:properties
{:
RESULT = properties;
:}
;
properties ::=
KW_PROPERTIES LPAREN key_value_map:map RPAREN
{:
RESULT = map;
:}
@ -2769,6 +2787,16 @@ show_param ::=
{:
RESULT = new ShowSyncJobStmt(dbName);
:}
/* show table stats */
| KW_TABLE KW_STATS opt_table_name:tbl
{:
RESULT = new ShowTableStatsStmt(tbl);
:}
/* show column stats */
| KW_COLUMN KW_STATS table_name:tbl
{:
RESULT = new ShowColumnStatsStmt(tbl);
:}
;
opt_tmp ::=
@ -3818,6 +3846,16 @@ star_expr ::=
:}
;
/* Optional table name: the empty alternative yields null, otherwise the parsed table_name is passed through. */
opt_table_name ::=
{:
RESULT = null;
:}
| table_name:tbl
{:
RESULT = tbl;
:}
;
table_name ::=
ident:tbl
{: RESULT = new TableName(null, tbl); :}
@ -5295,6 +5333,8 @@ keyword ::=
{: RESULT = id; :}
| KW_STATUS:id
{: RESULT = id; :}
| KW_STATS:id
{: RESULT = id; :}
| KW_STORAGE:id
{: RESULT = id; :}
| KW_STRING:id

View File

@ -0,0 +1,87 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.analysis;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.UserException;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStats;
import com.google.common.collect.ImmutableSet;
import java.util.Map;
import java.util.Optional;
/**
 * Statement that manually sets the statistics of one column:
 * {@code ALTER TABLE table_name MODIFY COLUMN column_name SET STATS (key_value_map)}.
 *
 * <p>{@link #analyze(Analyzer)} resolves the table name, rejects any statistic name that is not
 * one of the configurable column statistics, and checks the ALTER privilege on the table.
 */
public class AlterColumnStatsStmt extends DdlStmt {

    // Names of the column statistics that may be set manually.
    private static final ImmutableSet<String> CONFIGURABLE_PROPERTIES_SET = new ImmutableSet.Builder<String>()
            .add(ColumnStats.NDV)
            .add(ColumnStats.AVG_SIZE)
            .add(ColumnStats.MAX_SIZE)
            .add(ColumnStats.NUM_NULLS)
            .add(ColumnStats.MIN_VALUE)
            .add(ColumnStats.MAX_VALUE)
            .build();

    private final TableName tableName;
    private final String columnName;
    private final Map<String, String> properties;

    public AlterColumnStatsStmt(TableName tableName, String columnName, Map<String, String> properties) {
        this.tableName = tableName;
        this.columnName = columnName;
        this.properties = properties;
    }

    /**
     * Validates the statement.
     *
     * @throws UserException if the table name cannot be analyzed, a statistic name is not
     *                       configurable, or the current user lacks the ALTER privilege on the table
     */
    @Override
    public void analyze(Analyzer analyzer) throws UserException {
        super.analyze(analyzer);
        // check table name
        tableName.analyze(analyzer);
        // check properties
        Optional<String> invalidName = properties.keySet().stream()
                .filter(statsName -> !isConfigurable(statsName))
                .findFirst();
        if (invalidName.isPresent()) {
            throw new AnalysisException(invalidName.get() + " is invalid statistic");
        }
        // check auth
        if (!Catalog.getCurrentCatalog().getAuth().checkTblPriv(ConnectContext.get(), tableName.getDb(),
                tableName.getTbl(), PrivPredicate.ALTER)) {
            ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "ALTER COLUMN STATS",
                    ConnectContext.get().getQualifiedUser(),
                    ConnectContext.get().getRemoteIP(),
                    tableName.getTbl());
        }
    }

    /**
     * Returns whether {@code statsName} names a configurable statistic, ignoring case.
     * The comparison uses equalsIgnoreCase rather than toLowerCase() so that the result does not
     * depend on the JVM default locale, and so that it agrees with the case-insensitive matching
     * done by ColumnStats.updateStats.
     */
    private static boolean isConfigurable(String statsName) {
        return CONFIGURABLE_PROPERTIES_SET.stream().anyMatch(allowed -> allowed.equalsIgnoreCase(statsName));
    }

    public TableName getTableName() {
        return tableName;
    }

    public String getColumnName() {
        return columnName;
    }

    public Map<String, String> getProperties() {
        return properties;
    }
}

View File

@ -0,0 +1,77 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.analysis;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.UserException;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.TableStats;
import com.google.common.collect.ImmutableSet;
import java.util.Map;
import java.util.Optional;
/**
 * Statement that manually sets the statistics of a table:
 * {@code ALTER TABLE table_name SET STATS (key_value_map)}.
 *
 * <p>{@link #analyze(Analyzer)} resolves the table name, rejects any statistic name that is not
 * one of the configurable table statistics, and checks the ALTER privilege on the table.
 */
public class AlterTableStatsStmt extends DdlStmt {

    // Names of the table statistics that may be set manually.
    private static final ImmutableSet<String> CONFIGURABLE_PROPERTIES_SET = new ImmutableSet.Builder<String>()
            .add(TableStats.DATA_SIZE)
            .add(TableStats.ROW_COUNT)
            .build();

    private final TableName tableName;
    private final Map<String, String> properties;

    public AlterTableStatsStmt(TableName tableName, Map<String, String> properties) {
        this.tableName = tableName;
        this.properties = properties;
    }

    /**
     * Validates the statement.
     *
     * @throws UserException if the table name cannot be analyzed, a statistic name is not
     *                       configurable, or the current user lacks the ALTER privilege on the table
     */
    @Override
    public void analyze(Analyzer analyzer) throws UserException {
        super.analyze(analyzer);
        // check table name
        tableName.analyze(analyzer);
        // check properties
        Optional<String> invalidName = properties.keySet().stream()
                .filter(statsName -> !isConfigurable(statsName))
                .findFirst();
        if (invalidName.isPresent()) {
            throw new AnalysisException(invalidName.get() + " is invalid statistic");
        }
        // check auth
        if (!Catalog.getCurrentCatalog().getAuth().checkTblPriv(ConnectContext.get(), tableName.getDb(),
                tableName.getTbl(), PrivPredicate.ALTER)) {
            ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "ALTER TABLE STATS",
                    ConnectContext.get().getQualifiedUser(),
                    ConnectContext.get().getRemoteIP(),
                    tableName.getTbl());
        }
    }

    /**
     * Returns whether {@code statsName} names a configurable statistic, ignoring case.
     * equalsIgnoreCase is used instead of toLowerCase() so that the result does not depend on
     * the JVM default locale.
     */
    private static boolean isConfigurable(String statsName) {
        return CONFIGURABLE_PROPERTIES_SET.stream().anyMatch(allowed -> allowed.equalsIgnoreCase(statsName));
    }

    public TableName getTableName() {
        return tableName;
    }

    public Map<String, String> getProperties() {
        return properties;
    }
}

View File

@ -0,0 +1,67 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.UserException;
import org.apache.doris.qe.ShowResultSetMetaData;
import org.apache.doris.statistics.ColumnStats;
import com.google.common.collect.ImmutableList;
/**
 * Statement behind {@code SHOW COLUMN STATS table_name}.
 * The result has one leading "column_name" column followed by one column per column statistic.
 */
public class ShowColumnStatsStmt extends ShowStmt {

    // Result-set column titles, in display order.
    private static final ImmutableList<String> TITLE_NAMES = ImmutableList.of(
            "column_name",
            ColumnStats.NDV,
            ColumnStats.AVG_SIZE,
            ColumnStats.MAX_SIZE,
            ColumnStats.NUM_NULLS,
            ColumnStats.MIN_VALUE,
            ColumnStats.MAX_VALUE);

    private TableName tableName;

    public ShowColumnStatsStmt(TableName tableName) {
        this.tableName = tableName;
    }

    public TableName getTableName() {
        return this.tableName;
    }

    @Override
    public void analyze(Analyzer analyzer) throws AnalysisException, UserException {
        super.analyze(analyzer);
        this.tableName.analyze(analyzer);
    }

    /** Builds the result-set metadata: every title becomes a VARCHAR(30) column. */
    @Override
    public ShowResultSetMetaData getMetaData() {
        ShowResultSetMetaData.Builder metaBuilder = ShowResultSetMetaData.builder();
        TITLE_NAMES.forEach(name -> metaBuilder.addColumn(new Column(name, ScalarType.createVarchar(30))));
        return metaBuilder.build();
    }
}

View File

@ -0,0 +1,90 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.UserException;
import org.apache.doris.qe.ShowResultSetMetaData;
import org.apache.doris.statistics.TableStats;
import com.google.common.collect.ImmutableList;
import org.apache.parquet.Preconditions;
import org.apache.parquet.Strings;
/**
 * Statement behind {@code SHOW TABLE STATS [table_name]}.
 *
 * <p>When a table name is given, it is analyzed and supplies both the table and the db name.
 * When it is omitted, the analyzer's default db is used and no table name is available.
 */
public class ShowTableStatsStmt extends ShowStmt {

    // Result-set column titles, in display order.
    private static final ImmutableList<String> TITLE_NAMES =
            new ImmutableList.Builder<String>()
                    .add("table_name")
                    .add(TableStats.ROW_COUNT)
                    .add(TableStats.DATA_SIZE)
                    .build();

    // null when the statement was written without a table name
    private TableName tableName;

    // after analyzed
    // Only one of @tableName and @dbName is set at any time:
    // @dbName is filled in only when @tableName is null.
    private String dbName;

    public ShowTableStatsStmt(TableName tableName) {
        this.tableName = tableName;
    }

    /**
     * Returns the table name, or null when the statement did not name a table.
     * Must only be called after the statement has been analyzed.
     */
    public String getTableName() {
        Preconditions.checkArgument(isAnalyzed(), "The table name must be obtained after the parsing is complete");
        if (tableName == null) {
            return null;
        }
        return tableName.getTbl();
    }

    /**
     * Returns the db of the named table, or the default db recorded during analysis when no
     * table was named. Must only be called after the statement has been analyzed.
     */
    public String getDbName() {
        Preconditions.checkArgument(isAnalyzed(), "The db name must be obtained after the parsing is complete");
        if (tableName == null) {
            return dbName;
        }
        return tableName.getDb();
    }

    /**
     * Analyzes the table name when present; otherwise records the analyzer's default db and
     * reports ERR_NO_DB_ERROR if there is none.
     */
    @Override
    public void analyze(Analyzer analyzer) throws UserException {
        super.analyze(analyzer);
        if (tableName == null) {
            dbName = analyzer.getDefaultDb();
            if (Strings.isNullOrEmpty(dbName)) {
                ErrorReport.reportAnalysisException(ErrorCode.ERR_NO_DB_ERROR);
            }
            return;
        }
        tableName.analyze(analyzer);
    }

    /** Builds the result-set metadata: every title becomes a VARCHAR(30) column. */
    @Override
    public ShowResultSetMetaData getMetaData() {
        ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder();
        for (String title : TITLE_NAMES) {
            builder.addColumn(new Column(title, ScalarType.createVarchar(30)));
        }
        return builder.build();
    }
}

View File

@ -209,6 +209,7 @@ import org.apache.doris.qe.JournalObservable;
import org.apache.doris.qe.SessionVariable;
import org.apache.doris.qe.VariableMgr;
import org.apache.doris.service.FrontendOptions;
import org.apache.doris.statistics.StatisticsManager;
import org.apache.doris.system.Backend;
import org.apache.doris.system.Backend.BackendState;
import org.apache.doris.system.Frontend;
@ -393,6 +394,7 @@ public class Catalog {
private DeployManager deployManager;
private TabletStatMgr tabletStatMgr;
private StatisticsManager statisticsManager;
private PaloAuth auth;
@ -545,7 +547,9 @@ public class Catalog {
this.resourceMgr = new ResourceMgr();
this.globalTransactionMgr = new GlobalTransactionMgr(this);
this.tabletStatMgr = new TabletStatMgr();
this.statisticsManager = new StatisticsManager();
this.auth = new PaloAuth();
this.domainResolver = new DomainResolver(auth);
@ -690,6 +694,10 @@ public class Catalog {
return getCurrentCatalog().getAuditEventProcessor();
}
/** Returns the statistics manager held by this catalog instance. */
public StatisticsManager getStatisticsManager() {
    return this.statisticsManager;
}
// Use tryLock to avoid potential dead lock
private boolean tryLock(boolean mustLock) {
while (true) {

View File

@ -347,6 +347,30 @@ public class Util {
return result;
}
/**
 * Parses a float-valued property.
 *
 * @param valStr     the raw property value; null or empty means "not specified"
 * @param defaultVal the value returned when {@code valStr} is null or empty
 * @param pred       optional validity check applied to the parsed value; may be null
 * @param hintMsg    message of the exception thrown when the value is rejected
 * @return {@code defaultVal} when {@code valStr} is null or empty, otherwise the parsed value
 * @throws AnalysisException if {@code valStr} is not a finite float or fails {@code pred}
 */
public static float getFloatPropertyOrDefault(String valStr, float defaultVal, Predicate<Float> pred,
        String hintMsg) throws AnalysisException {
    if (Strings.isNullOrEmpty(valStr)) {
        return defaultVal;
    }

    final float result;
    try {
        result = Float.parseFloat(valStr);
    } catch (NumberFormatException e) {
        throw new AnalysisException(hintMsg);
    }

    // Float parsing accepts "NaN" and "Infinity"; neither is a usable property value,
    // and "Infinity" would otherwise slip through lower-bound-only predicates.
    if (!Float.isFinite(result)) {
        throw new AnalysisException(hintMsg);
    }

    if (pred != null && !pred.test(result)) {
        throw new AnalysisException(hintMsg);
    }
    return result;
}
public static boolean getBooleanPropertyOrDefault(String valStr, boolean defaultVal, String hintMsg)
throws AnalysisException {
if (Strings.isNullOrEmpty(valStr)) {

View File

@ -23,11 +23,13 @@ import org.apache.doris.analysis.AdminRepairTableStmt;
import org.apache.doris.analysis.AdminSetConfigStmt;
import org.apache.doris.analysis.AdminSetReplicaStatusStmt;
import org.apache.doris.analysis.AlterClusterStmt;
import org.apache.doris.analysis.AlterColumnStatsStmt;
import org.apache.doris.analysis.AlterDatabaseQuotaStmt;
import org.apache.doris.analysis.AlterDatabaseRename;
import org.apache.doris.analysis.AlterRoutineLoadStmt;
import org.apache.doris.analysis.AlterSqlBlockRuleStmt;
import org.apache.doris.analysis.AlterSystemStmt;
import org.apache.doris.analysis.AlterTableStatsStmt;
import org.apache.doris.analysis.AlterTableStmt;
import org.apache.doris.analysis.AlterViewStmt;
import org.apache.doris.analysis.BackupStmt;
@ -131,6 +133,10 @@ public class DdlExecutor {
catalog.dropMaterializedView((DropMaterializedViewStmt) ddlStmt);
} else if (ddlStmt instanceof AlterTableStmt) {
catalog.alterTable((AlterTableStmt) ddlStmt);
} else if (ddlStmt instanceof AlterTableStatsStmt) {
catalog.getStatisticsManager().alterTableStatistics((AlterTableStatsStmt) ddlStmt);
} else if (ddlStmt instanceof AlterColumnStatsStmt) {
catalog.getStatisticsManager().alterColumnStatistics((AlterColumnStatsStmt) ddlStmt);
} else if (ddlStmt instanceof AlterViewStmt) {
catalog.alterView((AlterViewStmt) ddlStmt);
} else if (ddlStmt instanceof CancelAlterTableStmt) {

View File

@ -31,6 +31,7 @@ import org.apache.doris.analysis.ShowBackupStmt;
import org.apache.doris.analysis.ShowBrokerStmt;
import org.apache.doris.analysis.ShowClusterStmt;
import org.apache.doris.analysis.ShowCollationStmt;
import org.apache.doris.analysis.ShowColumnStatsStmt;
import org.apache.doris.analysis.ShowColumnStmt;
import org.apache.doris.analysis.ShowCreateDbStmt;
import org.apache.doris.analysis.ShowCreateFunctionStmt;
@ -48,7 +49,6 @@ import org.apache.doris.analysis.ShowFrontendsStmt;
import org.apache.doris.analysis.ShowFunctionsStmt;
import org.apache.doris.analysis.ShowGrantsStmt;
import org.apache.doris.analysis.ShowIndexStmt;
import org.apache.doris.analysis.ShowEncryptKeysStmt;
import org.apache.doris.analysis.ShowLoadProfileStmt;
import org.apache.doris.analysis.ShowLoadStmt;
import org.apache.doris.analysis.ShowLoadWarningsStmt;
@ -73,12 +73,13 @@ import org.apache.doris.analysis.ShowStmt;
import org.apache.doris.analysis.ShowStreamLoadStmt;
import org.apache.doris.analysis.ShowSyncJobStmt;
import org.apache.doris.analysis.ShowTableIdStmt;
import org.apache.doris.analysis.ShowTableStatsStmt;
import org.apache.doris.analysis.ShowTableStatusStmt;
import org.apache.doris.analysis.ShowTableStmt;
import org.apache.doris.analysis.ShowTabletStmt;
import org.apache.doris.analysis.ShowTransactionStmt;
import org.apache.doris.analysis.ShowTrashStmt;
import org.apache.doris.analysis.ShowTrashDiskStmt;
import org.apache.doris.analysis.ShowTrashStmt;
import org.apache.doris.analysis.ShowUserPropertyStmt;
import org.apache.doris.analysis.ShowVariablesStmt;
import org.apache.doris.analysis.ShowViewStmt;
@ -152,6 +153,7 @@ import org.apache.doris.system.Backend;
import org.apache.doris.system.SystemInfoService;
import org.apache.doris.thrift.TUnit;
import org.apache.doris.transaction.GlobalTransactionMgr;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
@ -317,6 +319,10 @@ public class ShowExecutor {
handleShowSyncJobs();
} else if (stmt instanceof ShowSqlBlockRuleStmt) {
handleShowSqlBlockRule();
} else if (stmt instanceof ShowTableStatsStmt) {
handleShowTableStats();
} else if (stmt instanceof ShowColumnStatsStmt) {
handleShowColumnStats();
} else {
handleEmtpy();
}
@ -2092,6 +2098,20 @@ public class ShowExecutor {
resultSet = new ShowResultSet(showStmt.getMetaData(), results);
}
// Handles SHOW TABLE STATS: fetches the rows from the statistics manager and wraps them in the result set.
private void handleShowTableStats() throws AnalysisException {
    ShowTableStatsStmt showStmt = (ShowTableStatsStmt) stmt;
    List<List<String>> rows = Catalog.getCurrentCatalog()
            .getStatisticsManager()
            .showTableStatsList(showStmt.getDbName(), showStmt.getTableName());
    resultSet = new ShowResultSet(showStmt.getMetaData(), rows);
}
// Handles SHOW COLUMN STATS: fetches the rows from the statistics manager and wraps them in the result set.
private void handleShowColumnStats() throws AnalysisException {
    ShowColumnStatsStmt showStmt = (ShowColumnStatsStmt) stmt;
    List<List<String>> rows = Catalog.getCurrentCatalog()
            .getStatisticsManager()
            .showColumnStatsList(showStmt.getTableName());
    resultSet = new ShowResultSet(showStmt.getMetaData(), rows);
}
public void handleShowSqlBlockRule() throws AnalysisException {
ShowSqlBlockRuleStmt showStmt = (ShowSqlBlockRuleStmt) stmt;
List<List<String>> rows = Lists.newArrayList();

View File

@ -0,0 +1,165 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.BoolLiteral;
import org.apache.doris.analysis.DateLiteral;
import org.apache.doris.analysis.DecimalLiteral;
import org.apache.doris.analysis.FloatLiteral;
import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.analysis.LargeIntLiteral;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.analysis.StringLiteral;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.util.Util;
import com.google.common.base.Preconditions;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import com.clearspring.analytics.util.Lists;
/**
 * The statistics of a single column.
 * The column stats are mainly used to provide input for the Optimizer's cost model.
 * <p>
 * The column stats are described as follows:
 * 1. @ndv: The number of distinct values of the column.
 * 2. @avgSize: The average size of the column. The unit is bytes.
 * 3. @maxSize: The max size of the column. The unit is bytes.
 * 4. @numNulls: The number of nulls.
 * 5. @minValue: The min value of the column.
 * 6. @maxValue: The max value of the column.
 * <p>
 * The granularity of the statistics is the whole table.
 * For example:
 * "@ndv = 10" means that the number of distinct values is 10 in the whole table.
 * <p>
 * The numeric statistics start at -1 and -1 is always an accepted value
 * (presumably meaning "unknown" - confirm with the consumers of these stats).
 */
public class ColumnStats {

    public static final String NDV = "ndv";
    public static final String AVG_SIZE = "avg_size";
    public static final String MAX_SIZE = "max_size";
    public static final String NUM_NULLS = "num_nulls";
    public static final String MIN_VALUE = "min_value";
    public static final String MAX_VALUE = "max_value";

    private static final Predicate<Long> DESIRED_NDV_PRED = (v) -> v >= -1L;
    private static final Predicate<Float> DESIRED_AVG_SIZE_PRED = (v) -> (v == -1) || (v >= 0);
    private static final Predicate<Long> DESIRED_MAX_SIZE_PRED = (v) -> v >= -1L;
    private static final Predicate<Long> DESIRED_NUM_NULLS_PRED = (v) -> v >= -1L;

    private long ndv = -1;
    private float avgSize = -1; // in bytes
    private long maxSize = -1; // in bytes
    private long numNulls = -1;
    private LiteralExpr minValue;
    private LiteralExpr maxValue;

    /**
     * Applies the given statistic values to this column.
     * Statistic names are matched case-insensitively; unknown names are ignored.
     * The update is all-or-nothing: if any value is rejected, no field is changed.
     *
     * @param columnType       type of the column, used to validate min/max values
     * @param statsNameToValue statistic name to its textual value
     * @throws AnalysisException if any value is malformed or out of range
     */
    public void updateStats(Type columnType, Map<String, String> statsNameToValue) throws AnalysisException {
        // Stage every value first so that a failure on a later entry does not leave
        // this object half-updated while the statement is reported as failed.
        long newNdv = ndv;
        float newAvgSize = avgSize;
        long newMaxSize = maxSize;
        long newNumNulls = numNulls;
        LiteralExpr newMinValue = minValue;
        LiteralExpr newMaxValue = maxValue;

        for (Map.Entry<String, String> entry : statsNameToValue.entrySet()) {
            String statsName = entry.getKey();
            String statsValue = entry.getValue();
            if (statsName.equalsIgnoreCase(NDV)) {
                newNdv = Util.getLongPropertyOrDefault(statsValue, newNdv,
                        DESIRED_NDV_PRED, NDV + " should >= -1");
            } else if (statsName.equalsIgnoreCase(AVG_SIZE)) {
                newAvgSize = Util.getFloatPropertyOrDefault(statsValue, newAvgSize,
                        DESIRED_AVG_SIZE_PRED, AVG_SIZE + " should (>=0) or (=-1)");
            } else if (statsName.equalsIgnoreCase(MAX_SIZE)) {
                newMaxSize = Util.getLongPropertyOrDefault(statsValue, newMaxSize,
                        DESIRED_MAX_SIZE_PRED, MAX_SIZE + " should >=-1");
            } else if (statsName.equalsIgnoreCase(NUM_NULLS)) {
                newNumNulls = Util.getLongPropertyOrDefault(statsValue, newNumNulls,
                        DESIRED_NUM_NULLS_PRED, NUM_NULLS + " should >=-1");
            } else if (statsName.equalsIgnoreCase(MIN_VALUE)) {
                newMinValue = validateColumnValue(columnType, statsValue);
            } else if (statsName.equalsIgnoreCase(MAX_VALUE)) {
                newMaxValue = validateColumnValue(columnType, statsValue);
            }
        }

        // Everything validated: commit.
        ndv = newNdv;
        avgSize = newAvgSize;
        maxSize = newMaxSize;
        numNulls = newNumNulls;
        minValue = newMinValue;
        maxValue = newMaxValue;
    }

    /**
     * Returns the statistics as display strings in the order
     * ndv, avg_size, max_size, num_nulls, min_value, max_value.
     * An unset min or max value is shown as "N/A".
     */
    public List<String> getShowInfo() {
        List<String> result = Lists.newArrayList();
        result.add(Long.toString(ndv));
        result.add(Float.toString(avgSize));
        result.add(Long.toString(maxSize));
        result.add(Long.toString(numNulls));
        if (minValue != null) {
            result.add(minValue.getStringValue());
        } else {
            result.add("N/A");
        }
        if (maxValue != null) {
            result.add(maxValue.getStringValue());
        } else {
            result.add("N/A");
        }
        return result;
    }

    /**
     * Converts a textual min/max value into a literal of the column's type.
     *
     * @throws AnalysisException if the column type does not support min/max values,
     *                           or the value is rejected for that type
     */
    private LiteralExpr validateColumnValue(Type type, String columnValue) throws AnalysisException {
        // The column type comes from a user statement, so a non-scalar type is a user error
        // and must be reported as such rather than as a failed internal precondition.
        if (!type.isScalarType()) {
            throw new AnalysisException("Unsupported setting this type: " + type + " of min max value");
        }
        ScalarType scalarType = (ScalarType) type;

        // check if the min/max value is valid.
        // if not, some literal constructor will throw AnalysisException
        PrimitiveType primitiveType = scalarType.getPrimitiveType();
        switch (primitiveType) {
            case BOOLEAN:
                return new BoolLiteral(columnValue);
            case TINYINT:
            case SMALLINT:
            case INT:
            case BIGINT:
                return new IntLiteral(columnValue, type);
            case LARGEINT:
                return new LargeIntLiteral(columnValue);
            case FLOAT:
                // the min max value will lose precision when value type is double.
            case DOUBLE:
                return new FloatLiteral(columnValue);
            case DECIMALV2:
                DecimalLiteral decimalLiteral = new DecimalLiteral(columnValue);
                decimalLiteral.checkPrecisionAndScale(scalarType.getScalarPrecision(), scalarType.getScalarScale());
                return decimalLiteral;
            case DATE:
            case DATETIME:
                return new DateLiteral(columnValue, type);
            case CHAR:
            case VARCHAR:
                if (columnValue.length() > scalarType.getLength()) {
                    throw new AnalysisException("Min/Max value is longer than length of column type: "
                            + columnValue);
                }
                return new StringLiteral(columnValue);
            case HLL:
            case BITMAP:
            case ARRAY:
            case MAP:
            case STRUCT:
            default:
                throw new AnalysisException("Unsupported setting this type: " + type + " of min max value");
        }
    }
}

View File

@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import com.google.common.collect.Maps;
import java.util.Map;
/**
 * The statistics of an OlapTable.
 * The @OlapTableStats are mainly used to provide input for the Optimizer's cost model.
 *
 * There are three kinds of statistics of an OlapTable.
 * @rowCount: The row count of the OlapTable. There are two ways to obtain the value:
 * 1. The sum of the row counts of its @TabletStats, which may be an inaccurate value.
 * 2. count(*) of the OlapTable, which is an accurate value.
 * @dataSize: The data size of the OlapTable. This is an inaccurate value,
 * which is obtained by summing the @dataSize of its @TabletStats.
 * @idToTabletStats: <@Long tabletId, @TabletStats tabletStats>
 * Each tablet in the OlapTable will have corresponding @TabletStats.
 * Those @TabletStats are recorded in @idToTabletStats in the form of a MAP.
 * This lets the optimizer quickly find the corresponding
 * @TabletStats based on the tablet id.
 * At the same time, both @rowCount and @dataSize can also be obtained
 * from the sum of all @TabletStats.
 *
 */
public class OlapTableStats extends TableStats {
// tablet id -> statistics of that tablet; nothing in this class reads or writes it yet
private Map<Long, TabletStats> idToTabletStats = Maps.newHashMap();
}

View File

@ -0,0 +1,73 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import com.google.common.collect.Maps;
import java.util.Map;
/**
 * The statistics of all tables.
 * The @Statistics are mainly used to provide input for the Optimizer's cost model.
 *
 * @idToTableStats: <@Long tableId, @TableStats tableStats>
 * Each table will have corresponding @TableStats.
 * Those @TableStats are recorded in @idToTableStats in the form of a MAP.
 * This lets the optimizer quickly find the corresponding
 * @TableStats based on the table id.
 */
public class Statistics {

    private final Map<Long, TableStats> idToTableStats = Maps.newConcurrentMap();

    /**
     * Updates the table-level statistics of the given table,
     * creating its @TableStats entry first if it does not exist yet.
     *
     * @throws AnalysisException if a statistic value is rejected by @TableStats
     */
    public void updateTableStats(long tableId, Map<String, String> statsNameToValue)
            throws AnalysisException {
        getOrCreateTableStats(tableId).updateTableStats(statsNameToValue);
    }

    /**
     * Updates the statistics of one column of the given table,
     * creating the table's @TableStats entry first if it does not exist yet.
     *
     * @throws AnalysisException if a statistic value is rejected by @TableStats
     */
    public void updateColumnStats(long tableId, String columnName, Type columnType,
                                  Map<String, String> statsNameToValue)
            throws AnalysisException {
        getOrCreateTableStats(tableId).updateColumnStats(columnName, columnType, statsNameToValue);
    }

    /** Returns the statistics of the given table, or null if none have been recorded. */
    public TableStats getTableStats(long tableId) {
        return idToTableStats.get(tableId);
    }

    /**
     * Returns the column-name to column-statistics map of the given table,
     * or null if no statistics have been recorded for the table.
     */
    public Map<String, ColumnStats> getColumnStats(long tableId) {
        TableStats tableStats = getTableStats(tableId);
        if (tableStats == null) {
            return null;
        }
        return tableStats.getNameToColumnStats();
    }

    // Looks up the entry of a table, creating it in a single atomic step so that two concurrent
    // first-time updates cannot each install their own @TableStats and lose one of the updates.
    private TableStats getOrCreateTableStats(long tableId) {
        return idToTableStats.computeIfAbsent(tableId, id -> new TableStats());
    }
}

View File

@ -0,0 +1,152 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.analysis.AlterColumnStatsStmt;
import org.apache.doris.analysis.AlterTableStatsStmt;
import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.Table;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import java.util.List;
import java.util.Map;
import com.clearspring.analytics.util.Lists;
/**
 * Single entry point for reading and manually altering statistics.
 * Resolves database/table names against the current catalog, checks SHOW privileges
 * for the read paths, and delegates storage to {@link Statistics}.
 *
 * NOTE(review): the code dereferences objects right after calling
 * ErrorReport.reportAnalysisException on a null check, so it relies on that helper
 * always throwing - confirm against ErrorReport.
 */
public class StatisticsManager {
    private final Statistics statistics;

    public StatisticsManager() {
        statistics = new Statistics();
    }

    /**
     * Applies the properties of an ALTER TABLE ... STATS statement to the target table.
     *
     * @param stmt statement carrying the table name and the statistic properties
     * @throws AnalysisException if the database or table cannot be resolved, or the
     *         properties are rejected by the statistics layer
     */
    public void alterTableStatistics(AlterTableStatsStmt stmt)
            throws AnalysisException {
        Table table = validateTableName(stmt.getTableName());
        statistics.updateTableStats(table.getId(), stmt.getProperties());
    }

    /**
     * Applies the properties of an ALTER COLUMN STATS statement to one column.
     *
     * @param stmt statement carrying the table name, column name and statistic properties
     * @throws AnalysisException if the database, table or column cannot be resolved, or
     *         the properties are rejected by the statistics layer
     */
    public void alterColumnStatistics(AlterColumnStatsStmt stmt) throws AnalysisException {
        Table table = validateTableName(stmt.getTableName());
        String columnName = stmt.getColumnName();
        Column column = table.getColumn(columnName);
        if (column == null) {
            ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, table.getName());
        }
        // The column type is passed along so the values can be matched against it.
        statistics.updateColumnStats(table.getId(), columnName, column.getType(), stmt.getProperties());
    }

    /**
     * Builds the result rows of SHOW TABLE STATS.
     *
     * @param dbName database to look in
     * @param tableName table to show, or {@code null} to list every table of the database
     *        that the current user may see and that has statistics
     * @return one row per table: table name followed by {@link TableStats#getShowInfo()}
     * @throws AnalysisException if the database is unknown; or, when a table name is given,
     *         if the table is unknown, access is denied, or it has no statistics
     */
    public List<List<String>> showTableStatsList(String dbName, String tableName)
            throws AnalysisException {
        Database db = Catalog.getCurrentCatalog().getDb(dbName);
        if (db == null) {
            ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_DB_ERROR, dbName);
        }
        List<List<String>> result = Lists.newArrayList();

        if (tableName != null) {
            // Single-table mode: every failure is reported to the user.
            Table table = db.getTable(tableName);
            if (table == null) {
                ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_TABLE_ERROR, tableName);
            }
            if (!hasShowPriv(dbName, tableName)) {
                reportTableAccessDenied("SHOW TABLE STATS", tableName);
            }
            result.add(showTableStats(table));
            return result;
        }

        // Listing mode: silently skip tables that are not visible or have no statistics.
        for (Table table : db.getTables()) {
            if (!hasShowPriv(dbName, table.getName())) {
                continue;
            }
            TableStats tableStats = statistics.getTableStats(table.getId());
            if (tableStats == null) {
                // Explicit check instead of throwing and swallowing an AnalysisException.
                continue;
            }
            result.add(buildTableStatsRow(table, tableStats));
        }
        return result;
    }

    /**
     * Builds the result rows of SHOW COLUMN STATS for one table.
     *
     * @param tableName qualified name of the table
     * @return one row per column with statistics: column name followed by the column's show info
     * @throws AnalysisException if the database or table is unknown, access is denied,
     *         or the table has no statistics entry
     */
    public List<List<String>> showColumnStatsList(TableName tableName) throws AnalysisException {
        // check meta
        Table table = validateTableName(tableName);
        // check priv
        if (!hasShowPriv(tableName.getDb(), tableName.getTbl())) {
            reportTableAccessDenied("SHOW COLUMN STATS", tableName.getTbl());
        }
        // get stats
        List<List<String>> result = Lists.newArrayList();
        Map<String, ColumnStats> nameToColumnStats = statistics.getColumnStats(table.getId());
        if (nameToColumnStats == null) {
            throw new AnalysisException("There is no column statistics in this table:" + table.getName());
        }
        for (Map.Entry<String, ColumnStats> entry : nameToColumnStats.entrySet()) {
            List<String> row = Lists.newArrayList();
            row.add(entry.getKey());
            row.addAll(entry.getValue().getShowInfo());
            result.add(row);
        }
        return result;
    }

    // Builds the SHOW row of one table; fails if the table has no statistics.
    private List<String> showTableStats(Table table) throws AnalysisException {
        TableStats tableStats = statistics.getTableStats(table.getId());
        if (tableStats == null) {
            throw new AnalysisException("There is no statistics in this table:" + table.getName());
        }
        return buildTableStatsRow(table, tableStats);
    }

    // Formats one row: table name followed by the table statistics' show info.
    private List<String> buildTableStatsRow(Table table, TableStats tableStats) {
        List<String> row = Lists.newArrayList();
        row.add(table.getName());
        row.addAll(tableStats.getShowInfo());
        return row;
    }

    // True if the current connection holds PrivPredicate.SHOW on the given table.
    private boolean hasShowPriv(String dbName, String tableName) {
        return Catalog.getCurrentCatalog().getAuth().checkTblPriv(ConnectContext.get(), dbName, tableName,
                PrivPredicate.SHOW);
    }

    // Reports ERR_TABLEACCESS_DENIED_ERROR for the given command name, so the message
    // names the statement the user actually ran.
    private void reportTableAccessDenied(String command, String tableName) throws AnalysisException {
        ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, command,
                ConnectContext.get().getQualifiedUser(),
                ConnectContext.get().getRemoteIP(),
                tableName);
    }

    // Resolves a qualified table name against the current catalog, reporting
    // ERR_BAD_DB_ERROR / ERR_BAD_TABLE_ERROR when the database or table is missing.
    private Table validateTableName(TableName dbTableName) throws AnalysisException {
        String dbName = dbTableName.getDb();
        String tableName = dbTableName.getTbl();

        Database db = Catalog.getCurrentCatalog().getDb(dbName);
        if (db == null) {
            ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_DB_ERROR, dbName);
        }
        Table table = db.getTable(tableName);
        if (table == null) {
            ErrorReport.reportAnalysisException(ErrorCode.ERR_BAD_TABLE_ERROR, tableName);
        }
        return table;
    }
}

View File

@ -0,0 +1,97 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.util.Util;
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import com.clearspring.analytics.util.Lists;
/**
* These are the statistics of a table.
* The table stats are mainly used to provide input for the Optimizer's cost model.
* <p>
* The table stats are described as follows:
* 1. @rowCount: The row count of table.
* 2. @dataSize: The data size of table.
* 3. @nameToColumnStats: <@String columnName, @ColumnStats columnStats>
* Each column in the Table will have corresponding @ColumnStats.
* Those @ColumnStats are recorded in @nameToColumnStats form of MAP.
* This facilitates the optimizer to quickly find the corresponding
* @ColumnStats based on the column name.
*
* @rowCount: The row count of table.
* @dataSize: The data size of table.
* <p>
* The granularity of the statistics is whole table.
* For example:
* "@rowCount = 1000" means that the row count is 1000 in the whole table.
*/
public class TableStats {
    // Property names accepted by updateTableStats (matched case-insensitively).
    public static final String ROW_COUNT = "row_count";
    public static final String DATA_SIZE = "data_size";

    // Both values must be >= -1; -1 is also the initial value of the fields below
    // (presumably meaning "unknown" - confirm with the optimizer's usage).
    private static final Predicate<Long> DESIRED_ROW_COUNT_PRED = (v) -> v >= -1L;
    private static final Predicate<Long> DESIRED_DATA_SIZE_PRED = (v) -> v >= -1L;

    private long rowCount = -1;
    private long dataSize = -1;
    // Keyed by column name; created through Maps.newConcurrentMap().
    private final Map<String, ColumnStats> nameToColumnStats = Maps.newConcurrentMap();

    /**
     * Updates the row count and/or data size from string properties.
     * Entries whose name is neither {@link #ROW_COUNT} nor {@link #DATA_SIZE} are ignored.
     * The update is all-or-nothing: if any value is rejected, no field is modified.
     *
     * @param statsNameToValue statistic name mapped to its (string) value
     * @throws AnalysisException propagated from Util.getLongPropertyOrDefault when a
     *         value is rejected
     */
    public void updateTableStats(Map<String, String> statsNameToValue) throws AnalysisException {
        // Validate into locals first so that a failure on a later entry does not leave
        // this object half-updated while the statement is reported as failed.
        long newRowCount = rowCount;
        long newDataSize = dataSize;
        for (Map.Entry<String, String> entry : statsNameToValue.entrySet()) {
            String statsName = entry.getKey();
            if (statsName.equalsIgnoreCase(ROW_COUNT)) {
                newRowCount = Util.getLongPropertyOrDefault(entry.getValue(), newRowCount,
                        DESIRED_ROW_COUNT_PRED, ROW_COUNT + " should >= -1");
            } else if (statsName.equalsIgnoreCase(DATA_SIZE)) {
                newDataSize = Util.getLongPropertyOrDefault(entry.getValue(), newDataSize,
                        DESIRED_DATA_SIZE_PRED, DATA_SIZE + " should >= -1");
            }
        }
        rowCount = newRowCount;
        dataSize = newDataSize;
    }

    /**
     * Updates the statistics of one column, creating its entry on first use.
     *
     * @param columnName name of the column
     * @param columnType type of the column, forwarded to {@code ColumnStats.updateStats}
     * @param statsNameToValue statistic name mapped to its (string) value
     * @throws AnalysisException propagated from {@code ColumnStats.updateStats}
     */
    public void updateColumnStats(String columnName, Type columnType, Map<String, String> statsNameToValue)
            throws AnalysisException {
        // computeIfAbsent replaces a get-then-put sequence in which two concurrent
        // first-time updates of the same column could each create a ColumnStats and
        // lose the update applied to the overwritten instance.
        ColumnStats columnStats = nameToColumnStats.computeIfAbsent(columnName, name -> new ColumnStats());
        columnStats.updateStats(columnType, statsNameToValue);
    }

    /**
     * @return the values shown for this table, in order: row count, data size
     */
    public List<String> getShowInfo() {
        List<String> result = Lists.newArrayList();
        result.add(Long.toString(rowCount));
        result.add(Long.toString(dataSize));
        return result;
    }

    /**
     * @return the live column-name to column-statistics map (not a copy)
     */
    public Map<String, ColumnStats> getNameToColumnStats() {
        return nameToColumnStats;
    }
}

View File

@ -0,0 +1,43 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
/**
* These are the statistics of one tablet.
* The tablet stats are mainly used to provide input for the Optimizer's cost model.
*
* The tablet stats are described as follows:
* 1. @rowCount: The row count of tablet.
* 2. @dataSize: The data size of tablet.
*
* @rowCount: The row count of tablet. There are two ways to update:
* 1. The rowCount from tablet meta. The value obtained by this update method
* may be an inaccurate value.
* 2. The result of count(*) query from one tablet. The value obtained by this update method
* is accurate.
* @dataSize: The data size of tablet. This is an inaccurate value of one tablet.
*
* The granularity of the statistics is one tablet.
* For example:
* "@rowCount = 10" means that the row count is 10 in one tablet.
*/
// Plain holder for the statistics of a single tablet; it declares no accessors or update methods yet.
public class TabletStats {
// Row count of this tablet.
private long rowCount;
// Data size of this tablet.
private long dataSize;
}

View File

@ -353,6 +353,7 @@ import org.apache.doris.qe.SqlModeHelper;
keywordMap.put("split", new Integer(SqlParserSymbols.KW_SPLIT));
keywordMap.put("start", new Integer(SqlParserSymbols.KW_START));
keywordMap.put("status", new Integer(SqlParserSymbols.KW_STATUS));
keywordMap.put("stats", new Integer(SqlParserSymbols.KW_STATS));
keywordMap.put("stop", new Integer(SqlParserSymbols.KW_STOP));
keywordMap.put("storage", new Integer(SqlParserSymbols.KW_STORAGE));
keywordMap.put("stream", new Integer(SqlParserSymbols.KW_STREAM));