[fix](statistics) ColumnStatistics was changed unexpectedly when show stats (#14068)

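The SHOW STATS logic unexpectedly modified the internally collected ColumnStat objects, which led to inaccurate cost estimates and inefficient plans. The fix stores a copy of each partition's ColumnStat in the aggregated map, so merging partition statistics no longer mutates the original.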
This commit is contained in:
Kikyou1997
2022-11-08 20:26:37 +08:00
committed by GitHub
parent a58ac48a6e
commit ecfdf0320d
4 changed files with 3 additions and 91 deletions

View File

@ -1766,10 +1766,10 @@ public class Config extends ConfigBase {
public static int be_exec_version = max_be_exec_version;
@ConfField(mutable = false)
public static int statistic_job_scheduler_execution_interval_ms = 60 * 1000;
public static int statistic_job_scheduler_execution_interval_ms = 1000;
@ConfField(mutable = false)
public static int statistic_task_scheduler_execution_interval_ms = 60 * 1000;
public static int statistic_task_scheduler_execution_interval_ms = 1000;
/*
* The mtmv scheduler framework is still under development; remove this config once it graduates.

View File

@ -1,46 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import com.google.common.collect.Maps;
import java.util.Map;
/**
 * Statistics of an OlapTable.
 * {@code OlapTableStats} is mainly used to provide input for the optimizer's cost model.
 *
 * <p>There are three kinds of statistics for an OlapTable:
 * <ul>
 *   <li>{@code rowCount}: the row count of the OlapTable. There are two ways to obtain the value:
 *     <ol>
 *       <li>The sum of the row counts of all {@link TabletStats}, which may be inaccurate.</li>
 *       <li>{@code count(*)} on the OlapTable, which is accurate.</li>
 *     </ol>
 *   </li>
 *   <li>{@code dataSize}: the data size of the OlapTable. This is an inaccurate value,
 *     obtained by summing the {@code dataSize} of all {@link TabletStats}.</li>
 *   <li>{@code idToTabletStats}: a map from tablet id ({@code Long}) to {@link TabletStats}.
 *     Each tablet in the OlapTable has a corresponding {@link TabletStats} entry, recorded
 *     in this map so that the optimizer can quickly look up the statistics of a tablet
 *     by its id.</li>
 * </ul>
 *
 * <p>Both {@code rowCount} and {@code dataSize} can also be obtained by summing over
 * all {@link TabletStats}.
 *
 * <p>NOTE(review): {@code rowCount} and {@code dataSize} are not declared in this class;
 * presumably they are inherited from {@link TableStats} - confirm against the parent class.
 */
public class OlapTableStats extends TableStats {
// Tablet id -> statistics of that tablet; starts out as an empty map.
// No accessor or mutator for this field is declared in this class.
private Map<Long, TabletStats> idToTabletStats = Maps.newHashMap();
}

View File

@ -225,7 +225,7 @@ public class TableStats {
for (PartitionStats partitionStats : nameToPartitionStats.values()) {
partitionStats.getNameToColumnStats().forEach((colName, columnStats) -> {
if (!aggColumnStats.containsKey(colName)) {
aggColumnStats.put(colName, columnStats);
aggColumnStats.put(colName, columnStats.copy());
} else {
ColumnStat tblColStats = aggColumnStats.get(colName);
mergePartitionColumnStats(tblColStats, columnStats);

View File

@ -1,42 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
/**
 * Statistics of a single tablet.
 * Tablet statistics are mainly used to provide input for the optimizer's cost model.
 *
 * <p>The tablet statistics are:
 * <ul>
 *   <li>{@code rowCount}: the row count of the tablet. There are two ways to update it:
 *     <ol>
 *       <li>From the tablet meta. The value obtained this way may be inaccurate.</li>
 *       <li>From the result of a {@code count(*)} query on the tablet. The value obtained
 *         this way is accurate.</li>
 *     </ol>
 *   </li>
 *   <li>{@code dataSize}: the data size of the tablet. This is an inaccurate value.</li>
 * </ul>
 *
 * <p>The granularity of the statistics is one tablet.
 * For example, {@code rowCount = 10} means that this one tablet contains 10 rows.
 */
public class TabletStats {
// Row count of this tablet.
private long rowCount;
// Data size of this tablet; the unit is not specified here - TODO confirm.
private long dataSize;
}