fix column store index back cost

This commit is contained in:
zzg19950727
2024-02-09 15:35:27 +00:00
committed by ob-robot
parent ce0d373f22
commit 772d84b21e
5 changed files with 357 additions and 135 deletions

View File

@ -73,7 +73,9 @@ int ObCostTableScanInfo::assign(const ObCostTableScanInfo &est_cost_info)
LOG_WARN("failed to assign access columns", K(ret));
} else if (OB_FAIL(access_columns_.assign(est_cost_info.access_columns_))) {
LOG_WARN("failed to assign access columns", K(ret));
} else if (OB_FAIL(column_group_infos_.assign(est_cost_info.column_group_infos_))) {
} else if (OB_FAIL(index_scan_column_group_infos_.assign(est_cost_info.index_scan_column_group_infos_))) {
LOG_WARN("failed to to assign column group infos", K(ret));
} else if (OB_FAIL(index_back_column_group_infos_.assign(est_cost_info.index_back_column_group_infos_))) {
LOG_WARN("failed to to assign column group infos", K(ret));
} else {
table_id_ = est_cost_info.table_id_;
@ -103,6 +105,7 @@ int ObCostTableScanInfo::assign(const ObCostTableScanInfo &est_cost_info)
sample_info_ = est_cost_info.sample_info_;
use_column_store_ = est_cost_info.use_column_store_;
at_most_one_range_ = est_cost_info.at_most_one_range_;
index_back_with_column_store_ = est_cost_info.index_back_with_column_store_;
// no need to copy table scan param
}
return ret;
@ -1368,18 +1371,24 @@ int ObOptEstCostModel::cost_basic_table(const ObCostTableScanInfo &est_cost_info
int64_t part_count = table_meta_info->part_count_;
part_count = part_count > 0 ? part_count : 1;
double row_count_per_part = row_count / part_count;
double index_scan_cost = 0;
double index_back_cost = 0;
double prefix_filter_sel = est_cost_info.join_filter_sel_;
// calc scan one partition cost
if (!est_cost_info.use_column_store_ &&
OB_FAIL(cost_row_store_basic_table(est_cost_info,
row_count_per_part,
cost))) {
LOG_WARN("Failed to estimate cost", K(ret), K(est_cost_info));
} else if (est_cost_info.use_column_store_ &&
OB_FAIL(cost_column_store_basic_table(est_cost_info,
row_count_per_part,
cost))) {
LOG_WARN("Failed to estimate cost", K(ret), K(est_cost_info), K(row_count_per_part));
if (OB_FAIL(cost_index_scan(est_cost_info,
row_count_per_part,
prefix_filter_sel,
index_scan_cost))) {
LOG_WARN("failed to calc index scan cost", K(ret));
} else if (est_cost_info.index_meta_info_.is_index_back_ &&
OB_FAIL(cost_index_back(est_cost_info,
row_count_per_part,
prefix_filter_sel,
index_back_cost))) {
LOG_WARN("failed to calc index back cost", K(ret));
} else {
cost += index_scan_cost;
cost += index_back_cost;
// calc one parallel scan cost
cost *= part_cnt_per_dop;
LOG_TRACE("OPT:[ESTIMATE FINISH]", K(cost), K(part_cnt_per_dop), K(est_cost_info));
@ -1388,76 +1397,153 @@ int ObOptEstCostModel::cost_basic_table(const ObCostTableScanInfo &est_cost_info
return ret;
}
int ObOptEstCostModel::cost_row_store_basic_table(const ObCostTableScanInfo &est_cost_info,
double row_count,
double &cost)
int ObOptEstCostModel::cost_index_scan(const ObCostTableScanInfo &est_cost_info,
double row_count,
double &prefix_filter_sel,
double &index_scan_cost)
{
int ret = OB_SUCCESS;
double index_back_cost = 0;
if (OB_FAIL(cost_index_scan(est_cost_info,
row_count,
cost))) {
LOG_WARN("failed to calc index scan cost", K(ret));
} else if (!est_cost_info.index_meta_info_.is_index_back_) {
LOG_TRACE("OPT:[COST BASIC TABLE SCAN WITH ROW STORE]", K(row_count), K(cost));
} else if (OB_FAIL(cost_index_back(est_cost_info,
row_count,
index_back_cost))) {
LOG_WARN("failed to calc index back cost", K(ret));
} else {
cost += index_back_cost;
LOG_TRACE("OPT:[COST BASIC TABLE SCAN WITH ROW STORE]", K(row_count), K(index_back_cost), K(cost));
if (est_cost_info.use_column_store_ &&
OB_FAIL(cost_column_store_index_scan(est_cost_info,
row_count,
prefix_filter_sel,
index_scan_cost))) {
LOG_WARN("failed to calc column store index scan cost", K(ret));
} else if (!est_cost_info.use_column_store_ &&
OB_FAIL(cost_row_store_index_scan(est_cost_info,
row_count,
index_scan_cost))) {
LOG_WARN("failed to calc row store index scan cost", K(ret));
}
return ret;
}
int ObOptEstCostModel::cost_column_store_basic_table(const ObCostTableScanInfo &est_cost_info,
double row_count,
double &cost)
int ObOptEstCostModel::cost_index_back(const ObCostTableScanInfo &est_cost_info,
double row_count,
double &prefix_filter_sel,
double &index_back_cost)
{
int ret = OB_SUCCESS;
ObCostTableScanInfo column_group_est_cost_info(OB_INVALID_ID, OB_INVALID_ID, OB_INVALID_ID);
double prefix_filter_sel = est_cost_info.join_filter_sel_;
cost = 0.0;
if (OB_FAIL(column_group_est_cost_info.assign(est_cost_info))) {
LOG_WARN("failed to assign est cost info", K(ret));
} else {
column_group_est_cost_info.access_column_items_.reuse();
column_group_est_cost_info.prefix_filters_.reuse();
column_group_est_cost_info.postfix_filters_.reuse();
column_group_est_cost_info.use_column_store_ = true;
column_group_est_cost_info.join_filter_sel_ = 1.0;
if (est_cost_info.index_back_with_column_store_ &&
OB_FAIL(cost_column_store_index_back(est_cost_info,
row_count,
prefix_filter_sel,
index_back_cost))) {
LOG_WARN("failed to calc column store index back cost", K(ret));
} else if (!est_cost_info.index_back_with_column_store_ &&
OB_FAIL(cost_row_store_index_back(est_cost_info,
row_count,
index_back_cost))) {
LOG_WARN("failed to calc row store index back cost", K(ret));
}
// calc scan cost for each column group
for (int64_t i = 0; OB_SUCC(ret) && i <est_cost_info.column_group_infos_.count(); ++i) {
// prepare est cost info for column group
const ObCostColumnGroupInfo &cg_info = est_cost_info.column_group_infos_.at(i);
double cg_row_count = row_count * prefix_filter_sel * cg_info.skip_filter_sel_ * cg_info.skip_rate_;
column_group_est_cost_info.index_meta_info_.index_micro_block_count_ = cg_info.micro_block_count_;
column_group_est_cost_info.table_filters_.reuse();
double column_group_cost = 0.0;
if (OB_FAIL(column_group_est_cost_info.table_filters_.assign(cg_info.filters_))) {
LOG_WARN("failed to assign filters", K(ret));
} else if (OB_FAIL(column_group_est_cost_info.access_column_items_.assign(cg_info.access_column_items_))) {
LOG_WARN("failed to assign filters", K(ret));
} else if (OB_FAIL(cost_index_scan(column_group_est_cost_info,
row_count * prefix_filter_sel,
column_group_cost))) {
LOG_WARN("failed to calc index scan cost", K(ret), K(cg_row_count), K(column_group_est_cost_info));
return ret;
}
int ObOptEstCostModel::cost_column_store_index_scan(const ObCostTableScanInfo &est_cost_info,
double row_count,
double &prefix_filter_sel,
double &index_scan_cost)
{
int ret = OB_SUCCESS;
SMART_VAR(ObCostTableScanInfo, column_group_est_cost_info, OB_INVALID_ID, OB_INVALID_ID, OB_INVALID_ID) {
if (OB_FAIL(column_group_est_cost_info.assign(est_cost_info))) {
LOG_WARN("failed to assign est cost info", K(ret));
} else {
// TODO: index back cost
cost += column_group_cost;
prefix_filter_sel *= cg_info.filter_sel_;
LOG_TRACE("OPT:[COST COLUMN GROUP]", K(row_count), K(prefix_filter_sel), K(column_group_cost));
column_group_est_cost_info.access_column_items_.reuse();
column_group_est_cost_info.prefix_filters_.reuse();
column_group_est_cost_info.postfix_filters_.reuse();
column_group_est_cost_info.use_column_store_ = true;
column_group_est_cost_info.join_filter_sel_ = 1.0;
}
// calc scan cost for each column group
for (int64_t i = 0; OB_SUCC(ret) && i < est_cost_info.index_scan_column_group_infos_.count(); ++i) {
// prepare est cost info for column group
const ObCostColumnGroupInfo &cg_info = est_cost_info.index_scan_column_group_infos_.at(i);
double cg_row_count = row_count * prefix_filter_sel * cg_info.skip_filter_sel_ * cg_info.skip_rate_;
column_group_est_cost_info.index_meta_info_.index_micro_block_count_ = cg_info.micro_block_count_;
column_group_est_cost_info.table_filters_.reuse();
double column_group_cost = 0.0;
if (OB_FAIL(column_group_est_cost_info.postfix_filters_.assign(cg_info.filters_))) {
LOG_WARN("failed to assign filters", K(ret));
} else if (!est_cost_info.index_meta_info_.is_index_back_ &&
OB_FAIL(column_group_est_cost_info.access_column_items_.assign(cg_info.access_column_items_))) {
LOG_WARN("failed to assign filters", K(ret));
} else if (est_cost_info.index_meta_info_.is_index_back_ &&
OB_FAIL(column_group_est_cost_info.index_access_column_items_.assign(cg_info.access_column_items_))) {
LOG_WARN("failed to assign filters", K(ret));
} else if (OB_FAIL(cost_row_store_index_scan(column_group_est_cost_info,
row_count * prefix_filter_sel,
column_group_cost))) {
LOG_WARN("failed to calc index scan cost", K(ret), K(cg_row_count), K(column_group_est_cost_info));
} else {
index_scan_cost += column_group_cost;
prefix_filter_sel *= cg_info.filter_sel_;
LOG_TRACE("OPT:[COST ONE COLUMN GROUP]", K(row_count), K(prefix_filter_sel), K(column_group_cost));
}
}
}
LOG_TRACE("OPT:[COST BASIC TABLE SCAN WITH COLUMN STORE]", K(row_count), K(cost));
LOG_TRACE("OPT:[COST INDEX SCAN WITH COLUMN STORE]", K(row_count), K(index_scan_cost));
return ret;
}
int ObOptEstCostModel::cost_index_scan(const ObCostTableScanInfo &est_cost_info,
double row_count,
double &cost)
int ObOptEstCostModel::cost_column_store_index_back(const ObCostTableScanInfo &est_cost_info,
double row_count,
double &prefix_filter_sel,
double &index_back_cost)
{
int ret = OB_SUCCESS;
SMART_VAR(ObCostTableScanInfo, column_group_est_cost_info, OB_INVALID_ID, OB_INVALID_ID, OB_INVALID_ID) {
double network_cost = 0.0;
index_back_cost = 0.0;
if (OB_FAIL(column_group_est_cost_info.assign(est_cost_info))) {
LOG_WARN("failed to assign est cost info", K(ret));
} else {
column_group_est_cost_info.access_column_items_.reuse();
column_group_est_cost_info.prefix_filters_.reuse();
column_group_est_cost_info.postfix_filters_.reuse();
column_group_est_cost_info.use_column_store_ = true;
column_group_est_cost_info.join_filter_sel_ = 1.0;
}
// calc scan cost for each column group
for (int64_t i = 0; OB_SUCC(ret) && i < est_cost_info.index_back_column_group_infos_.count(); ++i) {
// prepare est cost info for column group
const ObCostColumnGroupInfo &cg_info = est_cost_info.index_back_column_group_infos_.at(i);
double cg_row_count = row_count * prefix_filter_sel * cg_info.skip_filter_sel_ * cg_info.skip_rate_;
column_group_est_cost_info.index_meta_info_.index_micro_block_count_ = cg_info.micro_block_count_;
column_group_est_cost_info.table_filters_.reuse();
double column_group_cost = 0.0;
if (OB_FAIL(column_group_est_cost_info.table_filters_.assign(cg_info.filters_))) {
LOG_WARN("failed to assign filters", K(ret));
} else if (OB_FAIL(column_group_est_cost_info.access_column_items_.assign(cg_info.access_column_items_))) {
LOG_WARN("failed to assign filters", K(ret));
} else if (OB_FAIL(cost_range_get(column_group_est_cost_info,
false,
row_count * prefix_filter_sel,
column_group_cost))) {
LOG_WARN("failed to calc index scan cost", K(ret), K(cg_row_count), K(column_group_est_cost_info));
} else {
index_back_cost += column_group_cost;
prefix_filter_sel *= cg_info.filter_sel_;
LOG_TRACE("OPT:[COST ONE COLUMN GROUP]", K(row_count), K(prefix_filter_sel), K(column_group_cost));
}
}
if (OB_FAIL(ret)) {
} else if (est_cost_info.index_meta_info_.is_global_index_ &&
OB_FAIL(cost_global_index_back_with_rp(row_count * prefix_filter_sel,
est_cost_info,
network_cost))) {
LOG_WARN("failed to get newwork transform cost for global index", K(ret));
} else {
index_back_cost += network_cost;
}
LOG_TRACE("OPT:[COST INDEX BACK WITH COLUMN STORE]", K(row_count), K(network_cost), K(index_back_cost));
}
return ret;
}
int ObOptEstCostModel::cost_row_store_index_scan(const ObCostTableScanInfo &est_cost_info,
double row_count,
double &index_scan_cost)
{
int ret = OB_SUCCESS;
//refine row count for index skip scan
@ -1469,7 +1555,7 @@ int ObOptEstCostModel::cost_index_scan(const ObCostTableScanInfo &est_cost_info,
if (OB_FAIL(cost_range_get(est_cost_info,
true,
row_count,
cost))) {
index_scan_cost))) {
LOG_WARN("Failed to estimate get cost", K(ret));
}
} else if (ObSimpleBatch::T_SCAN == est_cost_info.batch_type_ ||
@ -1477,7 +1563,7 @@ int ObOptEstCostModel::cost_index_scan(const ObCostTableScanInfo &est_cost_info,
if (OB_FAIL(cost_range_scan(est_cost_info,
true,
row_count,
cost))) {
index_scan_cost))) {
LOG_WARN("Failed to estimate scan cost", K(ret));
}
} else {
@ -1488,21 +1574,21 @@ int ObOptEstCostModel::cost_index_scan(const ObCostTableScanInfo &est_cost_info,
if (OB_FAIL(ret)) {
} else if (est_cost_info.index_meta_info_.is_geo_index_) {
double spatial_cost = row_count * cost_params_.get_spatial_per_row_cost(sys_stat_);
cost += spatial_cost;
index_scan_cost += spatial_cost;
LOG_TRACE("OPT::[COST SPATIAL INDEX SCAN]", K(spatial_cost), K(ret));
}
//add index skip scan cost
if (OB_FAIL(ret)) {
} else if (!est_cost_info.ss_ranges_.empty()) {
cost = cost * est_cost_info.ss_prefix_ndv_;
LOG_TRACE("OPT::[COST INDEX SKIP SCAN]", K(est_cost_info.ss_prefix_ndv_), K(cost));
index_scan_cost *= est_cost_info.ss_prefix_ndv_;
LOG_TRACE("OPT::[COST INDEX SKIP SCAN]", K(est_cost_info.ss_prefix_ndv_), K(index_scan_cost));
}
return ret;
}
int ObOptEstCostModel::cost_index_back(const ObCostTableScanInfo &est_cost_info,
double row_count,
double &cost)
int ObOptEstCostModel::cost_row_store_index_back(const ObCostTableScanInfo &est_cost_info,
double row_count,
double &index_back_cost)
{
int ret = OB_SUCCESS;
double network_cost = 0.0;
@ -1511,7 +1597,7 @@ int ObOptEstCostModel::cost_index_back(const ObCostTableScanInfo &est_cost_info,
if (OB_FAIL(cost_range_get(est_cost_info,
false,
index_back_row_count,
cost))) {
index_back_cost))) {
LOG_WARN("Failed to estimate get cost", K(ret));
} else if (est_cost_info.index_meta_info_.is_global_index_ &&
OB_FAIL(cost_global_index_back_with_rp(index_back_row_count,
@ -1519,9 +1605,9 @@ int ObOptEstCostModel::cost_index_back(const ObCostTableScanInfo &est_cost_info,
network_cost))) {
LOG_WARN("failed to get newwork transform cost for global index", K(ret));
} else {
cost += network_cost;
LOG_TRACE("OPT:[COST INDEX BACK]", K(index_back_row_count),
K(network_cost), K(cost));
index_back_cost += network_cost;
LOG_TRACE("OPT:[COST ROW STORE INDEX BACK]", K(index_back_row_count),
K(network_cost), K(index_back_cost));
}
return ret;
}
@ -1623,10 +1709,10 @@ int ObOptEstCostModel::cost_range_get(const ObCostTableScanInfo &est_cost_info,
int ObOptEstCostModel::range_get_io_cost(const ObCostTableScanInfo &est_cost_info,
bool is_scan_index,
double row_count,
double &cost)
double &io_cost)
{
int ret = OB_SUCCESS;
cost = 0.0;
io_cost = 0.0;
const ObIndexMetaInfo &index_meta_info = est_cost_info.index_meta_info_;
const ObTableMetaInfo *table_meta_info = est_cost_info.table_meta_info_;
if (OB_ISNULL(table_meta_info) || row_count < 0) {
@ -1638,6 +1724,8 @@ int ObOptEstCostModel::range_get_io_cost(const ObCostTableScanInfo &est_cost_inf
double num_micro_blocks = 0;
if (is_scan_index) {
num_micro_blocks = index_meta_info.get_micro_block_numbers();
} else if (est_cost_info.index_back_with_column_store_) {
num_micro_blocks = index_meta_info.get_micro_block_numbers();
} else {
num_micro_blocks = table_meta_info->get_micro_block_numbers();
}
@ -1659,11 +1747,11 @@ int ObOptEstCostModel::range_get_io_cost(const ObCostTableScanInfo &est_cost_inf
}
}
if (num_micro_blocks_read < 1) {
cost = 0;
io_cost = 0;
} else {
cost = first_block_cost + cost_params_.get_micro_block_rnd_cost(sys_stat_) * (num_micro_blocks_read-1);
io_cost = first_block_cost + cost_params_.get_micro_block_rnd_cost(sys_stat_) * (num_micro_blocks_read-1);
}
LOG_TRACE("OPT:[COST RANGE GET IO]", K(is_scan_index), K(row_count), K(cost), K(num_micro_blocks),
LOG_TRACE("OPT:[COST RANGE GET IO]", K(is_scan_index), K(row_count), K(io_cost), K(num_micro_blocks),
K(num_micro_blocks_read), K(first_block_cost));
}
return ret;
@ -1672,10 +1760,10 @@ int ObOptEstCostModel::range_get_io_cost(const ObCostTableScanInfo &est_cost_inf
int ObOptEstCostModel::range_scan_io_cost(const ObCostTableScanInfo &est_cost_info,
bool is_scan_index,
double row_count,
double &cost)
double &io_cost)
{
int ret = OB_SUCCESS;
cost = 0.0;
io_cost = 0.0;
const ObIndexMetaInfo &index_meta_info = est_cost_info.index_meta_info_;
const ObTableMetaInfo *table_meta_info = est_cost_info.table_meta_info_;
if (OB_ISNULL(table_meta_info) || row_count < 0) {
@ -1709,11 +1797,11 @@ int ObOptEstCostModel::range_scan_io_cost(const ObCostTableScanInfo &est_cost_in
}
}
if (num_micro_blocks_read < 1) {
cost = first_block_cost;
io_cost = first_block_cost;
} else {
cost = first_block_cost + cost_params_.get_micro_block_seq_cost(sys_stat_) * (num_micro_blocks_read-1);
io_cost = first_block_cost + cost_params_.get_micro_block_seq_cost(sys_stat_) * (num_micro_blocks_read-1);
}
LOG_TRACE("OPT:[COST RANGE SCAN IO]", K(is_scan_index), K(row_count), K(cost), K(num_micro_blocks),
LOG_TRACE("OPT:[COST RANGE SCAN IO]", K(is_scan_index), K(row_count), K(io_cost), K(num_micro_blocks),
K(num_micro_blocks_read), K(first_block_cost));
}
return ret;
@ -1723,7 +1811,7 @@ int ObOptEstCostModel::range_scan_cpu_cost(const ObCostTableScanInfo &est_cost_i
bool is_scan_index,
double row_count,
bool is_get,
double &cost)
double &cpu_cost)
{
int ret = OB_SUCCESS;
double project_cost = 0.0;
@ -1779,11 +1867,10 @@ int ObOptEstCostModel::range_scan_cpu_cost(const ObCostTableScanInfo &est_cost_i
range_count = 1;
}
range_cost = range_count * cost_params_.get_range_cost(sys_stat_);
cost = row_count * cost_params_.get_cpu_tuple_cost(sys_stat_);
cost += range_cost + qual_cost + project_cost;
cpu_cost = row_count * cost_params_.get_cpu_tuple_cost(sys_stat_);
cpu_cost += range_cost + qual_cost + project_cost;
LOG_TRACE("OPT: [RANGE SCAN CPU COST]", K(is_scan_index), K(is_get),
K(cost), K(qual_cost), K(project_cost), K(range_cost), K(row_count));
K(cpu_cost), K(qual_cost), K(project_cost), K(range_cost), K(row_count));
}
return ret;
}