only gather global stats for partitioned tables when doing online stats gathering

This commit is contained in:
Larry955
2023-08-04 06:12:22 +00:00
committed by ob-robot
parent 3cf9332af1
commit ec768b2565
3 changed files with 86 additions and 270 deletions

View File

@ -209,6 +209,9 @@ int ObOptOSGColumnStat::update_column_stat_info(const ObDatum *datum,
LOG_WARN("failed to inner merge max val"); LOG_WARN("failed to inner merge max val");
} }
} }
if (OB_SUCC(ret)) {
col_stat_->calc_avg_len();
}
return ret; return ret;
} }

View File

@ -145,10 +145,7 @@ int ObOptimizerStatsGatheringOp::inner_open()
} else { } else {
arena_.set_tenant_id(tenant_id_); arena_.set_tenant_id(tenant_id_);
piece_msg_.set_tenant_id(tenant_id_); piece_msg_.set_tenant_id(tenant_id_);
// map size = max(part num + subpart num, 100) int64_t map_size = MY_SPEC.column_ids_.count();
int64_t map_size = tab_schema->get_all_part_num() + (MY_SPEC.is_two_level_part() ?
tab_schema->get_partition_num() : 0);
map_size = map_size < DEFAULT_HASH_MAP_BUCKETS_COUNT ? DEFAULT_HASH_MAP_BUCKETS_COUNT : map_size;
if (OB_FAIL(table_stats_map_.create(map_size, if (OB_FAIL(table_stats_map_.create(map_size,
"TabStatBucket", "TabStatBucket",
"TabStatNode"))) { "TabStatNode"))) {
@ -157,13 +154,6 @@ int ObOptimizerStatsGatheringOp::inner_open()
"ColStatBucket", "ColStatBucket",
"ColStatNode"))) { "ColStatNode"))) {
LOG_WARN("fail to create column stats map", K(ret)); LOG_WARN("fail to create column stats map", K(ret));
} else if (OB_FAIL(part_map_.create(map_size,
"PartMapBucket",
"PartMapNode"))) {
LOG_WARN("fail to create part map", K(ret));
} else if (MY_SPEC.is_part_table() &&
OB_FAIL(pl::ObDbmsStats::get_table_partition_map(*tab_schema, part_map_))) {
LOG_WARN("fail to init part map", K(ret));
} }
LOG_TRACE("succeed to open optmizer_stats_gathering op", LOG_TRACE("succeed to open optmizer_stats_gathering op",
K(ret), K(map_size), K(MY_SPEC.column_ids_.count()), K(MY_SPEC.table_id_)); K(ret), K(map_size), K(MY_SPEC.column_ids_.count()), K(MY_SPEC.table_id_));
@ -288,43 +278,6 @@ int ObOptimizerStatsGatheringOp::build_piece_msg(ObOptStatsGatherPieceMsg &piece
return ret; return ret;
} }
// Fill part_ids for the row currently being evaluated.
//  - Non-partitioned table: both part_id_ and global_part_id_ are set to the
//    table id.
//  - Partitioned table: part_id_ is the partition id computed from
//    calc_part_id_expr_, global_part_id_ is -1, and for two-level partitioned
//    tables first_part_id_ is looked up in part_map_ by that partition id.
// Returns OB_SUCCESS, or the error reported by the partition-id calculation or
// by the part_map_ lookup.
int ObOptimizerStatsGatheringOp::generate_part_ids(PartIds &part_ids)
{
  int ret = OB_SUCCESS;
  if (!MY_SPEC.is_part_table()) {
    // the table itself stands for both the "partition" and the global entry
    part_ids.part_id_ = (int64_t)MY_SPEC.table_id_;
    part_ids.global_part_id_ = (int64_t)MY_SPEC.table_id_;
  } else {
    ObObjectID partition_id = OB_INVALID_ID;
    ObTabletID tablet_id;
    if (OB_FAIL(ObExprCalcPartitionBase::calc_part_and_tablet_id(MY_SPEC.calc_part_id_expr_,
                                                                 eval_ctx_,
                                                                 partition_id,
                                                                 tablet_id))) {
      LOG_WARN("calc part and tablet id by expr failed", K(ret));
    } else {
      OSGPartInfo owner_info;
      part_ids.part_id_ = partition_id;
      part_ids.global_part_id_ = -1;
      if (MY_SPEC.is_two_level_part()) {
        if (OB_FAIL(part_map_.get_refactored(partition_id, owner_info))) {
          if (OB_HASH_NOT_EXIST == ret) {
            LOG_WARN("fail to get first part id", K(ret), K(part_ids));
          } else {
            LOG_WARN("fail to find hash map", K(ret));
          }
        } else {
          part_ids.first_part_id_ = owner_info.part_id_;
        }
      }
      // emitted whether or not the lookup above succeeded
      LOG_TRACE("succeed to generate part ids", K(part_ids),
                K(MY_SPEC.is_two_level_part()), K(partition_id), K(tablet_id));
    }
  }
  return ret;
}
int ObOptimizerStatsGatheringOp::get_tab_stat_by_key(ObOptTableStat::Key &key, ObOptTableStat *&tab_stat) int ObOptimizerStatsGatheringOp::get_tab_stat_by_key(ObOptTableStat::Key &key, ObOptTableStat *&tab_stat)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
@ -381,20 +334,24 @@ int ObOptimizerStatsGatheringOp::get_col_stat_by_key(ObOptColumnStat::Key &key,
return ret; return ret;
} }
int ObOptimizerStatsGatheringOp::calc_column_stats(ObExpr *expr, int ObOptimizerStatsGatheringOp::calc_column_stats(ObExpr *expr, uint64_t column_id, int64_t &row_len)
uint64_t column_id,
PartIds &part_ids,
StatItems &all_stats,
int64_t &row_len)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
ObDatum *datum = NULL; ObDatum *datum = NULL;
int64_t col_len = 0; int64_t col_len = 0;
ObOptOSGColumnStat *global_col_stat = NULL;
ObOptColumnStat::Key global_col_stats_key(tenant_id_, MY_SPEC.table_id_, MY_SPEC.table_id_, column_id);
if (MY_SPEC.is_part_table()) {
global_col_stats_key.partition_id_ = -1;
}
if (OB_ISNULL(expr) || OB_ISNULL(expr->basic_funcs_)) { if (OB_ISNULL(expr) || OB_ISNULL(expr->basic_funcs_)) {
ret = OB_ERR_UNEXPECTED; ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null pointer", K(ret)); LOG_WARN("get unexpected null pointer", K(ret));
} else if (OB_FAIL(get_col_stats_by_partinfo(part_ids, column_id, all_stats))) { } else if (OB_FAIL(get_col_stat_by_key(global_col_stats_key, global_col_stat))) {
LOG_WARN("fail to get column stat", K(ret)); LOG_WARN("fail to get global table stat", K(ret));
} else if (OB_ISNULL(global_col_stat)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret));
} else if (!ObColumnStatParam::is_valid_opt_col_type(expr->obj_meta_.get_type())) { } else if (!ObColumnStatParam::is_valid_opt_col_type(expr->obj_meta_.get_type())) {
// do nothing yet, shoul use the plain stats. // do nothing yet, shoul use the plain stats.
} else if (OB_FAIL(expr->eval(eval_ctx_, datum))) { } else if (OB_FAIL(expr->eval(eval_ctx_, datum))) {
@ -404,14 +361,58 @@ int ObOptimizerStatsGatheringOp::calc_column_stats(ObExpr *expr,
LOG_WARN("get unexpected null"); LOG_WARN("get unexpected null");
} else if (OB_FAIL(ObExprSysOpOpnsize::calc_sys_op_opnsize(expr, datum, col_len))) { } else if (OB_FAIL(ObExprSysOpOpnsize::calc_sys_op_opnsize(expr, datum, col_len))) {
LOG_WARN("fail to calc sys op opnsize", K(ret)); LOG_WARN("fail to calc sys op opnsize", K(ret));
} else if (OB_FAIL(set_col_stats(all_stats, } else if (OB_FALSE_IT(global_col_stat->col_stat_->set_stat_level(StatLevel::TABLE_LEVEL))) {
datum, } else if (OB_FAIL(global_col_stat->update_column_stat_info(datum, expr->obj_meta_,
expr->obj_meta_,
expr->basic_funcs_->null_first_cmp_))) { expr->basic_funcs_->null_first_cmp_))) {
LOG_WARN("failed to set col stats"); LOG_WARN("fail to set global column stat", K(ret));
} else { } else {
row_len += col_len; row_len += col_len;
LOG_TRACE("succed to calc column stat", K(*expr), K(row_len), K(*datum)); LOG_TRACE("succed to calc column stat", KPC(expr), K(row_len), KPC(datum));
}
return ret;
}
// Run calc_column_stats() for every output column of the current row.
// column_ids_ is indexed so that the first col_conv_exprs_.count() entries pair
// with col_conv_exprs_ and the remaining entries pair with
// generated_column_exprs_; a size mismatch yields OB_ERR_UNEXPECTED.
// row_len is handed to calc_column_stats() for each column.
// Stops at, and returns, the first error.
int ObOptimizerStatsGatheringOp::calc_columns_stats(int64_t &row_len)
{
  int ret = OB_SUCCESS;
  const int64_t conv_cnt = MY_SPEC.col_conv_exprs_.count();
  const int64_t generated_cnt = MY_SPEC.generated_column_exprs_.count();
  if (MY_SPEC.column_ids_.count() != conv_cnt + generated_cnt) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("column ids doesn't match the output", K(ret));
  }
  // columns produced by the column-conversion expressions
  for (int64_t idx = 0; OB_SUCC(ret) && idx < conv_cnt; ++idx) {
    const uint64_t column_id = MY_SPEC.column_ids_.at(idx);
    if (OB_FAIL(calc_column_stats(MY_SPEC.col_conv_exprs_.at(idx), column_id, row_len))) {
      LOG_WARN("fail to calc column stats", K(ret));
    }
  }
  // generated columns, whose ids follow the converted ones in column_ids_
  for (int64_t idx = 0; OB_SUCC(ret) && idx < generated_cnt; ++idx) {
    const uint64_t column_id = MY_SPEC.column_ids_.at(conv_cnt + idx);
    if (OB_FAIL(calc_column_stats(MY_SPEC.generated_column_exprs_.at(idx), column_id, row_len))) {
      LOG_WARN("fail to calc column stats", K(ret));
    }
  }
  return ret;
}
int ObOptimizerStatsGatheringOp::calc_table_stats(int64_t &row_len)
{
int ret = OB_SUCCESS;
ObOptTableStat *global_tab_stat = NULL;
ObOptTableStat::Key global_key(tenant_id_, MY_SPEC.table_id_, (int64_t)MY_SPEC.table_id_);
if (MY_SPEC.is_part_table()) {
global_key.partition_id_ = -1;
}
if (OB_FAIL(get_tab_stat_by_key(global_key, global_tab_stat))) {
LOG_WARN("fail to get global table stat", K(ret));
} else if (OB_ISNULL(global_tab_stat)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret));
} else {
global_tab_stat->add_avg_row_size(row_len);
global_tab_stat->add_row_count(1);
global_tab_stat->set_object_type(StatLevel::TABLE_LEVEL);
} }
return ret; return ret;
} }
@ -419,142 +420,11 @@ int ObOptimizerStatsGatheringOp::calc_column_stats(ObExpr *expr,
int ObOptimizerStatsGatheringOp::calc_stats() int ObOptimizerStatsGatheringOp::calc_stats()
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
StatItems all_stats;
PartIds part_ids;
if (MY_SPEC.column_ids_.count() != MY_SPEC.col_conv_exprs_.count() + MY_SPEC.generated_column_exprs_.count()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("column ids doesn't match the output", K(ret));
} else if (OB_FAIL(generate_part_ids(part_ids))) {
LOG_WARN("fail to generated part ids", K(ret));
} else if (OB_FAIL(get_tab_stats_by_partinfo(part_ids, all_stats))) {
LOG_WARN("fail to get table stat", K(ret));
} else {
int64_t row_len = 0; int64_t row_len = 0;
for (int64_t i = 0; OB_SUCC(ret) && i < MY_SPEC.col_conv_exprs_.count(); i++) { if (OB_FAIL(calc_columns_stats(row_len))) {
uint64_t column_id = MY_SPEC.column_ids_.at(i); LOG_WARN("failed to calc column stats", K(ret));
if (OB_FAIL(calc_column_stats(MY_SPEC.col_conv_exprs_.at(i), column_id, part_ids, all_stats, row_len))) { } else if (OB_FAIL(calc_table_stats(row_len))) {
LOG_WARN("fail to calc column stats", K(ret)); LOG_WARN("failed to calc table stats", K(ret));
}
}
//generated column
for (int64_t i = 0; OB_SUCC(ret) && i < MY_SPEC.generated_column_exprs_.count(); i++) {
uint64_t column_id = MY_SPEC.column_ids_.at(i + MY_SPEC.col_conv_exprs_.count());
if (OB_FAIL(calc_column_stats(MY_SPEC.generated_column_exprs_.at(i), column_id,
part_ids, all_stats, row_len))) {
LOG_WARN("fail to calc column stats", K(ret));
}
}
if (OB_SUCC(ret) && OB_FAIL(set_tab_stats(all_stats, row_len))) {
LOG_WARN("fail to set col stats", K(ret));
}
}
return ret;
}
// Fetch the table-stat entries that the current row contributes to and store
// them in all_stats:
//  - the global entry is always fetched;
//  - the partition entry only for partitioned tables;
//  - the first-level partition entry only for two-level partitioned tables.
// Each entry is obtained through get_tab_stat_by_key(); the first failure is
// logged and returned.
int ObOptimizerStatsGatheringOp::get_tab_stats_by_partinfo(PartIds &part_ids, StatItems &all_stats)
{
  int ret = OB_SUCCESS;
  ObOptTableStat::Key global_key(tenant_id_, MY_SPEC.table_id_, part_ids.global_part_id_);
  ObOptTableStat::Key part_key(tenant_id_, MY_SPEC.table_id_, part_ids.part_id_);
  ObOptTableStat::Key first_part_key(tenant_id_, MY_SPEC.table_id_, part_ids.first_part_id_);

  if (OB_FAIL(get_tab_stat_by_key(global_key, all_stats.global_tab_stat_))) {
    LOG_WARN("fail to get global table stat", K(ret));
  }
  if (OB_SUCC(ret) && MY_SPEC.is_part_table()) {
    if (OB_FAIL(get_tab_stat_by_key(part_key, all_stats.part_tab_stat_))) {
      LOG_WARN("fail to get part table stat", K(ret));
    }
  }
  if (OB_SUCC(ret) && MY_SPEC.is_two_level_part()) {
    if (OB_FAIL(get_tab_stat_by_key(first_part_key, all_stats.first_part_tab_stat_))) {
      LOG_WARN("fail to get first part table stat", K(ret));
    }
  }
  return ret;
}
// Fetch the column-stat entries of column_id that the current row contributes
// to and store them in all_stats:
//  - the global entry is always fetched;
//  - the partition entry only for partitioned tables;
//  - the first-level partition entry only for two-level partitioned tables.
// Each entry is obtained through get_col_stat_by_key(); the first failure is
// logged and returned.
int ObOptimizerStatsGatheringOp::get_col_stats_by_partinfo(PartIds &part_ids, uint64_t column_id, StatItems &all_stats)
{
  int ret = OB_SUCCESS;
  ObOptColumnStat::Key global_col_stats_key(tenant_id_, MY_SPEC.table_id_, part_ids.global_part_id_, column_id);
  ObOptColumnStat::Key part_col_stats_key(tenant_id_, MY_SPEC.table_id_, part_ids.part_id_, column_id);
  ObOptColumnStat::Key first_part_col_stats_key(tenant_id_, MY_SPEC.table_id_, part_ids.first_part_id_, column_id);
  // The warnings name "column stat" so they can be told apart from the
  // table-stat lookups, which previously logged identical text.
  if (OB_FAIL(get_col_stat_by_key(global_col_stats_key, all_stats.global_col_stat_))) {
    LOG_WARN("fail to get global column stat", K(ret));
  } else if (MY_SPEC.is_part_table() && OB_FAIL(get_col_stat_by_key(part_col_stats_key, all_stats.part_col_stat_))) {
    LOG_WARN("fail to get part column stat", K(ret));
  } else if (MY_SPEC.is_two_level_part() && OB_FAIL(get_col_stat_by_key(first_part_col_stats_key, all_stats.first_part_col_stat_))) {
    LOG_WARN("fail to get first part column stat", K(ret));
  }
  return ret;
}
// Feed one datum into every column-stat entry held by all_stats.
//  - The global entry is tagged TABLE_LEVEL and always updated.
//  - For partitioned tables the partition entry is tagged PARTITION_LEVEL and
//    updated.
//  - For two-level partitioned tables the first-level entry is tagged
//    PARTITION_LEVEL and updated, and the partition entry is re-tagged
//    SUBPARTITION_LEVEL.
// A null datum or a null required entry yields OB_ERR_UNEXPECTED; otherwise the
// first error from update_column_stat_info() is returned.
int ObOptimizerStatsGatheringOp::set_col_stats(StatItems &all_stats,
                                               ObDatum *datum,
                                               const ObObjMeta &meta,
                                               const ObDatumCmpFuncType cmp_func)
{
  int ret = OB_SUCCESS;
  ObOptOSGColumnStat *global_stat = all_stats.global_col_stat_;
  ObOptOSGColumnStat *part_stat = all_stats.part_col_stat_;
  ObOptOSGColumnStat *first_part_stat = all_stats.first_part_col_stat_;

  // global level
  if (OB_ISNULL(datum) || OB_ISNULL(global_stat) || OB_ISNULL(global_stat->col_stat_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("get unexpected null", K(all_stats.global_col_stat_), K(datum));
  } else {
    global_stat->col_stat_->set_stat_level(StatLevel::TABLE_LEVEL);
    if (OB_FAIL(global_stat->update_column_stat_info(datum, meta, cmp_func))) {
      LOG_WARN("fail to set global column stat", K(ret));
    }
  }

  // partition level
  if (OB_SUCC(ret) && MY_SPEC.is_part_table()) {
    if (OB_ISNULL(part_stat) || OB_ISNULL(part_stat->col_stat_)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("get unexpected null", K(all_stats.part_col_stat_));
    } else {
      part_stat->col_stat_->set_stat_level(StatLevel::PARTITION_LEVEL);
      if (OB_FAIL(part_stat->update_column_stat_info(datum, meta, cmp_func))) {
        LOG_WARN("fail to set part column stat", K(ret));
      }
    }
  }

  // first-level partition of a two-level partitioned table
  if (OB_SUCC(ret) && MY_SPEC.is_two_level_part()) {
    if (OB_ISNULL(first_part_stat) || OB_ISNULL(first_part_stat->col_stat_)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("get unexpected null", K(all_stats.first_part_col_stat_));
    } else {
      first_part_stat->col_stat_->set_stat_level(StatLevel::PARTITION_LEVEL);
      // the entry updated in the partition step above is the sub-partition here
      part_stat->col_stat_->set_stat_level(StatLevel::SUBPARTITION_LEVEL);
      if (OB_FAIL(first_part_stat->update_column_stat_info(datum, meta, cmp_func))) {
        LOG_WARN("fail to set first part column stat", K(ret));
      }
    }
  }
  return ret;
}
int ObOptimizerStatsGatheringOp::set_tab_stats(StatItems &all_stats, int64_t row_len)
{
int ret = OB_SUCCESS;
if (OB_ISNULL(all_stats.global_tab_stat_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null");
} else {
all_stats.global_tab_stat_->add_avg_row_size(row_len);
all_stats.global_tab_stat_->add_row_count(1);
all_stats.global_tab_stat_->set_object_type(StatLevel::TABLE_LEVEL);
}
if (OB_SUCC(ret) && MY_SPEC.is_part_table()) {
if (OB_ISNULL(all_stats.part_tab_stat_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null");
} else {
all_stats.part_tab_stat_->add_avg_row_size(row_len);
all_stats.part_tab_stat_->add_row_count(1);
all_stats.part_tab_stat_->set_object_type(StatLevel::PARTITION_LEVEL);
}
}
if (OB_SUCC(ret) && MY_SPEC.is_two_level_part()) {
if (OB_ISNULL(all_stats.first_part_tab_stat_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null");
} else {
all_stats.first_part_tab_stat_->add_avg_row_size(row_len);
all_stats.first_part_tab_stat_->add_row_count(1);
all_stats.first_part_tab_stat_->set_object_type(StatLevel::PARTITION_LEVEL);
all_stats.part_tab_stat_->set_object_type(StatLevel::SUBPARTITION_LEVEL);
}
} }
return ret; return ret;
} }
@ -734,11 +604,6 @@ int ObOptimizerStatsGatheringOp::generate_stat_param(ObTableStatParam &param)
param.global_tablet_id_ = MY_SPEC.table_id_; param.global_tablet_id_ = MY_SPEC.table_id_;
param.part_stat_param_.need_modify_ = false; param.part_stat_param_.need_modify_ = false;
param.subpart_stat_param_.need_modify_ = false; param.subpart_stat_param_.need_modify_ = false;
} else {
param.global_part_id_ = -1;
param.global_tablet_id_ = -1;
}
for (int64_t i = 0; OB_SUCC(ret) && i < MY_SPEC.column_ids_.count(); i++) { for (int64_t i = 0; OB_SUCC(ret) && i < MY_SPEC.column_ids_.count(); i++) {
ObColumnStatParam col_param; ObColumnStatParam col_param;
col_param.column_id_ = MY_SPEC.column_ids_.at(i); col_param.column_id_ = MY_SPEC.column_ids_.at(i);
@ -755,31 +620,6 @@ int ObOptimizerStatsGatheringOp::generate_stat_param(ObTableStatParam &param)
LOG_WARN("fail to push back column param", K(ret)); LOG_WARN("fail to push back column param", K(ret));
} }
} }
if (OB_FAIL(ret)) {
} else if (MY_SPEC.is_part_table() && !MY_SPEC.is_two_level_part()) {
param.part_stat_param_.need_modify_ = true;
param.subpart_stat_param_.need_modify_ = false;
} else if (MY_SPEC.is_part_table() && MY_SPEC.is_two_level_part()){
//default is true
}
if (OB_SUCC(ret) && MY_SPEC.is_part_table()) {
FOREACH_X(it, part_map_, OB_SUCC(ret)) {
PartInfo tmp_part_info;
tmp_part_info.part_id_ = it->first;
tmp_part_info.tablet_id_ = it->second.tablet_id_;
if (it->first != it->second.part_id_) {
//subpart
if (OB_FAIL(param.subpart_infos_.push_back(tmp_part_info))) {
LOG_WARN("fail to push back part info", K(ret));
}
} else {
//first level parttion
if (OB_FAIL(param.part_infos_.push_back(tmp_part_info))) {
LOG_WARN("fail to push back part info", K(ret));
}
}
}
} }
} }
return ret; return ret;

View File

@ -72,23 +72,6 @@ public:
TO_STRING_KV(K(global_part_id_), K(part_id_), K(first_part_id_)); TO_STRING_KV(K(global_part_id_), K(part_id_), K(first_part_id_));
}; };
// Bundle of the table-stat and column-stat entries touched by one row:
// one slot each for the global, partition and first-level partition entry.
// All slots start out as nullptr.
struct StatItems {
  StatItems() {}
  // table-level statistics
  ObOptTableStat *global_tab_stat_ = nullptr;
  ObOptTableStat *part_tab_stat_ = nullptr;
  ObOptTableStat *first_part_tab_stat_ = nullptr;
  // column-level statistics
  ObOptOSGColumnStat *global_col_stat_ = nullptr;
  ObOptOSGColumnStat *part_col_stat_ = nullptr;
  ObOptOSGColumnStat *first_part_col_stat_ = nullptr;
};
public: public:
ObOptimizerStatsGatheringOp(ObExecContext &exec_ctx, const ObOpSpec &spec, ObOpInput *input); ObOptimizerStatsGatheringOp(ObExecContext &exec_ctx, const ObOpSpec &spec, ObOpInput *input);
virtual int inner_open() override; virtual int inner_open() override;
@ -116,21 +99,11 @@ private:
int send_stats(); int send_stats();
int calc_stats(); int calc_stats();
// calc stats for each column // calc stats for each column
int calc_column_stats(ObExpr *expr, int calc_column_stats(ObExpr *expr, uint64_t column_id, int64_t &row_len);
uint64_t column_id, int calc_columns_stats(int64_t &row_len);
PartIds &part_ids, int calc_table_stats(int64_t &row_len);
StatItems &all_stats,
int64_t &row_len);
// generate stat_param that is used to write inner_table. // generate stat_param that is used to write inner_table.
int generate_stat_param(ObTableStatParam &param); int generate_stat_param(ObTableStatParam &param);
int generate_part_ids(PartIds &part_ids);
int get_tab_stats_by_partinfo(PartIds &part_ids, StatItems &stat_item);
int get_col_stats_by_partinfo(PartIds &part_ids, uint64_t column_id, StatItems &stat_item);
// get stat by part_ids
int set_col_stats(StatItems &all_stat, ObDatum *datum, const ObObjMeta &meta, const ObDatumCmpFuncType cmp_func);
int set_tab_stats(StatItems &all_stat, int64_t row_len);
// get tab stat by key(tenant_id, table_id, partition_id), if NOT_EXISTS, alloc a new one. // get tab stat by key(tenant_id, table_id, partition_id), if NOT_EXISTS, alloc a new one.
int get_tab_stat_by_key(ObOptTableStat::Key &key, ObOptTableStat *&tab_stat); int get_tab_stat_by_key(ObOptTableStat::Key &key, ObOptTableStat *&tab_stat);