diff --git a/src/observer/ob_service.cpp b/src/observer/ob_service.cpp index f4af30cdcf..46a01e25f3 100644 --- a/src/observer/ob_service.cpp +++ b/src/observer/ob_service.cpp @@ -2816,8 +2816,8 @@ int ObService::estimate_tablet_block_count(const obrpc::ObEstBlockArg &arg, if (!inited_) { ret = OB_INVALID_ARGUMENT; LOG_WARN("service is not inited", K(ret)); - } else if (OB_FAIL(sql::ObStorageEstimator::estimate_block_count(arg, res))) { - LOG_WARN("failed to estimate partition rowcount", K(ret)); + } else if (OB_FAIL(sql::ObStorageEstimator::estimate_block_count_and_row_count(arg, res))) { + LOG_WARN("failed to estimate block count and row count", K(ret)); } return ret; } diff --git a/src/share/ob_rpc_struct.cpp b/src/share/ob_rpc_struct.cpp index 201e2d4709..72ed45c415 100755 --- a/src/share/ob_rpc_struct.cpp +++ b/src/share/ob_rpc_struct.cpp @@ -8696,10 +8696,12 @@ int ObEstBlockResElement::assign(const ObEstBlockResElement &other) int ret = OB_SUCCESS; macro_block_count_ = other.macro_block_count_; micro_block_count_ = other.micro_block_count_; + sstable_row_count_ = other.sstable_row_count_; + memtable_row_count_ = other.memtable_row_count_; return ret; } -OB_SERIALIZE_MEMBER(ObEstBlockResElement, macro_block_count_, micro_block_count_); +OB_SERIALIZE_MEMBER(ObEstBlockResElement, macro_block_count_, micro_block_count_, sstable_row_count_, memtable_row_count_); int ObEstBlockRes::assign(const ObEstBlockRes &other) { diff --git a/src/share/ob_rpc_struct.h b/src/share/ob_rpc_struct.h index f293d343e6..48f6ed8e13 100755 --- a/src/share/ob_rpc_struct.h +++ b/src/share/ob_rpc_struct.h @@ -9334,10 +9334,12 @@ struct ObEstBlockResElement public: int64_t macro_block_count_; int64_t micro_block_count_; + int64_t sstable_row_count_; + int64_t memtable_row_count_; bool is_valid() const { return true; } int assign(const ObEstBlockResElement &other); - ObEstBlockResElement() : macro_block_count_(0), micro_block_count_(0) {} - TO_STRING_KV(K(macro_block_count_), K(micro_block_count_)); + ObEstBlockResElement() : macro_block_count_(0), micro_block_count_(0), sstable_row_count_(0), memtable_row_count_(0) {} + TO_STRING_KV(K(macro_block_count_), K(micro_block_count_), K(sstable_row_count_), K(memtable_row_count_)); }; struct ObEstBlockRes diff --git a/src/share/stat/ob_basic_stats_estimator.cpp b/src/share/stat/ob_basic_stats_estimator.cpp index 92435744bd..f0e2b68275 100644 --- a/src/share/stat/ob_basic_stats_estimator.cpp +++ b/src/share/stat/ob_basic_stats_estimator.cpp @@ -153,9 +153,9 @@ int ObBasicStatsEstimator::estimate_block_count(ObExecContext &ctx, } else if (param.part_level_ == share::schema::PARTITION_LEVEL_TWO && OB_FAIL(generate_first_part_idx_map(param.all_part_infos_, first_part_idx_map))) { LOG_WARN("failed to generate first part idx map", K(ret)); - } else if (OB_FAIL(do_estimate_block_count(ctx, param.tenant_id_, table_id, tablet_ids, - partition_ids, estimate_result))) { - LOG_WARN("failed to do estimate block count", K(ret)); + } else if (OB_FAIL(do_estimate_block_count_and_row_count(ctx, param.tenant_id_, table_id, tablet_ids, + partition_ids, estimate_result))) { + LOG_WARN("failed to do estimate block count and row count", K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < estimate_result.count(); ++i) { BolckNumPair block_num_pair; @@ -213,12 +213,12 @@ int ObBasicStatsEstimator::estimate_block_count(ObExecContext &ctx, return ret; } -int ObBasicStatsEstimator::do_estimate_block_count(ObExecContext &ctx, - const uint64_t tenant_id, - const uint64_t table_id, - const ObIArray &tablet_ids, - const ObIArray &partition_ids, - ObIArray &estimate_res) +int ObBasicStatsEstimator::do_estimate_block_count_and_row_count(ObExecContext &ctx, + const uint64_t tenant_id, + const uint64_t table_id, + const ObIArray &tablet_ids, + const ObIArray &partition_ids, + ObIArray &estimate_res) { int ret = OB_SUCCESS; common::ObSEArray candi_tablet_locs; @@ -279,8 +279,8 @@ int ObBasicStatsEstimator::do_estimate_block_count(ObExecContext &ctx, } else {/*do nothing*/} } if (OB_SUCC(ret)) {//begin storage estimate block count - if (OB_FAIL(stroage_estimate_block_count(ctx, cur_selected_addr, arg, result))) { - LOG_WARN("failed to stroage estimate block count", K(ret)); + if (OB_FAIL(stroage_estimate_block_count_and_row_count(ctx, cur_selected_addr, arg, result))) { + LOG_WARN("failed to stroage estimate block count and row count", K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < selected_tablet_idx.count(); ++i) { int64_t idx = selected_tablet_idx.at(i); @@ -293,6 +293,8 @@ int ObBasicStatsEstimator::do_estimate_block_count(ObExecContext &ctx, estimate_res.at(idx).part_id_ = partition_ids.at(idx); estimate_res.at(idx).macro_block_count_ = result.tablet_params_res_.at(i).macro_block_count_; estimate_res.at(idx).micro_block_count_ = result.tablet_params_res_.at(i).micro_block_count_; + estimate_res.at(idx).sstable_row_count_ = result.tablet_params_res_.at(i).sstable_row_count_; + estimate_res.at(idx).memtable_row_count_ = result.tablet_params_res_.at(i).memtable_row_count_; } } LOG_TRACE("succeed to estimate block count", K(selected_tablet_idx), K(partition_ids), @@ -305,17 +307,17 @@ int ObBasicStatsEstimator::do_estimate_block_count(ObExecContext &ctx, return ret; } -int ObBasicStatsEstimator::stroage_estimate_block_count(ObExecContext &ctx, - const ObAddr &addr, - const obrpc::ObEstBlockArg &arg, - obrpc::ObEstBlockRes &result) +int ObBasicStatsEstimator::stroage_estimate_block_count_and_row_count(ObExecContext &ctx, + const ObAddr &addr, + const obrpc::ObEstBlockArg &arg, + obrpc::ObEstBlockRes &result) { int ret = OB_SUCCESS; if (addr == ctx.get_addr()) { - if (OB_FAIL(ObStorageEstimator::estimate_block_count(arg, result))) { + if (OB_FAIL(ObStorageEstimator::estimate_block_count_and_row_count(arg, result))) { LOG_WARN("failed to estimate partition rows", K(ret)); } else { - LOG_TRACE("succeed to stroage estimate block count", K(addr), K(arg), K(result)); + LOG_TRACE("succeed to stroage estimate block count and row count", K(addr), K(arg), K(result)); } } else { obrpc::ObSrvRpcProxy *rpc_proxy = NULL; diff --git a/src/share/stat/ob_basic_stats_estimator.h b/src/share/stat/ob_basic_stats_estimator.h index 5f59ae0343..ebca63e5a3 100644 --- a/src/share/stat/ob_basic_stats_estimator.h +++ b/src/share/stat/ob_basic_stats_estimator.h @@ -24,13 +24,17 @@ namespace common struct EstimateBlockRes { - EstimateBlockRes() : part_id_(), macro_block_count_(0), micro_block_count_(0) {} + EstimateBlockRes() : part_id_(), macro_block_count_(0), micro_block_count_(0), sstable_row_count_(0), memtable_row_count_(0) {} ObObjectID part_id_; int64_t macro_block_count_; int64_t micro_block_count_; + int64_t sstable_row_count_; + int64_t memtable_row_count_; TO_STRING_KV(K(part_id_), K(macro_block_count_), - K(micro_block_count_)); + K(micro_block_count_), + K(sstable_row_count_), + K(memtable_row_count_)); }; class ObBasicStatsEstimator : public ObStatsEstimator @@ -87,12 +91,12 @@ public: static int gen_tablet_list(const ObTableStatParam ¶m, ObSqlString &tablet_list); - static int do_estimate_block_count(ObExecContext &ctx, - const uint64_t tenant_id, - const uint64_t table_id, - const ObIArray &tablet_ids, - const ObIArray &partition_ids, - ObIArray &estimate_res); + static int do_estimate_block_count_and_row_count(ObExecContext &ctx, + const uint64_t tenant_id, + const uint64_t table_id, + const ObIArray &tablet_ids, + const ObIArray &partition_ids, + ObIArray &estimate_res); static int get_tablet_locations(ObExecContext &ctx, const uint64_t ref_table_id, @@ -100,10 +104,10 @@ public: const ObIArray &partition_ids, ObCandiTabletLocIArray &candi_tablet_locs); - static int stroage_estimate_block_count(ObExecContext &ctx, - const ObAddr &addr, - const obrpc::ObEstBlockArg &arg, - obrpc::ObEstBlockRes &result); + static int stroage_estimate_block_count_and_row_count(ObExecContext &ctx, + const ObAddr &addr, + const obrpc::ObEstBlockArg &arg, + obrpc::ObEstBlockRes &result); static int get_all_tablet_id_and_object_id(const ObTableStatParam ¶m, ObIArray &tablet_ids, diff --git a/src/sql/optimizer/ob_dynamic_sampling.cpp b/src/sql/optimizer/ob_dynamic_sampling.cpp index d7e8f1bdc1..6fd551fa44 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.cpp +++ b/src/sql/optimizer/ob_dynamic_sampling.cpp @@ -478,13 +478,6 @@ int ObDynamicSampling::do_estimate_table_rowcount(const ObDSTableParam ¶m, b LOG_WARN("failed to calc sample block ratio", K(ret)); } else if (OB_FAIL(add_block_info_for_stat_items())) { LOG_WARN("failed to add block info for stat items", K(ret)); - } else if (macro_block_num_ <= 1 && - sample_block_ratio_ == 100.0 && - param.partition_infos_.count() <= 1 && - param.max_ds_timeout_ <= param.est_rowcount_ * 2) { - //do nothing, rows in memtable is too many, don't dynamic sampling - LOG_TRACE("just skip dynamic sampling, because the rows in memtable is too many", K(param), - K(macro_block_num_)); } else if (OB_FAIL(estimte_rowcount(param.max_ds_timeout_, param.degree_, throw_ds_error))) { LOG_WARN("failed to estimate rowcount", K(ret)); } @@ -688,9 +681,6 @@ int ObDynamicSampling::add_basic_hint_info(ObSqlString &basic_hint_str, //use defualt stat } else if (OB_FAIL(basic_hint_str.append(" OPT_PARAM(\'USE_DEFAULT_OPT_STAT\',\'TRUE\') "))) { LOG_WARN("failed to append", K(ret)); - //use force block sample - } else if (OB_FAIL(basic_hint_str.append(" OPT_PARAM(\'USE_FORCE_BLOCK_SAMPLE\',\'TRUE\') "))) { - LOG_WARN("failed to append", K(ret)); } else if (OB_FAIL(basic_hint_str.append("*/"))) {//hint end LOG_WARN("failed to append", K(ret)); } else { @@ -792,24 +782,36 @@ int ObDynamicSampling::calc_table_sample_block_ratio(const ObDSTableParam ¶m } else if (OB_UNLIKELY((sample_micro_cnt = get_dynamic_sampling_micro_block_num(param)) < 1)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected error", K(ret), K(param)); - } else if (OB_FAIL(estimate_table_micro_block_count(param))) { - LOG_WARN("failed to estimate table micro block count", K(ret)); - } else if (macro_block_num_ <= 1 || - sample_micro_cnt >= micro_block_num_ || - (param.partition_infos_.count() <= 1 && param.est_rowcount_ < MAX_FULL_SCAN_ROW_COUNT)) { - sample_block_ratio_ = 100.0; - seed_ = param.degree_ > 1 ? 0 : 1; + } else if (OB_FAIL(estimate_table_block_count_and_row_count(param))) { + LOG_WARN("failed to estimate table block count and row count", K(ret)); } else { - sample_block_ratio_ = 100.0 * sample_micro_cnt / micro_block_num_; - if (param.degree_ > 1) {//adjust sample ratio according to the degree. + //1.retire to memtable sample + if (sstable_row_count_ < memtable_row_count_) { + double sample_row_cnt = MAGIC_MAX_AUTO_SAMPLE_SIZE * (1.0 * sample_micro_cnt / OB_DS_BASIC_SAMPLE_MICRO_CNT); + if (memtable_row_count_ < sample_row_cnt) { + sample_block_ratio_ = 100.0; + } else { + sample_block_ratio_ = 100.0 * sample_row_cnt / memtable_row_count_; + } + } else { + //2.use the block sample + if (sample_micro_cnt >= micro_block_num_) { + sample_block_ratio_ = 100.0; + } else { + sample_block_ratio_ = 100.0 * sample_micro_cnt / micro_block_num_; + } + } + //3.try adjust sample block ratio according to the degree + if (param.degree_ > 1 && sample_block_ratio_ < 100.0) {//adjust sample ratio according to the degree. sample_block_ratio_ = sample_block_ratio_ * param.degree_; sample_block_ratio_ = sample_block_ratio_ < 100.0 ? sample_block_ratio_ : 100.0; } + //4.adjsut the seed. seed_ = (param.degree_ > 1 || param.partition_infos_.count() > 1) ? 0 : 1; } LOG_TRACE("succeed to calc table sample block ratio", K(param), K(seed_), K(sample_micro_cnt), K(sample_block_ratio_), K(micro_block_num_), - K(macro_block_num_)); + K(sstable_row_count_), K(memtable_row_count_)); return ret; } @@ -841,7 +843,7 @@ int64_t ObDynamicSampling::get_dynamic_sampling_micro_block_num(const ObDSTableP return sample_micro_cnt; } -int ObDynamicSampling::estimate_table_micro_block_count(const ObDSTableParam ¶m) +int ObDynamicSampling::estimate_table_block_count_and_row_count(const ObDSTableParam ¶m) { int ret = OB_SUCCESS; macro_block_num_ = 0; @@ -854,21 +856,24 @@ int ObDynamicSampling::estimate_table_micro_block_count(const ObDSTableParam &pa LOG_WARN("get unexpected null", K(ret), K(ctx_->get_exec_ctx()), K(ctx_->get_session_info())); } else if (OB_FAIL(get_all_tablet_id_and_object_id(param, tablet_ids, partition_ids))) { LOG_WARN("failed to get all tablet id and object id", K(ret)); - } else if (OB_FAIL(ObBasicStatsEstimator::do_estimate_block_count(*ctx_->get_exec_ctx(), - ctx_->get_session_info()->get_effective_tenant_id(), - param.table_id_, - tablet_ids, - partition_ids, - estimate_result))) { - LOG_WARN("failed to do estimate block count", K(ret)); + } else if (OB_FAIL(ObBasicStatsEstimator::do_estimate_block_count_and_row_count(*ctx_->get_exec_ctx(), + ctx_->get_session_info()->get_effective_tenant_id(), + param.table_id_, + tablet_ids, + partition_ids, + estimate_result))) { + LOG_WARN("failed to do estimate block count and row count", K(ret)); } else { for (int64_t i = 0; i < estimate_result.count(); ++i) { macro_block_num_ += estimate_result.at(i).macro_block_count_; micro_block_num_ += estimate_result.at(i).micro_block_count_; + sstable_row_count_ += estimate_result.at(i).sstable_row_count_; + memtable_row_count_ += estimate_result.at(i).memtable_row_count_; } LOG_TRACE("Succeed to estimate micro block count", K(micro_block_num_), K(macro_block_num_), K(tablet_ids), K(partition_ids), - K(estimate_result), K(param)); + K(estimate_result), K(param), + K(sstable_row_count_), K(memtable_row_count_)); } return ret; } @@ -1306,7 +1311,6 @@ int ObDynamicSamplingUtils::get_ds_table_param(ObOptimizerContext &ctx, ds_table_param.tenant_id_ = ctx.get_session_info()->get_effective_tenant_id(); ds_table_param.table_id_ = table_meta->get_ref_table_id(); ds_table_param.ds_level_ = ds_level; - ds_table_param.est_rowcount_ = table_meta->get_rows(); ds_table_param.sample_block_cnt_ = sample_block_cnt; ds_table_param.max_ds_timeout_ = get_dynamic_sampling_max_timeout(ctx); ds_table_param.is_virtual_table_ = is_virtual_table(table_meta->get_ref_table_id()) && diff --git a/src/sql/optimizer/ob_dynamic_sampling.h b/src/sql/optimizer/ob_dynamic_sampling.h index 8900fefded..711cb50a96 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.h +++ b/src/sql/optimizer/ob_dynamic_sampling.h @@ -49,7 +49,6 @@ struct ObDSTableParam alias_name_(), is_virtual_table_(false), ds_level_(ObDynamicSamplingLevel::NO_DYNAMIC_SAMPLING), - est_rowcount_(0), sample_block_cnt_(0), max_ds_timeout_(0), degree_(1), @@ -68,7 +67,6 @@ struct ObDSTableParam ObString alias_name_; bool is_virtual_table_; int64_t ds_level_; - int64_t est_rowcount_; int64_t sample_block_cnt_; int64_t max_ds_timeout_; int64_t degree_; @@ -82,7 +80,6 @@ struct ObDSTableParam K(alias_name_), K(is_virtual_table_), K(ds_level_), - K(est_rowcount_), K(max_ds_timeout_), K(degree_), K(sample_block_cnt_), @@ -217,6 +214,8 @@ public: partition_list_(), macro_block_num_(0), micro_block_num_(0), + sstable_row_count_(0), + memtable_row_count_(0), sample_block_ratio_(0.0), seed_(0), sample_block_(), @@ -271,7 +270,7 @@ private: int estimte_rowcount(int64_t max_ds_timeout, int64_t degree, bool &throw_ds_error); int pack(ObSqlString &raw_sql_str); int gen_select_filed(ObSqlString &select_fields); - int estimate_table_micro_block_count(const ObDSTableParam ¶m); + int estimate_table_block_count_and_row_count(const ObDSTableParam ¶m); int get_all_tablet_id_and_object_id(const ObDSTableParam ¶m, ObIArray &tablet_ids, ObIArray &partition_ids); @@ -328,6 +327,8 @@ private: ObString partition_list_; int64_t macro_block_num_; int64_t micro_block_num_; + int64_t sstable_row_count_; + int64_t memtable_row_count_; double sample_block_ratio_; int64_t seed_; ObString sample_block_; diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index 9ebc0d16a7..2e91af7ce3 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -1642,12 +1642,6 @@ int ObJoinOrder::create_one_access_path(const uint64_t table_id, //block(row) sample scan do not support DAS TSC ap->use_das_ = false; } - bool is_exists_opt = false; - if (OB_FAIL(OPT_CTX.get_global_hint().opt_params_.get_bool_opt_param(ObOptParamHint::USE_FORCE_BLOCK_SAMPLE, - ap->sample_info_.force_block_, - is_exists_opt))) { - LOG_WARN("fail to check use force block sample", K(ret)); - } } else if (get_plan()->get_optimizer_context().is_online_ddl() && get_plan()->get_optimizer_context().get_root_stmt()->is_insert_stmt()) { // online ddl plan use sample table scan diff --git a/src/sql/optimizer/ob_storage_estimator.cpp b/src/sql/optimizer/ob_storage_estimator.cpp index cbd87514c5..068b854b2d 100644 --- a/src/sql/optimizer/ob_storage_estimator.cpp +++ b/src/sql/optimizer/ob_storage_estimator.cpp @@ -57,23 +57,23 @@ int ObStorageEstimator::estimate_row_count(const obrpc::ObEstPartArg &arg, return ret; } -int ObStorageEstimator::estimate_block_count(const obrpc::ObEstBlockArg &arg, - obrpc::ObEstBlockRes &res) +int ObStorageEstimator::estimate_block_count_and_row_count(const obrpc::ObEstBlockArg &arg, + obrpc::ObEstBlockRes &res) { int ret = OB_SUCCESS; for (int64_t i = 0; OB_SUCC(ret) && i < arg.tablet_params_arg_.count(); ++i) { obrpc::ObEstBlockResElement est_res; - if (OB_FAIL(storage_estimate_blockcount(arg.tablet_params_arg_.at(i), est_res))) { - LOG_WARN("failed to estimate tablet block count", K(ret)); + if (OB_FAIL(storage_estimate_block_count_and_row_count(arg.tablet_params_arg_.at(i), est_res))) { + LOG_WARN("failed to estimate tablet block count and row count", K(ret)); } else if (OB_FAIL(res.tablet_params_res_.push_back(est_res))) { LOG_WARN("failed to push back result", K(ret)); } else { - LOG_TRACE("[OPT EST]: block count stat", K(est_res), K(i), "param", arg.tablet_params_arg_.at(i)); + LOG_TRACE("[OPT EST]: block count and row count stat", K(est_res), K(i), "param", arg.tablet_params_arg_.at(i)); } } #if !defined(NDEBUG) if (OB_SUCC(ret)) { - LOG_INFO("[OPT EST] blockcount estimation result", K(arg), K(res)); + LOG_INFO("[OPT EST] block count and row count estimation result", K(arg), K(res)); } #endif return ret; @@ -150,17 +150,21 @@ int ObStorageEstimator::storage_estimate_partition_batch_rowcount( return ret; } -int ObStorageEstimator::storage_estimate_blockcount( +int ObStorageEstimator::storage_estimate_block_count_and_row_count( const obrpc::ObEstBlockArgElement &arg, obrpc::ObEstBlockResElement &res) { int ret = OB_SUCCESS; int64_t macro_block_count = 0; int64_t micro_block_count = 0; + int64_t sstable_row_count = 0; + int64_t memtable_row_count = 0; if (!arg.is_valid()) { res.macro_block_count_ = macro_block_count; res.micro_block_count_ = micro_block_count; + res.sstable_row_count_ = sstable_row_count; + res.memtable_row_count_ = memtable_row_count; } else { const uint64_t tenant_id = arg.tenant_id_; MTL_SWITCH(tenant_id) { @@ -168,16 +172,20 @@ int ObStorageEstimator::storage_estimate_blockcount( if (OB_ISNULL(access_service = MTL(ObAccessService *))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(access_service)); - } else if (OB_FAIL(access_service->estimate_block_count(arg.ls_id_, - arg.tablet_id_, - macro_block_count, - micro_block_count))) { - LOG_WARN("OPT:[STORAGE EST BLOCK COUNT FAILED]", "storage_ret", ret); + } else if (OB_FAIL(access_service->estimate_block_count_and_row_count(arg.ls_id_, + arg.tablet_id_, + macro_block_count, + micro_block_count, + sstable_row_count, + memtable_row_count))) { + LOG_WARN("OPT:[STORAGE EST BLOCK COUNT AND ROW COUNT FAILED]", "storage_ret", ret); } else { - LOG_TRACE("storage estimate row count result", K(macro_block_count), - K(micro_block_count), K(ret)); + LOG_TRACE("storage estimate block count and row count result", K(macro_block_count), + K(micro_block_count), K(sstable_row_count), K(memtable_row_count), K(ret)); res.macro_block_count_ = macro_block_count; res.micro_block_count_ = micro_block_count; + res.sstable_row_count_ = sstable_row_count; + res.memtable_row_count_ = memtable_row_count; } } } diff --git a/src/sql/optimizer/ob_storage_estimator.h b/src/sql/optimizer/ob_storage_estimator.h index d2c0e73fb3..b172dba402 100644 --- a/src/sql/optimizer/ob_storage_estimator.h +++ b/src/sql/optimizer/ob_storage_estimator.h @@ -41,8 +41,8 @@ public: static int estimate_row_count(const obrpc::ObEstPartArg &arg, obrpc::ObEstPartRes &res); - static int estimate_block_count(const obrpc::ObEstBlockArg &arg, - obrpc::ObEstBlockRes &res); + static int estimate_block_count_and_row_count(const obrpc::ObEstBlockArg &arg, + obrpc::ObEstBlockRes &res); private: // compute memtable whole range row counts @@ -74,11 +74,11 @@ private: double &physical_row_count); /** - * @brief storage_estimate_blockcount + * @brief storage_estimate_block_count_and_row_count * estimate the blockcount of tablet by using storage interface */ - static int storage_estimate_blockcount(const obrpc::ObEstBlockArgElement &arg, - obrpc::ObEstBlockResElement &res); + static int storage_estimate_block_count_and_row_count(const obrpc::ObEstBlockArgElement &arg, + obrpc::ObEstBlockResElement &res); }; } diff --git a/src/sql/resolver/dml/ob_hint.cpp b/src/sql/resolver/dml/ob_hint.cpp index 0c14db02dd..cf68ef7b47 100644 --- a/src/sql/resolver/dml/ob_hint.cpp +++ b/src/sql/resolver/dml/ob_hint.cpp @@ -729,11 +729,6 @@ bool ObOptParamHint::is_param_val_valid(const OptParamType param_type, const ObO || 0 == val.get_varchar().case_compare("false")); break; } - case USE_FORCE_BLOCK_SAMPLE: { - is_valid = val.is_varchar() && (0 == val.get_varchar().case_compare("true") - || 0 == val.get_varchar().case_compare("false")); - break; - } case ENABLE_IN_RANGE_OPTIMIZATION: { is_valid = val.is_varchar() && (0 == val.get_varchar().case_compare("true") || 0 == val.get_varchar().case_compare("false")); diff --git a/src/sql/resolver/dml/ob_hint.h b/src/sql/resolver/dml/ob_hint.h index 7356cc836a..6ae0c5f82b 100644 --- a/src/sql/resolver/dml/ob_hint.h +++ b/src/sql/resolver/dml/ob_hint.h @@ -90,7 +90,6 @@ struct ObOptParamHint DEF(ENABLE_NEWSORT,) \ DEF(USE_PART_SORT_MGB,) \ DEF(USE_DEFAULT_OPT_STAT,) \ - DEF(USE_FORCE_BLOCK_SAMPLE,) \ DEF(ENABLE_IN_RANGE_OPTIMIZATION,) \ DEF(XSOLAPI_GENERATE_WITH_CLAUSE,) \ diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index b4abbd0f85..44c52c21cc 100755 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -412,11 +412,12 @@ ob_set_subtarget(ob_storage access access/ob_i_sample_iterator.cpp access/ob_block_sample_iterator.cpp access/ob_row_sample_iterator.cpp + access/ob_rows_info.cpp + access/ob_sample_iter_helper.cpp access/ob_table_read_info.cpp access/ob_table_scan_range.cpp access/ob_table_access_param.cpp access/ob_table_access_context.cpp - access/ob_rows_info.cpp access/ob_fuse_row_cache_fetcher.cpp access/ob_dml_param.cpp ) diff --git a/src/storage/access/ob_block_sample_iterator.h b/src/storage/access/ob_block_sample_iterator.h index 61806950e3..0c33a98466 100644 --- a/src/storage/access/ob_block_sample_iterator.h +++ b/src/storage/access/ob_block_sample_iterator.h @@ -149,7 +149,7 @@ class ObBlockSampleIterator : public ObISampleIterator public: explicit ObBlockSampleIterator(const common::SampleInfo &sample_info); virtual ~ObBlockSampleIterator(); - void reuse(); + virtual void reuse(); virtual void reset() override; int open(ObMultipleScanMerge &scan_merge, ObTableAccessContext &access_ctx, diff --git a/src/storage/access/ob_i_sample_iterator.h b/src/storage/access/ob_i_sample_iterator.h index cd43df084b..1a34115138 100644 --- a/src/storage/access/ob_i_sample_iterator.h +++ b/src/storage/access/ob_i_sample_iterator.h @@ -25,6 +25,7 @@ class ObISampleIterator : public ObQueryRowIterator public: explicit ObISampleIterator(const common::SampleInfo &sample_info); virtual ~ObISampleIterator(); + virtual void reuse() = 0; protected: bool return_this_sample(const int64_t num) const; protected: diff --git a/src/storage/access/ob_row_sample_iterator.cpp b/src/storage/access/ob_row_sample_iterator.cpp index 6252f7f5da..6ea39d8bf1 100644 --- a/src/storage/access/ob_row_sample_iterator.cpp +++ b/src/storage/access/ob_row_sample_iterator.cpp @@ -68,5 +68,22 @@ void ObRowSampleIterator::reset() row_num_ = 0; } +int ObMemtableRowSampleIterator::get_next_row(blocksstable::ObDatumRow *&row) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(iterator_)) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "row sample iterator is not inited", K(ret), KP_(iterator)); + } else if (OB_FAIL(iterator_->get_next_row(row))) { + if (OB_ITER_END != ret) { + STORAGE_LOG(WARN, "multiple merge failed to get next row", K(ret)); + } else { + STORAGE_LOG(INFO, "total sample row count", K(row_num_)); + } + } else { + row_num_++; + } + return ret; +} } } diff --git a/src/storage/access/ob_row_sample_iterator.h b/src/storage/access/ob_row_sample_iterator.h index bdbecc7e3f..449fba70e2 100644 --- a/src/storage/access/ob_row_sample_iterator.h +++ b/src/storage/access/ob_row_sample_iterator.h @@ -27,7 +27,7 @@ public: explicit ObRowSampleIterator(const common::SampleInfo &sample_info); virtual ~ObRowSampleIterator(); int open(ObQueryRowIterator &iterator); - void reuse(); + virtual void reuse(); virtual int get_next_row(blocksstable::ObDatumRow *&row) override; virtual void reset() override; private: @@ -35,6 +35,39 @@ private: int64_t row_num_; }; +class ObMemtableRowSampleIterator : public ObISampleIterator +{ +public: + // must larger than 1 + static const int64_t SAMPLE_MEMTABLE_RANGE_COUNT = 10; +public: + explicit ObMemtableRowSampleIterator(const SampleInfo &sample_info) + : ObISampleIterator(sample_info), iterator_(nullptr), row_num_(0) {} + virtual ~ObMemtableRowSampleIterator() {} + + int open(ObQueryRowIterator &iterator) + { + int ret = OB_SUCCESS; + iterator_ = &iterator; + row_num_ = 0; + return ret; + } + + virtual void reuse() { row_num_ = 0; } + + virtual void reset() override + { + iterator_ = nullptr; + row_num_ = 0; + } + + virtual int get_next_row(blocksstable::ObDatumRow *&row) override; + +private: + ObQueryRowIterator *iterator_; + int64_t row_num_; +}; + } } diff --git a/src/storage/access/ob_sample_iter_helper.cpp b/src/storage/access/ob_sample_iter_helper.cpp new file mode 100644 index 0000000000..524ddf47e1 --- /dev/null +++ b/src/storage/access/ob_sample_iter_helper.cpp @@ -0,0 +1,226 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "storage/access/ob_sample_iter_helper.h" +#include "storage/memtable/ob_memtable.h" +#include "storage/access/ob_multiple_multi_scan_merge.h" + +namespace oceanbase { +namespace storage { + +using namespace blocksstable; + +int ObGetSampleIterHelper::check_scan_range_count(bool &res, ObIArray &sample_ranges) +{ + int ret = OB_SUCCESS; + need_scan_multiple_range_ = false; + if (scan_param_.sample_info_.is_block_sample()) { + bool retire_to_memtable_row_sample = false; + if (OB_FAIL(get_table_param_.tablet_iter_.refresh_read_tables_from_tablet( + main_table_ctx_.store_ctx_->mvcc_acc_ctx_.get_snapshot_version().get_val_for_tx(), + false /*allow_not_ready*/))) { + STORAGE_LOG(WARN, "Fail to read tables", K(ret)); + } else if (OB_FAIL(can_retire_to_memtable_row_sample_(retire_to_memtable_row_sample, sample_ranges))) { + STORAGE_LOG(WARN, "Fail to try to retire to row sample", K(ret)); + } + + if (retire_to_memtable_row_sample) { + need_scan_multiple_range_ = true; + } + } else if (scan_param_.sample_info_.is_row_sample()) { + if (OB_FAIL(sample_ranges.assign(table_scan_range_.get_ranges()))) { + STORAGE_LOG(WARN, + "copy assign from table scan range to sample ranges failed", + KR(ret), + K(table_scan_range_.get_ranges())); + } else { + need_scan_multiple_range_ = true; + } + } else { + // invalid sample type + } + + if (OB_SUCC(ret)) { + res = need_scan_multiple_range_; + } + return ret; +} + +int ObGetSampleIterHelper::can_retire_to_memtable_row_sample_(bool &retire, ObIArray &sample_ranges) +{ + int ret = OB_SUCCESS; + + retire = false; + if (get_table_param_.is_valid()) { + int64_t memtable_row_count = 0; + int64_t sstable_row_count = 0; + common::ObSEArray memtables; + + // iter all tables to estimate row count + while (OB_SUCC(ret)) { + ObSSTableMetaHandle sst_meta_hdl; + ObITable *table = nullptr; + if (OB_FAIL(get_table_param_.tablet_iter_.table_iter()->get_next(table))) { + if (OB_LIKELY(OB_ITER_END == ret)) { + ret = OB_SUCCESS; + break; + } else { + STORAGE_LOG(WARN, "Fail to get next table iter", K(ret), K(get_table_param_.tablet_iter_.table_iter())); + } + } else if (table->is_memtable()) { + if (OB_FAIL(memtables.push_back(table))) { + STORAGE_LOG(WARN, "push back memtable failed", KR(ret)); + } else { + memtable_row_count += static_cast(table)->get_physical_row_cnt(); + } + } else if (table->is_sstable()) { + if (OB_FAIL(static_cast(table)->get_meta(sst_meta_hdl))) { + STORAGE_LOG(WARN, "fail to get sstable meta handle", K(ret)); + } else { + sstable_row_count += sst_meta_hdl.get_sstable_meta().get_row_count(); + } + } + } + + // decide if this block sample need to retire and get sample ranges if need retire + if (OB_FAIL(ret)) { + } else if (FALSE_IT(get_table_param_.tablet_iter_.table_iter()->resume())) { + } else if (sstable_row_count < memtable_row_count && memtable_row_count > 0) { + if (OB_FAIL(get_memtable_sample_ranges_(memtables, sample_ranges))) { + STORAGE_LOG(WARN, "get memtable sample ranges failed", KR(ret), K(memtables)); + } else { + retire = true; + } + + STORAGE_LOG(INFO, + "retire to memtable row sample", + KR(ret), + K(retire), + K(sstable_row_count), + K(memtable_row_count), + K(table_scan_range_)); + } + } + + return ret; +} + +int ObGetSampleIterHelper::get_memtable_sample_ranges_(const ObIArray &memtables, + ObIArray &sample_ranges) +{ + int ret = OB_SUCCESS; + int split_failed_count = 0; + sample_ranges.reuse(); + + // get split ranges from all memtables + for (int64_t i = 0; OB_SUCC(ret) && i < memtables.count(); i++) { + memtable::ObMemtable *memtable = static_cast(memtables.at(i)); + int tmp_ret = OB_SUCCESS; + if (OB_ISNULL(memtable)) { + ret = OB_ERR_UNEXPECTED; + } else if (OB_TMP_FAIL(memtable->split_ranges_for_sample(table_scan_range_.get_ranges().at(0), + scan_param_.sample_info_.percent_, + *(scan_param_.allocator_), + sample_ranges))) { + STORAGE_LOG(WARN, "split range failed", KR(tmp_ret)); + split_failed_count++; + } else { + // split succeed + } + } + + // if we can not split ranges from all memtables, just push a whole range into sample ranges array + if (split_failed_count == memtables.count()) { + if (sample_ranges.count() != 0) { + STORAGE_LOG(WARN, "unexpected sample memtable ranges", K(sample_ranges)); + sample_ranges.reuse(); + } + + blocksstable::ObDatumRange datum_range; + datum_range.set_whole_range(); + if (OB_FAIL(sample_ranges.push_back(datum_range))) { + STORAGE_LOG(WARN, "push back datum range to sample memtable ranges failed", KR(ret), K(memtables)); + } + } + return ret; +} + +#define CONSTRUCT_SAMPLE_ITER(ITER_TYPE, ITER_PTR) \ + do { \ + void *buf = nullptr; \ + if (OB_FAIL(ret)) { \ + } else if (nullptr == ITER_PTR) { \ + if (OB_ISNULL(buf = scan_param_.allocator_->alloc(sizeof(ITER_TYPE)))) { \ + ret = OB_ALLOCATE_MEMORY_FAILED; \ + STORAGE_LOG(WARN, "Fail to allocate memory", K(ret)); \ + } else { \ + ITER_PTR = new (buf) ITER_TYPE(scan_param_.sample_info_); \ + } \ + } \ + } while (0) + +#define OPEN_SAMPLE_ITER(ITER_TYPE, ITER_PTR) \ + do { \ + if (OB_FAIL(ret)) { \ + } else if (OB_FAIL(static_cast(ITER_PTR)->open(*main_iter))) { \ + STORAGE_LOG(WARN, "failed to open memtable_row_sample_iterator", KR(ret)); \ + } else { \ + main_iter = ITER_PTR; \ + } \ + } while (0) + +int ObGetSampleIterHelper::get_sample_iter(ObMemtableRowSampleIterator *&sample_iter, + ObQueryRowIterator *&main_iter, + ObMultipleScanMerge *scan_merge) +{ + int ret = OB_SUCCESS; + CONSTRUCT_SAMPLE_ITER(ObMemtableRowSampleIterator, sample_iter); + OPEN_SAMPLE_ITER(ObMemtableRowSampleIterator, sample_iter); + return ret; +} +int ObGetSampleIterHelper::get_sample_iter(ObRowSampleIterator *&sample_iter, + ObQueryRowIterator *&main_iter, + ObMultipleScanMerge *scan_merge) +{ + int ret = OB_SUCCESS; + CONSTRUCT_SAMPLE_ITER(ObRowSampleIterator, sample_iter); + OPEN_SAMPLE_ITER(ObRowSampleIterator, sample_iter); + return ret; +} + +int ObGetSampleIterHelper::get_sample_iter(ObBlockSampleIterator *&sample_iter, + ObQueryRowIterator *&main_iter, + ObMultipleScanMerge *scan_merge) +{ + int ret = OB_SUCCESS; + CONSTRUCT_SAMPLE_ITER(ObBlockSampleIterator, sample_iter); + // TODO : @yuanzhe block sample uses a different initialization logic and different open interface + if (OB_FAIL(ret)) { + } else if (OB_FAIL(static_cast(sample_iter) + ->open(*scan_merge, + main_table_ctx_, + table_scan_range_.get_ranges().at(0), + get_table_param_, + scan_param_.scan_flag_.is_reverse_scan()))) { + STORAGE_LOG(WARN, "failed to open block_sample_iterator_", K(ret)); + } else { + main_iter = sample_iter; + main_table_ctx_.use_fuse_row_cache_ = false; + } + return ret; +} + +#undef CONSTRUCT_SAMPLE_ITER +#undef OPEN_SAMPLE_ITER + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/access/ob_sample_iter_helper.h b/src/storage/access/ob_sample_iter_helper.h new file mode 100644 index 0000000000..50122adee1 --- /dev/null +++ b/src/storage/access/ob_sample_iter_helper.h @@ -0,0 +1,69 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_STORAGE_OB_SAMPLE_ITER_HELPER_H +#define OCEANBASE_STORAGE_OB_SAMPLE_ITER_HELPER_H + +#include "storage/access/ob_block_sample_iterator.h" +#include "storage/access/ob_row_sample_iterator.h" + +namespace oceanbase { +namespace storage { +class ObMultipleMultiScanMerge; + +class ObGetSampleIterHelper { +public: + ObGetSampleIterHelper(const ObTableScanRange &table_scan_range, + ObTableAccessContext &main_table_ctx, + ObTableScanParam &scan_param, + ObGetTableParam &get_table_param) + : table_scan_range_(table_scan_range), + main_table_ctx_(main_table_ctx), + scan_param_(scan_param), + get_table_param_(get_table_param) + {} + + int check_scan_range_count(bool &res, ObIArray &sample_ranges); + + // int get_sample_iter(ObISampleIterator *&i_sample_iter, + // ObQueryRowIterator *&main_iter, + // ObMultipleScanMerge *scan_merge); + int get_sample_iter(ObMemtableRowSampleIterator *&sample_iter, + ObQueryRowIterator *&main_iter, + ObMultipleScanMerge *scan_merge); + int get_sample_iter(ObRowSampleIterator *&sample_iter, + ObQueryRowIterator *&main_iter, + ObMultipleScanMerge *scan_merge); + + int get_sample_iter(ObBlockSampleIterator *&sample_iter, + ObQueryRowIterator *&main_iter, + ObMultipleScanMerge *scan_merge); + +private: + // if need retire to row sample, sample_memtable_ranges must not be null + int can_retire_to_memtable_row_sample_(bool &retire, ObIArray &sample_ranges); + + int get_memtable_sample_ranges_(const ObIArray &memtables, + ObIArray &sample_ranges); + +private: + const ObTableScanRange &table_scan_range_; + ObTableAccessContext &main_table_ctx_; + ObTableScanParam &scan_param_; + ObGetTableParam &get_table_param_; + bool need_scan_multiple_range_; +}; + +} // namespace storage +} // namespace oceanbase + +#endif \ No newline at end of file diff --git a/src/storage/access/ob_table_scan_iterator.cpp b/src/storage/access/ob_table_scan_iterator.cpp index fb23abc286..11697e6386 100755 --- a/src/storage/access/ob_table_scan_iterator.cpp +++ b/src/storage/access/ob_table_scan_iterator.cpp @@ -12,22 +12,22 @@ #define USING_LOG_PREFIX STORAGE +#include +#include #include "common/object/ob_obj_compare.h" #include "common/sql_mode/ob_sql_mode_utils.h" #include "lib/stat/ob_diagnose_info.h" #include "lib/utility/ob_tracepoint.h" +#include "storage/access/ob_multiple_scan_merge.h" +#include "storage/access/ob_table_scan_iterator.h" +#include "storage/access/ob_dml_param.h" +#include "storage/access/ob_index_sstable_estimator.h" +#include "storage/access/ob_sample_iter_helper.h" #include "storage/blocksstable/ob_storage_cache_suite.h" -#include "ob_multiple_scan_merge.h" -#include "ob_table_scan_iterator.h" +#include "storage/memtable/ob_memtable.h" #include "storage/tx_storage/ob_ls_map.h" #include "storage/tx_storage/ob_ls_service.h" #include "storage/tx/ob_trans_service.h" -#include -#include -#include "ob_dml_param.h" -#include "storage/memtable/ob_memtable.h" -#include "ob_table_estimator.h" -#include "ob_index_sstable_estimator.h" #include "storage/tablet/ob_tablet.h" namespace oceanbase @@ -48,15 +48,18 @@ ObTableScanIterator::ObTableScanIterator() scan_merge_(NULL), multi_scan_merge_(NULL), skip_scan_merge_(NULL), + memtable_row_sample_iterator_(NULL), row_sample_iterator_(NULL), block_sample_iterator_(NULL), + // i_sample_iter_(NULL), main_table_param_(), main_table_ctx_(), get_table_param_(), ctx_guard_(), scan_param_(NULL), table_scan_range_(), - main_iter_(NULL) + main_iter_(NULL), + sample_ranges_() { } @@ -73,8 +76,10 @@ void ObTableScanIterator::reset() reset_scan_iter(scan_merge_); reset_scan_iter(multi_scan_merge_); reset_scan_iter(skip_scan_merge_); + reset_scan_iter(memtable_row_sample_iterator_); reset_scan_iter(row_sample_iterator_); reset_scan_iter(block_sample_iterator_); + // reset_scan_iter(i_sample_iter_); main_table_param_.reset(); main_table_ctx_.reset(); @@ -84,6 +89,7 @@ void ObTableScanIterator::reset() scan_param_ = NULL; table_scan_range_.reset(); main_iter_ = NULL; + sample_ranges_.reset(); is_inited_ = false; } @@ -108,8 +114,11 @@ void ObTableScanIterator::reuse_row_iters() REUSE_SCAN_ITER(scan_merge_); REUSE_SCAN_ITER(multi_scan_merge_); REUSE_SCAN_ITER(skip_scan_merge_); + REUSE_SCAN_ITER(memtable_row_sample_iterator_); REUSE_SCAN_ITER(row_sample_iterator_); REUSE_SCAN_ITER(block_sample_iterator_); + // REUSE_SCAN_ITER(i_sample_iter_); + #undef REUSE_SCAN_ITER } @@ -165,6 +174,7 @@ void ObTableScanIterator::reuse() main_iter_ = NULL; reuse_row_iters(); main_table_ctx_.reuse(); + sample_ranges_.reuse(); } void ObTableScanIterator::reset_for_switch() @@ -173,6 +183,7 @@ void ObTableScanIterator::reset_for_switch() main_table_param_.reset(); get_table_param_.reset(); ctx_guard_.reset(); + sample_ranges_.reset(); scan_param_ = NULL; } @@ -359,156 +370,104 @@ int ObTableScanIterator::open_iter() } else if (table_scan_range_.is_empty()) { //ret = OB_ITER_END; } else { - bool retire_to_row_sample = false; get_table_param_.frozen_version_ = scan_param_->frozen_version_; get_table_param_.sample_info_ = scan_param_->sample_info_; if (table_scan_range_.is_get()) { - if (table_scan_range_.get_rowkeys().count() == 1) { - INIT_AND_OPEN_ITER(single_merge_, table_scan_range_.get_rowkeys().at(0), true); - if (OB_SUCC(ret)) { - main_table_ctx_.use_fuse_row_cache_ = !single_merge_->is_read_memtable_only(); - } - } else { - INIT_AND_OPEN_ITER(get_merge_, table_scan_range_.get_rowkeys(), false); + if (OB_FAIL(init_and_open_get_merge_iter_())) { + STORAGE_LOG(WARN, "init and open get merge iterator failed", KR(ret)); + } + } else if (table_scan_range_.is_scan()) { + if (OB_FAIL(init_and_open_scan_merge_iter_())) { + STORAGE_LOG(WARN, "init and open scan merge iterator failed", KR(ret)); } } else { - if (table_scan_range_.get_ranges().count() == 1) { - if (scan_param_->sample_info_.is_block_sample()) { - if (nullptr == scan_merge_ && OB_FAIL(init_scan_iter(scan_merge_))) { - STORAGE_LOG(WARN, "Failed to init scanmerge", K(ret)); - } else if (OB_FAIL(get_table_param_.tablet_iter_.refresh_read_tables_from_tablet( - main_table_ctx_.store_ctx_->mvcc_acc_ctx_.get_snapshot_version().get_val_for_tx(), - false /*allow_not_ready*/ ))) { - STORAGE_LOG(WARN, "Fail to read tables", K(ret)); - } else if (!scan_param_->sample_info_.force_block_ && - OB_FAIL(can_retire_to_row_sample(retire_to_row_sample))) { - STORAGE_LOG(WARN, "Fail to try to retire to row sample", K(ret)); - } else if (!retire_to_row_sample) { - if (nullptr == block_sample_iterator_) { - if (OB_ISNULL(buf = scan_param_->allocator_->alloc(sizeof(ObBlockSampleIterator)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - STORAGE_LOG(WARN, "Fail to allocate memory", K(ret)); - } else { - block_sample_iterator_ = new (buf) ObBlockSampleIterator (scan_param_->sample_info_); - } - } - - if (OB_FAIL(ret)) { - } else if (OB_FAIL(block_sample_iterator_->open(*scan_merge_, - main_table_ctx_, - table_scan_range_.get_ranges().at(0), - get_table_param_, - scan_param_->scan_flag_.is_reverse_scan()))) { - STORAGE_LOG(WARN, "failed to open block_sample_iterator_", K(ret)); - } else { - main_iter_ = block_sample_iterator_; - main_table_ctx_.use_fuse_row_cache_ = false; - } - } - } else if (scan_param_->use_index_skip_scan()) { - INIT_AND_OPEN_SKIP_SCAN_ITER(skip_scan_merge_, table_scan_range_.get_ranges().at(0), table_scan_range_.get_suffix_ranges().at(0), false); - } else { - INIT_AND_OPEN_ITER(scan_merge_, table_scan_range_.get_ranges().at(0), false); - } - } else if (scan_param_->use_index_skip_scan()) { - ret = OB_NOT_SUPPORTED; - STORAGE_LOG(WARN, "multiple ranges are not supported in index skip scan now"); - } else { - INIT_AND_OPEN_ITER(multi_scan_merge_, table_scan_range_.get_ranges(), false); - } + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(ERROR, "invalid table scan range", KR(ret), K(table_scan_range_)); } - if (OB_SUCC(ret)) { - if (scan_param_->sample_info_.is_row_sample() || retire_to_row_sample) { - if (!retire_to_row_sample) { - } else if (OB_ISNULL(scan_merge_)) { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(WARN, "Unexpected null scan merge", K(ret), KP(scan_merge_)); - } else if (OB_FAIL(scan_merge_->open(table_scan_range_.get_ranges().at(0)))) { - STORAGE_LOG(WARN, "Fail to open scan merge iterator", K(ret)); - } else { - main_table_ctx_.use_fuse_row_cache_ = false; - main_iter_ = scan_merge_; - } - if (OB_FAIL(ret)) { - } else if (nullptr == row_sample_iterator_) { - if (OB_ISNULL(buf = scan_param_->allocator_->alloc(sizeof(ObRowSampleIterator)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - STORAGE_LOG(WARN, "Fail to allocate memory", K(ret)); - } else { - row_sample_iterator_ = new (buf) ObRowSampleIterator (scan_param_->sample_info_); - } - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(row_sample_iterator_->open(*main_iter_))) { - STORAGE_LOG(WARN, "failed to open row_sample_iterator", K(ret)); - } else { - main_iter_ = row_sample_iterator_; - } - } - } if (OB_SUCC(ret)) { table_scan_range_.set_empty(); } } STORAGE_LOG(DEBUG, "chaser debug open iter", K(ret), K(table_scan_range_)); + + return ret; +} + +int ObTableScanIterator::init_and_open_get_merge_iter_() +{ + int ret = OB_SUCCESS; + if (table_scan_range_.get_rowkeys().count() == 1) { + INIT_AND_OPEN_ITER(single_merge_, table_scan_range_.get_rowkeys().at(0), true); + if (OB_SUCC(ret)) { + main_table_ctx_.use_fuse_row_cache_ = !single_merge_->is_read_memtable_only(); + } + } else { + INIT_AND_OPEN_ITER(get_merge_, table_scan_range_.get_rowkeys(), false); + } + return ret; +} + +int ObTableScanIterator::init_and_open_scan_merge_iter_() +{ + int ret = OB_SUCCESS; + + if (table_scan_range_.get_ranges().count() == 1) { + if (scan_param_->sample_info_.is_row_sample() || scan_param_->sample_info_.is_block_sample()) { + bool need_scan_multiple_range = false; + STORAGE_LOG(INFO, "start init sample iterator", K(scan_param_->sample_info_)); + ObGetSampleIterHelper sample_iter_helper(table_scan_range_, main_table_ctx_, *scan_param_, get_table_param_); + if (OB_FAIL(sample_iter_helper.check_scan_range_count(need_scan_multiple_range, sample_ranges_))) { + STORAGE_LOG(WARN, "check scan range count failed", KR(ret), KPC(scan_param_)); + } else if (need_scan_multiple_range) { + // this branch means the sample is row(memtable row) sample + INIT_AND_OPEN_ITER(multi_scan_merge_, sample_ranges_, false); + if (OB_FAIL(ret)) { + } else if (scan_param_->sample_info_.is_row_sample()) { + if (OB_FAIL(sample_iter_helper.get_sample_iter(row_sample_iterator_, main_iter_, multi_scan_merge_))) { + STORAGE_LOG(WARN, "get sample iter failed", KR(ret), K(scan_param_)); + } else { + STORAGE_LOG(INFO, "finish init row sample iter", KP(row_sample_iterator_), KP(main_iter_)); + } + } else { + if (OB_FAIL( + sample_iter_helper.get_sample_iter(memtable_row_sample_iterator_, main_iter_, multi_scan_merge_))) { + STORAGE_LOG(WARN, "get sample iter failed", KR(ret), K(scan_param_)); + } else { + STORAGE_LOG( + INFO, "finish init memtable row sample iter", KP(memtable_row_sample_iterator_), KP(main_iter_)); + } + } + } else { + // this branch means the sample is block sample + // TODO : @yuanzhe block sample uses a different initialization logic and different open interface + if (nullptr == scan_merge_ && OB_FAIL(init_scan_iter(scan_merge_))) { + STORAGE_LOG(WARN, "Failed to init scanmerge", K(ret)); + } else if (OB_FAIL(sample_iter_helper.get_sample_iter(block_sample_iterator_, main_iter_, scan_merge_))) { + STORAGE_LOG(WARN, "get sample iter failed", KR(ret), K(scan_param_)); + } else { + STORAGE_LOG(INFO, "finish init block row sample iter", KP(block_sample_iterator_), KP(main_iter_)); + } + } + } else if (scan_param_->use_index_skip_scan()) { + INIT_AND_OPEN_SKIP_SCAN_ITER( + skip_scan_merge_, table_scan_range_.get_ranges().at(0), table_scan_range_.get_suffix_ranges().at(0), false); + } else { + INIT_AND_OPEN_ITER(scan_merge_, table_scan_range_.get_ranges().at(0), false); + } + } else if (scan_param_->use_index_skip_scan()) { + ret = OB_NOT_SUPPORTED; + STORAGE_LOG(WARN, "multiple ranges are not supported in index skip scan now"); + } else { + INIT_AND_OPEN_ITER(multi_scan_merge_, table_scan_range_.get_ranges(), false); + } + return ret; } #undef INIT_AND_OPEN_ITER #undef INIT_AND_OPEN_SKIP_SCAN_ITER -int ObTableScanIterator::can_retire_to_row_sample(bool &retire) -{ - int ret = OB_SUCCESS; - - retire = false; - if (get_table_param_.is_valid()) { - int64_t memtable_row_count = 0; - int64_t sstable_row_count = 0; - common::ObSEArray memtables; - while (OB_SUCC(ret)) { - ObSSTableMetaHandle sst_meta_hdl; - ObITable *table = nullptr; - if (OB_FAIL(get_table_param_.tablet_iter_.table_iter()->get_next(table))) { - if (OB_LIKELY(OB_ITER_END == ret)) { - ret = OB_SUCCESS; - break; - } else { - STORAGE_LOG(WARN, "Fail to get next table iter", K(ret), K(get_table_param_.tablet_iter_.table_iter())); - } - } else if (table->is_memtable()) { - memtables.push_back(table); - } else if (table->is_sstable()) { - if (OB_FAIL(static_cast(table)->get_meta(sst_meta_hdl))) { - LOG_WARN("fail to get sstable meta handle", K(ret)); - } else { - sstable_row_count += sst_meta_hdl.get_sstable_meta().get_row_count(); - } - } - } - if (OB_FAIL(ret)) { - } else if (FALSE_IT(get_table_param_.tablet_iter_.table_iter()->resume())) { - } else if (memtables.count() > 0) { - ObPartitionEst batch_est; - ObSEArray est_records; - ObTableEstimateBaseInput base_input( - scan_param_->scan_flag_, - memtables.at(0)->get_key().tablet_id_.id(), - transaction::ObTransID(), - memtables, - get_table_param_.tablet_iter_.get_tablet_handle()); - if (OB_FAIL(ObTableEstimator::estimate_row_count_for_scan(base_input, table_scan_range_.get_ranges(), batch_est, est_records))) { - STORAGE_LOG(WARN, "Failed to estimate row count for scan", K(ret), KPC(scan_param_), K(table_scan_range_)); - } else { - retire = sstable_row_count < batch_est.physical_row_count_; - } - } - } - - return ret; -} - int ObTableScanIterator::get_next_row(ObNewRow *&row) { int ret = OB_SUCCESS; @@ -541,6 +500,7 @@ int ObTableScanIterator::get_next_row(ObNewRow *&row) ret = tmp_ret; } } + return ret; } diff --git a/src/storage/access/ob_table_scan_iterator.h b/src/storage/access/ob_table_scan_iterator.h index cf093c897f..721653ba21 100644 --- a/src/storage/access/ob_table_scan_iterator.h +++ b/src/storage/access/ob_table_scan_iterator.h @@ -19,7 +19,6 @@ #include "lib/container/ob_se_array.h" #include "share/schema/ob_schema_getter_guard.h" #include "storage/blocksstable/ob_block_sstable_struct.h" -#include "ob_block_sample_iterator.h" #include "storage/ob_col_map.h" #include "storage/ob_i_store.h" #include "ob_multiple_get_merge.h" @@ -28,7 +27,6 @@ #include "ob_multiple_scan_merge.h" #include "ob_multiple_skip_scan_merge.h" #include "ob_multiple_multi_skip_scan_merge.h" -#include "ob_row_sample_iterator.h" #include "ob_single_merge.h" #include "storage/tx_storage/ob_access_service.h" #include "storage/tx_storage/ob_ls_map.h" @@ -41,6 +39,10 @@ namespace storage { class ObTableScanParam; class ObTableReadInfo; +class ObISampleIterator; +class ObMemtableRowSampleIterator; +class ObRowSampleIterator; +class ObBlockSampleIterator; class ObTableScanIterator : public common::ObNewRowIterator { @@ -74,18 +76,32 @@ private: void reuse_row_iters(); int switch_param_for_iter(); int open_iter(); - int can_retire_to_row_sample(bool &retire); // try to retire to row sample + + // if need retire to row sample, sample_memtable_ranges must not be null + int can_retire_to_memtable_row_sample_(bool &retire, ObIArray &sample_memtable_ranges); + int get_memtable_sample_ranges(const ObIArray &memtables, + ObIArray &sample_memtable_ranges); + // for read uncommitted data, txn possible rollbacked before iterate // check txn status after read rows to ensure read result is correct int check_txn_status_if_read_uncommitted_(); + int init_and_open_get_merge_iter_(); + int init_and_open_scan_merge_iter_(); + int init_and_open_block_sample_iter_(); + int init_and_open_row_sample_iter_(); + int init_and_open_memtable_row_sample_iter_(const ObIArray &scan_ranges); + +private: bool is_inited_; ObSingleMerge *single_merge_; ObMultipleGetMerge *get_merge_; ObMultipleScanMerge *scan_merge_; ObMultipleMultiScanMerge *multi_scan_merge_; ObMultipleSkipScanMerge *skip_scan_merge_; + ObMemtableRowSampleIterator *memtable_row_sample_iterator_; ObRowSampleIterator *row_sample_iterator_; - ObBlockSampleIterator *block_sample_iterator_; // TODO: refactor + ObBlockSampleIterator *block_sample_iterator_; // TODO: @yuanzhe refactor + // ObISampleIterator *i_sample_iter_; // we should consider the constructor cost ObTableAccessParam main_table_param_; ObTableAccessContext main_table_ctx_; @@ -95,6 +111,7 @@ private: ObTableScanParam *scan_param_; ObTableScanRange table_scan_range_; ObQueryRowIterator *main_iter_; + ObSEArray sample_ranges_; private: DISALLOW_COPY_AND_ASSIGN(ObTableScanIterator); }; diff --git a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index 5267f6afe5..b1a74d1c66 100755 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -5669,14 +5669,18 @@ int ObLSTabletService::estimate_row_count( return ret; } -int ObLSTabletService::estimate_block_count( +int ObLSTabletService::estimate_block_count_and_row_count( const common::ObTabletID &tablet_id, int64_t ¯o_block_count, - int64_t µ_block_count) + int64_t µ_block_count, + int64_t &sstable_row_count, + int64_t &memtable_row_count) { int ret = OB_SUCCESS; macro_block_count = 0; micro_block_count = 0; + sstable_row_count = 0; + memtable_row_count = 0; ObTabletTableIterator tablet_iter; if (IS_NOT_INIT) { @@ -5688,11 +5692,6 @@ int ObLSTabletService::estimate_block_count( ObITable *table = nullptr; ObSSTable *sstable = nullptr; - int64_t total_sample_table_cnt = 2; - int64_t sample_table_cnt = 0; - int64_t sampled_table_row_cnt = 0; - int64_t total_row_count = 0; - while (OB_SUCC(ret)) { ObSSTableMetaHandle sst_meta_hdl; if (OB_FAIL(tablet_iter.table_iter()->get_next(table))) { @@ -5705,18 +5704,17 @@ int ObLSTabletService::estimate_block_count( } else if (OB_ISNULL(table)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null table", K(ret), K(tablet_iter.table_iter())); - } else if (!table->is_sstable()) { - break; - } else if (FALSE_IT(sstable = static_cast(table))) { - } else if (OB_FAIL(sstable->get_meta(sst_meta_hdl))) { - LOG_WARN("fail to get sstable meta handle", K(ret)); - } else { + } else if (table->is_memtable()) { + memtable_row_count += static_cast(table)->get_physical_row_cnt(); + } else if (table->is_sstable()) { sstable = static_cast(table); - macro_block_count += sstable->get_data_macro_block_count(); - micro_block_count += sst_meta_hdl.get_sstable_meta().get_data_micro_block_count(); - total_row_count += sst_meta_hdl.get_sstable_meta().get_row_count(); - if (sample_table_cnt++ < total_sample_table_cnt) { - sampled_table_row_cnt += sst_meta_hdl.get_sstable_meta().get_row_count(); + if (OB_FAIL(sstable->get_meta(sst_meta_hdl))) { + LOG_WARN("fail to get sstable meta handle", K(ret)); + } else { + sstable = static_cast(table); + macro_block_count += sstable->get_data_macro_block_count(); + micro_block_count += sst_meta_hdl.get_sstable_meta().get_data_micro_block_count(); + sstable_row_count += sst_meta_hdl.get_sstable_meta().get_row_count(); } } } diff --git a/src/storage/ls/ob_ls_tablet_service.h b/src/storage/ls/ob_ls_tablet_service.h index 9526f73f2a..fc3a363782 100755 --- a/src/storage/ls/ob_ls_tablet_service.h +++ b/src/storage/ls/ob_ls_tablet_service.h @@ -383,10 +383,12 @@ public: ObIArray &est_records, int64_t &logical_row_count, int64_t &physical_row_count); - int estimate_block_count( + int estimate_block_count_and_row_count( const common::ObTabletID &tablet_id, int64_t ¯o_block_count, - int64_t µ_block_count); + int64_t µ_block_count, + int64_t &sstable_row_count, + int64_t &memtable_row_count); // iterator int build_tablet_iter(ObLSTabletIterator &iter); diff --git a/src/storage/memtable/mvcc/ob_keybtree.cpp b/src/storage/memtable/mvcc/ob_keybtree.cpp index d6349442ae..a8c9940684 100644 --- a/src/storage/memtable/mvcc/ob_keybtree.cpp +++ b/src/storage/memtable/mvcc/ob_keybtree.cpp @@ -1505,13 +1505,16 @@ int BtreeRawIterator::estimate_key_count(int64_t top_level, return ret; } -template -int BtreeRawIterator::split_range(int64_t top_level, int64_t branch_count, int64_t part_count, BtreeKey* key_array) +template +int BtreeRawIterator::split_range(int64_t top_level, + int64_t btree_node_count, + int64_t range_count, + common::ObIArray &key_array) { int ret = OB_SUCCESS; if (OB_ISNULL(iter_)) { ret = OB_NOT_INIT; - } else if (part_count < 1 || branch_count < part_count || OB_ISNULL(key_array)) { + } else if (range_count < 1 || btree_node_count < range_count) { ret = OB_INVALID_ARGUMENT; } else if (top_level > iter_->get_root_level()) { ret = OB_INVALID_ARGUMENT; @@ -1519,26 +1522,55 @@ int BtreeRawIterator::split_range(int64_t top_level, int64_t BtreeKey key; BtreeVal val = nullptr; int64_t level = iter_->get_root_level() - top_level; - int64_t part_key_count = branch_count/part_count; - int64_t seg_id = 0; - int64_t child_count = 0; - while(seg_id < part_count - 1 && OB_SUCC(iter_->next_on_level(level, key, val))) { - if (0 == (++child_count % part_key_count)) { - key_array[seg_id++] = key; + + int64_t remaining_btree_node_count = btree_node_count; + // each loop fill in a key for spliting range + int64_t range_idx = 0; + for (range_idx = 0; OB_SUCC(ret) && range_idx < range_count; range_idx++) + { + // (range_count - range_idx) means the last range count which need to be splitted + int64_t btree_node_cnt_in_this_range = remaining_btree_node_count / (range_count - range_idx); + + // loop btree_node_cnt_in_this_range times to get the next key for splitting + for (int64_t iter_node_count = 0; OB_SUCC(ret) && iter_node_count < btree_node_cnt_in_this_range; + iter_node_count++) { + if (OB_FAIL(iter_->next_on_level(level, key, val))) { + OB_LOG(WARN, + "iterate btree node on level failed", + KR(ret), + K(level), + K(iter_node_count), + K(btree_node_cnt_in_this_range), + K(remaining_btree_node_count), + K(range_count), + K(range_idx)); + } + } + + // update remaining btree node count + remaining_btree_node_count = remaining_btree_node_count - btree_node_cnt_in_this_range; + + if (OB_SUCC(ret) && OB_FAIL(key_array.push_back(key))) { + OB_LOG(WARN, + "push back rowkey into key array failed", + KR(ret), + K(range_idx), + K(range_count), + K(top_level), + K(btree_node_count)); } } - if (OB_SUCCESS == ret) { - // do nothing - } else if (OB_ITER_END == ret) { - ret = OB_SUCCESS; - } else { + + if (range_idx < range_count) { ret = OB_ERR_UNEXPECTED; - } - if (seg_id < part_count - 1) { - ret = OB_ERR_UNEXPECTED; - OB_LOG(WARN, "btree split range: can not get enough sub range", K(branch_count), K(part_count), K(seg_id)); + OB_LOG(WARN, + "btree split range: can not get enough sub range", + K(btree_node_count), + K(range_idx), + K(range_count)); } } + return ret; } diff --git a/src/storage/memtable/mvcc/ob_keybtree.h b/src/storage/memtable/mvcc/ob_keybtree.h index 879f7be048..e492728d5f 100644 --- a/src/storage/memtable/mvcc/ob_keybtree.h +++ b/src/storage/memtable/mvcc/ob_keybtree.h @@ -14,6 +14,7 @@ #define __OB_KEYBTREE_H__ #include "lib/metrics/ob_counter.h" +#include "lib/container/ob_iarray.h" #include "storage/memtable/mvcc/ob_keybtree_deps.h" namespace oceanbase @@ -181,7 +182,10 @@ public: const BtreeKey max_key, const bool end_exclude, int64_t version); int get_next(BtreeKey &key, BtreeVal &val); int estimate_key_count(int64_t top_level, int64_t& child_count, int64_t& key_count); - int split_range(int64_t top_level, int64_t branch_count, int64_t part_count, BtreeKey* key_array); + int split_range(int64_t top_level, + int64_t btree_node_count, + int64_t range_count, + common::ObIArray &key_array); int estimate_element_count(int64_t &physical_row_count, int64_t &element_count, const double ratio); bool is_reverse_scan() const; private: diff --git a/src/storage/memtable/mvcc/ob_query_engine.cpp b/src/storage/memtable/mvcc/ob_query_engine.cpp index c94fd33dab..348c77b3da 100644 --- a/src/storage/memtable/mvcc/ob_query_engine.cpp +++ b/src/storage/memtable/mvcc/ob_query_engine.cpp @@ -547,112 +547,72 @@ int ObQueryEngine::init_raw_iter_for_estimate(Iterator*& iter, } int ObQueryEngine::estimate_size(const ObMemtableKey *start_key, - const ObMemtableKey *end_key, - int64_t& level, - int64_t& branch_count, - int64_t& total_bytes, - int64_t& total_rows) + const ObMemtableKey *end_key, + int64_t &total_bytes, + int64_t &total_rows) { int ret = OB_SUCCESS; - Iterator *iter = nullptr; - branch_count = 0; - for(level = 0; branch_count < ESTIMATE_CHILD_COUNT_THRESHOLD && OB_SUCC(ret); ) { - level++; - if (OB_FAIL(init_raw_iter_for_estimate(iter, start_key, end_key))) { - TRANS_LOG(WARN, "init raw iter fail", K(ret), K(*start_key), K(*end_key)); - } else if (OB_ISNULL(iter)) { - ret = OB_ERR_UNEXPECTED; - } else if (OB_FAIL(iter->get_read_handle().estimate_key_count(level, branch_count, total_rows))) { - if (OB_ENTRY_NOT_EXIST != ret) { - TRANS_LOG(WARN, "estimate key count fail", K(ret), K(*start_key), K(*end_key)); - } + int64_t unused_top_level = 0; + int64_t unused_btree_node_count = 0; + if (OB_FAIL(inner_loop_find_level_( + start_key, end_key, ESTIMATE_CHILD_COUNT_THRESHOLD, unused_top_level, unused_btree_node_count, total_rows))) + { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + total_bytes = 0; + total_rows = 0; + } else { + TRANS_LOG(WARN, "esitmate size failed", KR(ret), KPC(start_key), KPC(end_key)); } - if (OB_NOT_NULL(iter)) { - iter->reset(); - raw_iter_alloc_.free(iter); - iter = NULL; - } - } - if (OB_SUCC(ret)) { + } else { int64_t per_row_size = 100; total_bytes = total_rows * per_row_size; - } else if (OB_ENTRY_NOT_EXIST == ret) { - ret = OB_SUCCESS; - total_bytes = 0; - total_rows = 0; } return ret; } int ObQueryEngine::split_range(const ObMemtableKey *start_key, const ObMemtableKey *end_key, - int64_t part_count, + int64_t range_count, ObIArray &range_array) { int ret = OB_SUCCESS; Iterator *iter = nullptr; - int64_t level = 0; - int64_t branch_count = 0; - int64_t total_bytes = 0; - int64_t total_rows = 0; + int64_t top_level = 0; + int64_t btree_node_count = 0; - if (part_count < 1 || part_count > MAX_RANGE_SPLIT_COUNT) { - TRANS_LOG(WARN, "part count should be greater than 1 if you try to split range", K(part_count)); + if (range_count < 1 || range_count > MAX_RANGE_SPLIT_COUNT) { + ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "range count should be greater than 1 if you try to split range", KR(ret), K(range_count)); } else { - HEAP_VAR(ObStoreRowkeyWrapper[MAX_RANGE_SPLIT_COUNT], key_array) { - if (OB_FAIL(estimate_size(start_key, end_key, level, branch_count, total_bytes, total_rows)) && OB_ENTRY_NOT_EXIST != ret) { - TRANS_LOG(WARN, "estimate size fail", K(ret), K(*start_key), K(*end_key)); - } else if (OB_ENTRY_NOT_EXIST == ret) { - TRANS_LOG(WARN, "range too small, not enough rows ro split", K(ret), K(*start_key), K(*end_key), K(part_count)); - } else if (branch_count < part_count) { - ret = OB_ENTRY_NOT_EXIST; - TRANS_LOG(WARN, "branch fan out less than part count", K(branch_count), K(part_count)); - } else if (OB_FAIL(init_raw_iter_for_estimate(iter, start_key, end_key))) { - TRANS_LOG(WARN, "init raw iter fail", K(ret), K(*start_key), K(*end_key)); - } else if (NULL == iter) { - ret = OB_ERR_UNEXPECTED; - } else if (OB_FAIL(iter->get_read_handle().split_range(level, branch_count, part_count, key_array))) { - TRANS_LOG(WARN, "split range fail", K(ret), K(*start_key), K(*end_key), K(part_count), K(level), K(branch_count), K(part_count)); - } else { - ObStoreRange merge_range; - for (int64_t i = 0; OB_SUCC(ret) && i < part_count; i++) { - const ObStoreRowkey *rowkey = nullptr; - // start key - if (0 == i) { - if (OB_ISNULL(rowkey = start_key->get_rowkey())) { - ret = OB_ERR_UNEXPECTED; - TRANS_LOG(WARN, "Unexcepted null store rowkey", K(ret), KPC(start_key)); - } else { - merge_range.set_start_key(*rowkey); - } - } else { - merge_range.set_start_key(*key_array[i - 1].get_rowkey()); - } - - // endkey - if (OB_FAIL(ret)) { - } else if (i == part_count - 1) { - if (OB_ISNULL(rowkey = end_key->get_rowkey())) { - ret = OB_ERR_UNEXPECTED; - TRANS_LOG(WARN, "Unexcepted null store rowkey", K(ret), KPC(start_key)); - } else { - merge_range.set_end_key(*rowkey); - } - } else if (OB_ISNULL(rowkey = key_array[i].get_rowkey())) { - ret = OB_ERR_UNEXPECTED; - TRANS_LOG(WARN, "Unexpected null store rowkey", K(ret), K(i)); - } else { - merge_range.set_end_key(*rowkey); - } - if (OB_SUCC(ret)) { - merge_range.set_left_open(); - merge_range.set_right_closed(); - if (OB_FAIL(range_array.push_back(merge_range))) { - TRANS_LOG(WARN, "Failed to push back the merge range to array", K(ret), K(merge_range)); - } - } - } - } + // Here we can not use ESTIMATE_CHILD_COUNT_THRESHOLD to init SEArray due to the stack size limit + ObSEArray key_array; + if (OB_FAIL(find_split_range_level_(start_key, end_key, range_count, top_level, btree_node_count)) && + OB_ENTRY_NOT_EXIST != ret) { + TRANS_LOG(WARN, "estimate size fail", K(ret), K(*start_key), K(*end_key)); + } else if (OB_ENTRY_NOT_EXIST == ret) { + TRANS_LOG(WARN, "range too small, not enough rows ro split", K(ret), K(*start_key), K(*end_key), K(range_count)); + } else if (btree_node_count < range_count) { + ret = OB_ENTRY_NOT_EXIST; + TRANS_LOG(WARN, "branch fan out less than range count", K(btree_node_count), K(range_count)); + } else if (OB_FAIL(init_raw_iter_for_estimate(iter, start_key, end_key))) { + TRANS_LOG(WARN, "init raw iter fail", K(ret), K(*start_key), K(*end_key)); + } else if (NULL == iter) { + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(iter->get_read_handle().split_range(top_level, btree_node_count, range_count, key_array))) { + TRANS_LOG(WARN, + "split range fail", + K(ret), + K(*start_key), + K(*end_key), + K(range_count), + K(top_level), + K(btree_node_count), + K(range_count)); + } else if (OB_FAIL(convert_keys_to_store_ranges_(start_key, end_key, range_count, key_array, range_array))) { + TRANS_LOG(WARN, "convert keys to store ranges failed", KR(ret), K(range_count), K(key_array)); + } else { + // split range succeed } if (OB_NOT_NULL(iter)) { @@ -664,7 +624,103 @@ int ObQueryEngine::split_range(const ObMemtableKey *start_key, return ret; } +int ObQueryEngine::find_split_range_level_(const ObMemtableKey *start_key, + const ObMemtableKey *end_key, + const int64_t range_count, + int64_t &top_level, + int64_t &btree_node_count) +{ + int64_t unused_total_rows = 0; + const int64_t node_threshold = MAX(ESTIMATE_CHILD_COUNT_THRESHOLD, range_count); + return inner_loop_find_level_(start_key, end_key, node_threshold, top_level, btree_node_count, unused_total_rows); +} +int ObQueryEngine::inner_loop_find_level_(const ObMemtableKey *start_key, + const ObMemtableKey *end_key, + const int64_t level_node_threshold, + int64_t &top_level, + int64_t &btree_node_count, + int64_t &total_rows) +{ + int ret = OB_SUCCESS; + Iterator *iter = nullptr; + + // directly loop on the third level because in theory, the second level of BTree can have a maximum of 15 * 15 = 225 + // nodes, which does not meet the THRESHOLD requirement. + btree_node_count = 0; + top_level = 2; + while (OB_SUCC(ret) && btree_node_count < level_node_threshold) { + top_level++; + btree_node_count = 0; + if (OB_FAIL(init_raw_iter_for_estimate(iter, start_key, end_key))) { + TRANS_LOG(WARN, "init raw iter fail", K(ret), K(*start_key), K(*end_key)); + } else if (OB_ISNULL(iter)) { + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(iter->get_read_handle().estimate_key_count(top_level, btree_node_count, total_rows))) { + if (OB_ENTRY_NOT_EXIST != ret) { + TRANS_LOG(WARN, "find split range level failed", K(ret), K(*start_key), K(*end_key)); + } + } + if (OB_NOT_NULL(iter)) { + iter->reset(); + raw_iter_alloc_.free(iter); + iter = NULL; + } + } + + STORAGE_LOG(INFO, "finish find split level", KR(ret), K(top_level), K(btree_node_count), K(total_rows)); + + return ret; +} + +int ObQueryEngine::convert_keys_to_store_ranges_(const ObMemtableKey *start_key, + const ObMemtableKey *end_key, + const int64_t range_count, + const common::ObIArray &key_array, + ObIArray &range_array) +{ + int ret = OB_SUCCESS; + + ObStoreRange merge_range; + for (int64_t i = 0; OB_SUCC(ret) && i < range_count; i++) { + const ObStoreRowkey *rowkey = nullptr; + // start key + if (0 == i) { + if (OB_ISNULL(rowkey = start_key->get_rowkey())) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "Unexcepted null store rowkey", K(ret), KPC(start_key)); + } else { + merge_range.set_start_key(*rowkey); + } + } else { + merge_range.set_start_key(*key_array.at(i - 1).get_rowkey()); + } + + // endkey + if (OB_FAIL(ret)) { + } else if (i == range_count - 1) { + if (OB_ISNULL(rowkey = end_key->get_rowkey())) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "Unexcepted null store rowkey", K(ret), KPC(start_key)); + } else { + merge_range.set_end_key(*rowkey); + } + } else if (OB_ISNULL(rowkey = key_array.at(i).get_rowkey())) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "Unexpected null store rowkey", K(ret), K(i)); + } else { + merge_range.set_end_key(*rowkey); + } + if (OB_SUCC(ret)) { + merge_range.set_left_open(); + merge_range.set_right_closed(); + if (OB_FAIL(range_array.push_back(merge_range))) { + TRANS_LOG(WARN, "Failed to push back the merge range to array", K(ret), K(merge_range)); + } + } + } + return ret; +} int ObQueryEngine::estimate_row_count(const transaction::ObTransID &tx_id, const ObMemtableKey *start_key, const int start_exclude, const ObMemtableKey *end_key, const int end_exclude, diff --git a/src/storage/memtable/mvcc/ob_query_engine.h b/src/storage/memtable/mvcc/ob_query_engine.h index 3c6b88dade..4264a1304c 100644 --- a/src/storage/memtable/mvcc/ob_query_engine.h +++ b/src/storage/memtable/mvcc/ob_query_engine.h @@ -175,7 +175,7 @@ public: }; public: - enum { ESTIMATE_CHILD_COUNT_THRESHOLD = 1024, MAX_RANGE_SPLIT_COUNT = 1024 }; + enum { ESTIMATE_CHILD_COUNT_THRESHOLD = 1024, MAX_RANGE_SPLIT_COUNT = 1024 * 1024}; explicit ObQueryEngine(ObIAllocator &memstore_allocator) : is_inited_(false), is_expanding_(false), tenant_id_(common::OB_SERVER_TENANT_ID), index_(nullptr), memstore_allocator_(memstore_allocator), @@ -193,8 +193,6 @@ public: void revert_iter(ObIQueryEngineIterator *iter); int estimate_size(const ObMemtableKey *start_key, const ObMemtableKey *end_key, - int64_t& level, - int64_t& branch_count, int64_t& total_bytes, int64_t& total_rows); int split_range(const ObMemtableKey *start_key, @@ -237,6 +235,7 @@ public: ? index->btree_alloc_memory() + btree_allocator_.get_allocated() : 0; } + void check_cleanout(bool &is_all_cleanout, bool &is_all_delay_cleanout, int64_t &count); @@ -253,6 +252,26 @@ private: const ObMemtableKey *start_key, const ObMemtableKey *end_key); int set_table_index_(const int64_t obj_cnt, TableIndex *&return_ptr); + + int find_split_range_level_(const ObMemtableKey *start_key, + const ObMemtableKey *end_key, + const int64_t range_count, + int64_t &top_level, + int64_t &btree_node_count); + + int inner_loop_find_level_(const ObMemtableKey *start_key, + const ObMemtableKey *end_key, + const int64_t level_node_threshold, + int64_t &top_level, + int64_t &btree_node_count, + int64_t &total_rows); + + int convert_keys_to_store_ranges_(const ObMemtableKey *start_key, + const ObMemtableKey *end_key, + const int64_t range_count, + const common::ObIArray &key_array, + ObIArray &range_array); + private: DISALLOW_COPY_AND_ASSIGN(ObQueryEngine); static TableIndex * const PLACE_HOLDER; diff --git a/src/storage/memtable/ob_memtable.cpp b/src/storage/memtable/ob_memtable.cpp index e766ed19b7..5d13f770a9 100755 --- a/src/storage/memtable/ob_memtable.cpp +++ b/src/storage/memtable/ob_memtable.cpp @@ -45,6 +45,7 @@ #include "storage/tx_storage/ob_tenant_freezer.h" #include "storage/tablet/ob_tablet_memtable_mgr.h" #include "storage/tx_storage/ob_tenant_freezer.h" +#include "storage/access/ob_row_sample_iterator.h" #include "storage/concurrency_control/ob_trans_stat_row.h" @@ -2161,8 +2162,6 @@ bool ObMemtable::is_frozen_memtable() const int ObMemtable::estimate_phy_size(const ObStoreRowkey* start_key, const ObStoreRowkey* end_key, int64_t& total_bytes, int64_t& total_rows) { int ret = OB_SUCCESS; - int64_t level = 0; - int64_t branch_count = 0; total_bytes = 0; total_rows = 0; ObMemtableKey start_mtk; @@ -2175,7 +2174,7 @@ int ObMemtable::estimate_phy_size(const ObStoreRowkey* start_key, const ObStoreR } if (OB_FAIL(start_mtk.encode(start_key)) || OB_FAIL(end_mtk.encode(end_key))) { TRANS_LOG(WARN, "encode key fail", K(ret), K_(key)); - } else if (OB_FAIL(query_engine_.estimate_size(&start_mtk, &end_mtk, level, branch_count, total_bytes, total_rows))) { + } else if (OB_FAIL(query_engine_.estimate_size(&start_mtk, &end_mtk, total_bytes, total_rows))) { TRANS_LOG(WARN, "estimate row count fail", K(ret), K_(key)); } return ret; @@ -2203,6 +2202,122 @@ int ObMemtable::get_split_ranges(const ObStoreRowkey* start_key, const ObStoreRo return ret; } +// The logic for sampling in the memtable is as follows, as shown in the diagram: We set a constant variable +// SAMPLE_MEMTABLE_RANGE_COUNT, which represents the number of intervals to be read during sampling. Currently, it is +// set to 10. Then, based on the sampling rate, we calculate the total number of ranges to be divided, such that the +// ratio of the data within the chosen ranges to the total data is equal to the sampling rate. In the diagram, +// let's assume a sampling rate of 1%. The entire memtable would be divided into 1000 ranges, and 10 ranges would +// be evenly selected for sampling, including the first and last ranges. +// +// +-------+------------+-------+------------+-------+-----------+-------+-----------+-------+ +// | | | | | | | | | | +// |chosen | |chosen | |chosen | |chosen | |chosen | +// |range 1| ......... |range 3| ......... |range 5| ......... |range 7| ......... |range10| +// | idx:0 | |idx:299| |idx:499| |idx:699| |idx:999| +// | | | | | | | | | | +// +-------+------------+-------+------------+-------+-----------+-------+-----------+-------+ +// | | +// +<------------------------ all splited ranges in memtable ----------------------->+ +// | | +// + + +int ObMemtable::split_ranges_for_sample(const blocksstable::ObDatumRange &table_scan_range, + const double sample_rate_percentage, + ObIAllocator &allocator, + ObIArray &sample_memtable_ranges) +{ + int ret = OB_SUCCESS; + if (sample_rate_percentage == 0 || sample_rate_percentage >= 100) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(ERROR, "invalid sample_rate_percentage", KR(ret), K(sample_rate_percentage)); + } else { + // The logic here for calculating the number of split ranges based on the sampling rate might be confusing. + // For example, assuming our sampling rate is 1%, the variable "sample_rate_percentage" would be 1. At the same + // time, if we have a total number of intervals to be divided, denoted as "total_split_range_count," with an equal + // number of rowkeys within each range, we can obtain the equation: + // + // SAMPLE_MEMTABLE_RANGE_COUNT / total_split_range_count = sample_rate_percentage / 100. + // + int total_split_range_count = + ObMemtableRowSampleIterator::SAMPLE_MEMTABLE_RANGE_COUNT * 100 / sample_rate_percentage; + if (total_split_range_count > ObQueryEngine::MAX_RANGE_SPLIT_COUNT) { + total_split_range_count = ObQueryEngine::MAX_RANGE_SPLIT_COUNT; + } + + // loop to split range + bool split_succ = false; + while (!split_succ && total_split_range_count > ObMemtableRowSampleIterator::SAMPLE_MEMTABLE_RANGE_COUNT) { + int tmp_ret = OB_SUCCESS; + sample_memtable_ranges.reuse(); + if (OB_TMP_FAIL(try_split_range_for_sample_(table_scan_range.get_start_key().get_store_rowkey(), + table_scan_range.get_end_key().get_store_rowkey(), + total_split_range_count, + allocator, + sample_memtable_ranges))) { + total_split_range_count = total_split_range_count / 10; + TRANS_LOG(WARN, + "try split range for sampling failed, shrink split range count and retry", + KR(tmp_ret), + K(total_split_range_count)); + + } else { + TRANS_LOG(INFO, "split range finish", K(total_split_range_count), K(sample_memtable_ranges)); + split_succ = true; + } + } + + // set ret code to ENTRY_NOT_EXIST if split failed + if (!split_succ) { + ret = OB_ENTRY_NOT_EXIST; + } + } + return ret; +} + +int64_t ObMemtable::try_split_range_for_sample_(const ObStoreRowkey &start_key, + const ObStoreRowkey &end_key, + const int64_t range_count, + ObIAllocator &allocator, + ObIArray &sample_memtable_ranges) +{ + int ret = OB_SUCCESS; + ObSEArray store_range_array; + if (OB_FAIL(get_split_ranges(&start_key, &end_key, range_count, store_range_array))) { + TRANS_LOG(WARN, "try split ranges for sample failed", KR(ret)); + } else if (store_range_array.count() != range_count) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(ERROR, "store array count is not equal with range_count", KR(ret), K(range_count), KPC(this)); + } else { + const int64_t range_count_each_chosen = + range_count / (ObMemtableRowSampleIterator::SAMPLE_MEMTABLE_RANGE_COUNT - 1); + + // chose some ranges and push back to sample_memtable_ranges + int64_t chose_range_idx = 0; + bool generate_datum_range_done = false; + while (OB_SUCC(ret) && !generate_datum_range_done) { + if (chose_range_idx >= range_count - 1 || + sample_memtable_ranges.count() == ObMemtableRowSampleIterator::SAMPLE_MEMTABLE_RANGE_COUNT - 1) { + chose_range_idx = range_count - 1; + generate_datum_range_done = true; + } + + ObDatumRange datum_range; + if (OB_FAIL(datum_range.from_range(store_range_array.at(chose_range_idx), allocator))) { + STORAGE_LOG(WARN, + "Failed to transfer store range to datum range", + K(ret), + K(chose_range_idx), + K(store_range_array.at(chose_range_idx))); + } else if (OB_FAIL(sample_memtable_ranges.push_back(datum_range))) { + STORAGE_LOG(WARN, "Failed to push back merge range to array", K(ret), K(datum_range)); + } else { + // chose the next store range + chose_range_idx += range_count_each_chosen; + } + } + } + return ret; +} + int ObMemtable::print_stat() const { int ret = OB_SUCCESS; diff --git a/src/storage/memtable/ob_memtable.h b/src/storage/memtable/ob_memtable.h index f09dde9f57..c20584e545 100644 --- a/src/storage/memtable/ob_memtable.h +++ b/src/storage/memtable/ob_memtable.h @@ -314,6 +314,7 @@ public: ObMtStat& get_mt_stat() { return mt_stat_; } int64_t get_size() const; int64_t get_occupied_size() const; + int64_t get_physical_row_cnt() const { return query_engine_.btree_size(); } inline bool not_empty() const { return INT64_MAX != get_protection_clock(); }; void set_max_schema_version(const int64_t schema_version); virtual int64_t get_max_schema_version() const override; @@ -370,6 +371,11 @@ public: virtual int64_t get_upper_trans_version() const override; virtual int estimate_phy_size(const ObStoreRowkey* start_key, const ObStoreRowkey* end_key, int64_t& total_bytes, int64_t& total_rows) override; virtual int get_split_ranges(const ObStoreRowkey* start_key, const ObStoreRowkey* end_key, const int64_t part_cnt, common::ObIArray &range_array) override; + int split_ranges_for_sample(const blocksstable::ObDatumRange &table_scan_range, + const double sample_rate_percentage, + ObIAllocator &allocator, + ObIArray &sample_memtable_ranges); + ObQueryEngine &get_query_engine() { return query_engine_; } ObMvccEngine &get_mvcc_engine() { return mvcc_engine_; } const ObMvccEngine &get_mvcc_engine() const { return mvcc_engine_; } @@ -548,6 +554,12 @@ private: int64_t dec_unsubmitted_cnt_(); int64_t inc_unsynced_cnt_(); int64_t dec_unsynced_cnt_(); + int64_t try_split_range_for_sample_(const ObStoreRowkey &start_key, + const ObStoreRowkey &end_key, + const int64_t range_count, + ObIAllocator &allocator, + ObIArray &sample_memtable_ranges); + private: DISALLOW_COPY_AND_ASSIGN(ObMemtable); bool is_inited_; diff --git a/src/storage/memtable/ob_memtable_iterator.h b/src/storage/memtable/ob_memtable_iterator.h index 64d5a89f7d..ec8ee73957 100644 --- a/src/storage/memtable/ob_memtable_iterator.h +++ b/src/storage/memtable/ob_memtable_iterator.h @@ -170,6 +170,7 @@ protected: uint8_t iter_flag_; }; + //////////////////////////////////////////////////////////////////////////////////////////////////// class ObMemtableMGetIterator : public ObIMemtableIterator diff --git a/src/storage/tx_storage/ob_access_service.cpp b/src/storage/tx_storage/ob_access_service.cpp index 56fa684343..106e733170 100755 --- a/src/storage/tx_storage/ob_access_service.cpp +++ b/src/storage/tx_storage/ob_access_service.cpp @@ -1025,11 +1025,13 @@ int ObAccessService::estimate_row_count( return ret; } -int ObAccessService::estimate_block_count( +int ObAccessService::estimate_block_count_and_row_count( const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, int64_t ¯o_block_count, - int64_t µ_block_count) const + int64_t µ_block_count, + int64_t &sstable_row_count, + int64_t &memtable_row_count) const { int ret = OB_SUCCESS; ObLSHandle ls_handle; @@ -1045,9 +1047,9 @@ int ObAccessService::estimate_block_count( } else if (nullptr == (ls = ls_handle.get_ls())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ls is unexpected null", K(ret)); - } else if (OB_FAIL(ls->get_tablet_svr()->estimate_block_count( - tablet_id, macro_block_count, micro_block_count))) { - LOG_WARN("failed to estimate block count", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(ls->get_tablet_svr()->estimate_block_count_and_row_count( + tablet_id, macro_block_count, micro_block_count, sstable_row_count, memtable_row_count))) { + LOG_WARN("failed to estimate block count and row count", K(ret), K(ls_id), K(tablet_id)); } return ret; } diff --git a/src/storage/tx_storage/ob_access_service.h b/src/storage/tx_storage/ob_access_service.h index b90d418800..8635dc7987 100644 --- a/src/storage/tx_storage/ob_access_service.h +++ b/src/storage/tx_storage/ob_access_service.h @@ -189,11 +189,13 @@ public: ObIArray &est_records, int64_t &logical_row_count, int64_t &physical_row_count) const; - int estimate_block_count( + int estimate_block_count_and_row_count( const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, int64_t ¯o_block_count, - int64_t µ_block_count) const; + int64_t µ_block_count, + int64_t &sstable_row_count, + int64_t &memtable_row_count) const; protected: int check_tenant_out_of_memstore_limit_(bool &is_out_of_mem); diff --git a/src/storage/tx_table/ob_tx_table_iterator.cpp b/src/storage/tx_table/ob_tx_table_iterator.cpp index b1a05f5a1c..ddc371c9b8 100644 --- a/src/storage/tx_table/ob_tx_table_iterator.cpp +++ b/src/storage/tx_table/ob_tx_table_iterator.cpp @@ -471,7 +471,6 @@ int ObTxDataSingleRowGetter::get_next_row_(const ObSSTableArray &sstables, ObTxD } else if (OB_FAIL(tx_data_buffers_.push_back(std::move(temp_buffer)))) { STORAGE_LOG(WARN, "push element to reserved array should not fail", KR(ret)); } else { - STORAGE_LOG(INFO, "GENGLI total need buffer cnt", K(total_need_buffer_cnt)); int64_t total_need_buffer_cnt2 = 0; for (int64_t idx = 1; idx < total_need_buffer_cnt && OB_SUCC(ret); ++idx) { key_datums_[1].set_int(idx);