diff --git a/src/share/stat/ob_opt_stat_manager.cpp b/src/share/stat/ob_opt_stat_manager.cpp index b77812d71..4521a2d14 100644 --- a/src/share/stat/ob_opt_stat_manager.cpp +++ b/src/share/stat/ob_opt_stat_manager.cpp @@ -20,10 +20,10 @@ #include "share/stat/ob_opt_stat_service.h" #include "share/stat/ob_opt_stat_sql_service.h" #include "share/stat/ob_opt_stat_manager.h" -#include "share/stat/ob_stat_item.h" #include "sql/plan_cache/ob_plan_cache.h" #include "share/stat/ob_dbms_stats_utils.h" #include "share/stat/ob_basic_stats_estimator.h" +#include "sql/optimizer/ob_opt_selectivity.h" namespace oceanbase { @@ -537,12 +537,8 @@ int ObOptStatManager::check_opt_stat_validity(sql::ObExecContext &ctx, int ObOptStatManager::get_table_stat(const uint64_t tenant_id, const uint64_t table_ref_id, const int64_t part_id, - int64_t *row_count, - int64_t *avg_len, - int64_t *avg_part_size, - int64_t *macro_block_count, - int64_t *micro_block_count, - int64_t *last_analyzed) + const double scale_ratio, + ObGlobalTableStat &stat) { int ret = OB_SUCCESS; ObOptTableStat::Key key(tenant_id, table_ref_id, part_id); @@ -550,13 +546,12 @@ int ObOptStatManager::get_table_stat(const uint64_t tenant_id, if (OB_FAIL(get_table_stat(tenant_id, key, opt_stat))) { LOG_WARN("failed to get table stat", K(ret)); } else if (opt_stat.get_last_analyzed() > 0) { - assign_value(opt_stat.get_row_count(), row_count); - assign_value(opt_stat.get_avg_row_size(), avg_len); - assign_value(opt_stat.get_row_count() * opt_stat.get_avg_row_size(), - avg_part_size); - assign_value(opt_stat.get_macro_block_num(), macro_block_count); - assign_value(opt_stat.get_micro_block_num(), micro_block_count); - assign_value(opt_stat.get_last_analyzed(), last_analyzed); + stat.add(opt_stat.get_row_count() * scale_ratio, + opt_stat.get_avg_row_size(), + opt_stat.get_row_count() * opt_stat.get_avg_row_size() * scale_ratio, + opt_stat.get_macro_block_num() * scale_ratio, + opt_stat.get_micro_block_num() * scale_ratio); + 
stat.set_last_analyzed(opt_stat.get_last_analyzed()); } return ret; } @@ -564,74 +559,20 @@ int ObOptStatManager::get_table_stat(const uint64_t tenant_id, int ObOptStatManager::get_table_stat(const uint64_t tenant_id, const uint64_t tab_ref_id, const ObIArray &part_ids, - int64_t *row_count, - int64_t *avg_len, - int64_t *avg_part_size, - int64_t *micro_block_count, - int64_t *last_analyzed) + const ObIArray &global_part_ids, + const double scale_ratio, + ObGlobalTableStat &stat) { int ret = OB_SUCCESS; - ObGlobalTableStat global_tstat; - int64_t tmp_last_analyzed = 0; - for (int64_t i = 0; OB_SUCC(ret) && i < part_ids.count(); ++i) { - int64_t tmp_row_count = 0; - int64_t tmp_row_len = 0; - int64_t tmp_data_size = 0; - int64_t tmp_macro_block_count = 0; - int64_t tmp_micro_block_count = 0; - if (OB_FAIL(get_table_stat(tenant_id, tab_ref_id, part_ids.at(i), - &tmp_row_count, &tmp_row_len, &tmp_data_size, &tmp_macro_block_count, - &tmp_micro_block_count, &tmp_last_analyzed))) { + bool use_global_stat_instead = !global_part_ids.empty(); + const ObIArray &opt_part_ids = use_global_stat_instead ? 
global_part_ids : part_ids; + for (int64_t i = 0; OB_SUCC(ret) && i < opt_part_ids.count(); ++i) { + if (OB_FAIL(get_table_stat(tenant_id, tab_ref_id, opt_part_ids.at(i), scale_ratio, stat))) { LOG_WARN("failed to get table stat", K(ret)); - } else { - global_tstat.add(tmp_row_count, tmp_row_len, - tmp_data_size, tmp_macro_block_count, tmp_micro_block_count); - } - } - if (OB_SUCC(ret) && part_ids.count() >= 1) { - assign_value(global_tstat.get_row_count(), row_count); - assign_value(global_tstat.get_avg_row_size(), avg_len); - assign_value(global_tstat.get_avg_data_size(), avg_part_size); - assign_value(global_tstat.get_micro_block_count(), micro_block_count); - assign_value(tmp_last_analyzed, last_analyzed); - } - return ret; -} - -int ObOptStatManager::get_column_stat(const uint64_t tenant_id, - const uint64_t tab_ref_id, - const int64_t part_id, - const uint64_t column_id, - int64_t *num_distinct, - int64_t *num_null, - int64_t *avg_length, - ObObj *min_obj, - ObObj *max_obj, - ObIAllocator *alloc) -{ - int ret = OB_SUCCESS; - ObOptColumnStatHandle opt_stat; - bool is_valid_col_stat = false; - if (OB_FAIL(get_column_stat(tenant_id, tab_ref_id, part_id, column_id, opt_stat))) { - LOG_WARN("failed to get column stat", K(ret)); - } else if (OB_ISNULL(opt_stat.stat_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("stat cache value is invalid", K(ret)); - } else if (opt_stat.stat_->get_last_analyzed() > 0) { - assign_value(opt_stat.stat_->get_num_distinct(), num_distinct); - assign_value(opt_stat.stat_->get_num_null(), num_null); - assign_value(opt_stat.stat_->get_avg_len(), avg_length); - assign_value(opt_stat.stat_->get_min_value(), min_obj); - assign_value(opt_stat.stat_->get_max_value(), max_obj); - is_valid_col_stat = true; - } else {/*do nothing*/} - if (OB_SUCC(ret) && NULL != alloc && is_valid_col_stat) { - if (NULL != min_obj && OB_FAIL(ob_write_obj(*alloc, *min_obj, *min_obj))) { - LOG_WARN("failed to deep copy min obj", K(ret)); - } else if (NULL != max_obj && 
OB_FAIL(ob_write_obj(*alloc, *max_obj, *max_obj))) { - LOG_WARN("failed to deep copy max obj", K(ret)); } } + LOG_TRACE("succeed to get table stat", K(tab_ref_id), K(part_ids), K(global_part_ids), + K(scale_ratio), K(scale_ratio), K(stat)); return ret; } @@ -639,27 +580,29 @@ int ObOptStatManager::get_column_stat(const uint64_t tenant_id, const uint64_t tab_ref_id, const ObIArray &part_ids, const uint64_t column_id, - int64_t *num_distinct, - int64_t *num_null, - int64_t *avg_length, - ObObj *min_obj, - ObObj *max_obj, + const ObIArray &global_part_ids, + const int64_t row_cnt, + const double scale_ratio, + ObGlobalColumnStat &stat, ObIAllocator *alloc) { int ret = OB_SUCCESS; ObSEArray cids; - ObSEArray pids; ObGlobalMinEval min_eval; ObGlobalMaxEval max_eval; ObGlobalNullEval null_eval; ObGlobalAvglenEval avglen_eval; ObGlobalNdvEval ndv_eval; ObArray new_handles; - if (OB_FAIL(cids.push_back(column_id)) || - OB_FAIL(append(pids, part_ids))) { - LOG_WARN("failed to push back column id", K(ret)); - } else if (OB_FAIL(get_column_stat(tenant_id, tab_ref_id, part_ids, cids, new_handles))) { - LOG_WARN("failed to get opt column stats", K(ret), K(part_ids), K(cids)); + bool use_global_stat_instead = !global_part_ids.empty(); + if (OB_UNLIKELY(scale_ratio < 0.0 || scale_ratio > 1.0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected error", K(scale_ratio), K(ret)); + } else if (OB_FAIL(cids.push_back(column_id))) { + LOG_WARN("failed to push back", K(ret)); + } else if (OB_FAIL(get_column_stat(tenant_id, tab_ref_id, use_global_stat_instead ? 
global_part_ids : part_ids, + cids, new_handles))) { + LOG_WARN("failed to get opt column stats", K(ret), K(global_part_ids), K(part_ids), K(cids)); } else if (!new_handles.empty()) { bool all_column_stat_valid = true; for (int64_t i = 0; OB_SUCC(ret) && all_column_stat_valid && i < new_handles.count(); ++i) { @@ -668,22 +611,12 @@ int ObOptStatManager::get_column_stat(const uint64_t tenant_id, ret = OB_ERR_UNEXPECTED; LOG_WARN("cache value is null", K(ret)); } else if (opt_col_stat->get_last_analyzed() > 0) { - //tmp_use_old_stat = false; - if (NULL != num_distinct) { - ndv_eval.add(opt_col_stat->get_num_distinct(), opt_col_stat->get_llc_bitmap()); - } - if (NULL != num_null) { - null_eval.add(opt_col_stat->get_num_null()); - } - if (NULL != avg_length) { - avglen_eval.add(opt_col_stat->get_avg_len()); - } - // a partition has min/max values only when it contains a valid value - // in the other word, ndv is not zero - if (NULL != min_obj && opt_col_stat->get_num_distinct() != 0) { + ndv_eval.add(opt_col_stat->get_num_distinct(), opt_col_stat->get_llc_bitmap()); + null_eval.add(opt_col_stat->get_num_null()); + avglen_eval.add(opt_col_stat->get_avg_len()); + // a partition has min/max values only when it contains a valid value in the other word, ndv is not zero + if (alloc != NULL && opt_col_stat->get_num_distinct() != 0) { min_eval.add(opt_col_stat->get_min_value()); - } - if (NULL != max_obj && opt_col_stat->get_num_distinct() != 0) { max_eval.add(opt_col_stat->get_max_value()); } } else { @@ -693,24 +626,21 @@ int ObOptStatManager::get_column_stat(const uint64_t tenant_id, } } if (OB_SUCC(ret) && all_column_stat_valid) { - assign_value(ndv_eval.get(), num_distinct); - assign_value(null_eval.get(), num_null); - assign_value(avglen_eval.get(), avg_length); - if (min_eval.is_valid()) { - assign_value(min_eval.get(), min_obj); - } - if (max_eval.is_valid()) { - assign_value(max_eval.get(), max_obj); - } + stat.null_val_ = null_eval.get() * scale_ratio; + 
stat.avglen_val_ = avglen_eval.get(); + stat.ndv_val_ = !use_global_stat_instead ? ndv_eval.get() : + ObOptSelectivity::scale_distinct(row_cnt, row_cnt / scale_ratio, ndv_eval.get()); if (NULL != alloc) { - if (NULL != min_obj && OB_FAIL(ob_write_obj(*alloc, *min_obj, *min_obj))) { + if (min_eval.is_valid() && OB_FAIL(ob_write_obj(*alloc, min_eval.get(), stat.min_val_))) { LOG_WARN("failed to deep copy min obj", K(ret)); - } else if (NULL != max_obj && OB_FAIL(ob_write_obj(*alloc, *max_obj, *max_obj))) { + } else if (max_eval.is_valid() && OB_FAIL(ob_write_obj(*alloc, max_eval.get(), stat.max_val_))) { LOG_WARN("failed to deep copy max obj", K(ret)); } } } } + LOG_TRACE("succeed to get column stat", K(tab_ref_id), K(part_ids), K(column_id), K(scale_ratio), + K(use_global_stat_instead), K(row_cnt), K(stat)); return ret; } diff --git a/src/share/stat/ob_opt_stat_manager.h b/src/share/stat/ob_opt_stat_manager.h index 371563f41..5af716441 100644 --- a/src/share/stat/ob_opt_stat_manager.h +++ b/src/share/stat/ob_opt_stat_manager.h @@ -22,6 +22,7 @@ #include "lib/queue/ob_dedup_queue.h" #include "share/stat/ob_stat_define.h" #include "share/stat/ob_opt_ds_stat.h" +#include "share/stat/ob_stat_item.h" namespace oceanbase { namespace common { @@ -61,26 +62,14 @@ public: const ObIArray &table_stats, const bool is_index_stat); - int get_column_stat(const uint64_t tenant_id, - const uint64_t tab_ref_id, - const int64_t part_id, - const uint64_t column_id, - int64_t *num_distinct = NULL, - int64_t *num_null = NULL, - int64_t *avg_length = NULL, - ObObj *min_obj = NULL, - ObObj *max_obj = NULL, - ObIAllocator *alloc = NULL); - int get_column_stat(const uint64_t tenant_id, const uint64_t tab_ref_id, const ObIArray &part_ids, const uint64_t column_id, - int64_t *num_distinct = NULL, - int64_t *num_null = NULL, - int64_t *avg_length = NULL, - ObObj *min_obj = NULL, - ObObj *max_obj = NULL, + const ObIArray &global_part_ids, + const int64_t row_cnt, + const double scale_ratio, + 
ObGlobalColumnStat &stat, ObIAllocator *alloc = NULL); int get_column_stat(const uint64_t tenant_id, @@ -98,21 +87,15 @@ public: int get_table_stat(const uint64_t tenant_id, const uint64_t table_ref_id, const int64_t part_id, - int64_t *row_count = NULL, - int64_t *avg_len = NULL, - int64_t *avg_part_size = NULL, - int64_t *macro_block_count = NULL, - int64_t *micro_block_count = NULL, - int64_t *last_analyzed = NULL); + const double scale_ratio, + ObGlobalTableStat &stat); int get_table_stat(const uint64_t tenant_id, const uint64_t tab_ref_id, const ObIArray &part_ids, - int64_t *row_count = NULL, - int64_t *avg_len = NULL, - int64_t *avg_part_size = NULL, - int64_t *micro_block_count = NULL, - int64_t *last_analyzed = NULL); + const ObIArray &global_part_ids, + const double scale_ratio, + ObGlobalTableStat &stat); int get_table_stat(const uint64_t tenant_id, const uint64_t table_id, diff --git a/src/share/stat/ob_opt_stat_sql_service.cpp b/src/share/stat/ob_opt_stat_sql_service.cpp index e3587529b..88967d13d 100644 --- a/src/share/stat/ob_opt_stat_sql_service.cpp +++ b/src/share/stat/ob_opt_stat_sql_service.cpp @@ -1661,37 +1661,31 @@ int ObOptStatSqlService::get_valid_obj_str(const ObObj &src_obj, int ret = OB_SUCCESS; if (src_obj.is_string_type()) { ObObj dst_obj; - char *buf = NULL; - int32_t buf_len = src_obj.get_string_len() * ObCharset::CharConvertFactorNum; - uint32_t result_len = 0; - if (0 == buf_len) { - dst_obj = src_obj; - } else if (OB_UNLIKELY(NULL == (buf = static_cast(allocator.alloc(buf_len))))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("alloc memory failed", K(ret), K(buf_len)); - } else if (OB_FAIL(ObCharset::charset_convert(src_obj.get_collation_type(), src_obj.get_string_ptr(), - src_obj.get_string_len(), dst_column_meta.get_collation_type(), buf, buf_len, result_len))) { + ObCastCtx cast_ctx(&allocator, NULL, CM_NONE, dst_column_meta.get_collation_type()); + const char *incorrect_string = "-4258: Incorrect string value, can't show."; 
+ int64_t well_formed_len = 0; + if (OB_FAIL(ObObjCaster::to_type(dst_column_meta.get_type(), cast_ctx, src_obj, dst_obj)) || + OB_FAIL(ObCharset::well_formed_len(dst_column_meta.get_collation_type(), dst_obj.get_string().ptr(), + dst_obj.get_string().length(), well_formed_len))) { //for column which have invalid char ==> save obj binary to use, and obj value to // save "-4258: Incorrect string value" to show this obj have invalid. if (OB_ERR_INCORRECT_STRING_VALUE == ret) { - LOG_WARN("invalid string for charset", K(ret), K(dst_column_meta), K(dst_obj)); ret = OB_SUCCESS; - const char *incorrect_string = "-4258: Incorrect string value, can't show."; dst_obj.set_string(dst_column_meta.get_type(), incorrect_string, static_cast(strlen(incorrect_string))); + dst_obj.set_meta_type(dst_column_meta); + LOG_TRACE("invalid string for charset", K(ret), K(src_obj), K(dst_column_meta)); } else { - LOG_WARN("failed to judge the string formed", K(ret)); + LOG_WARN("failed to type", K(ret)); } - } else { - dst_obj.set_string(dst_column_meta.get_type(), buf, static_cast(result_len)); - dst_obj.set_collation_type(dst_column_meta.get_collation_type()); } if (OB_SUCC(ret) && OB_FAIL(get_obj_str(dst_obj, allocator, dest_str, print_params))) { LOG_WARN("fail to get obj str", K(ret)); + } else { + LOG_TRACE("succeed to get valid obj str", K(src_obj), K(dst_obj), K(dest_str)); } - LOG_TRACE("succeed to get valid obj str", K(src_obj), K(dst_obj)); } else if (OB_FAIL(get_obj_str(src_obj, allocator, dest_str, print_params))) { LOG_WARN("failed to get obj str", K(ret), K(src_obj)); - } else {/*do nothing*/} + } return ret; } diff --git a/src/share/stat/ob_stat_item.h b/src/share/stat/ob_stat_item.h index 2089ad5f7..6a3fe48fc 100644 --- a/src/share/stat/ob_stat_item.h +++ b/src/share/stat/ob_stat_item.h @@ -307,7 +307,7 @@ class ObGlobalTableStat public: ObGlobalTableStat() : row_count_(0), row_size_(0), data_size_(0), - macro_block_count_(0), micro_block_count_(0), part_cnt_(0) + 
macro_block_count_(0), micro_block_count_(0), part_cnt_(0), last_analyzed_(0) {} void add(int64_t rc, int64_t rs, int64_t ds, int64_t mac, int64_t mic); @@ -317,13 +317,17 @@ public: int64_t get_avg_data_size() const; int64_t get_macro_block_count() const; int64_t get_micro_block_count() const; + int64_t get_last_analyzed() const { return last_analyzed_; } + void set_last_analyzed(int64_t last_analyzed) { last_analyzed_ = last_analyzed; } + TO_STRING_KV(K(row_count_), K(row_size_), K(data_size_), K(macro_block_count_), K(micro_block_count_), - K(part_cnt_)); + K(part_cnt_), + K(last_analyzed_)); private: int64_t row_count_; @@ -332,6 +336,7 @@ private: int64_t macro_block_count_; int64_t micro_block_count_; int64_t part_cnt_; + int64_t last_analyzed_; }; class ObGlobalNullEval @@ -433,6 +438,25 @@ private: int64_t global_num_not_null_; }; +struct ObGlobalColumnStat +{ + ObGlobalColumnStat() : min_val_(), max_val_(), null_val_(0), avglen_val_(0), ndv_val_(0) + { + min_val_.set_min_value(); + max_val_.set_max_value(); + } + TO_STRING_KV(K(min_val_), + K(max_val_), + K(null_val_), + K(avglen_val_), + K(ndv_val_)); + ObObj min_val_; + ObObj max_val_; + int64_t null_val_; + int64_t avglen_val_; + int64_t ndv_val_; +}; + template static T *copy_stat_item(ObIAllocator &allocator, const T &src) { diff --git a/src/share/stat/ob_stats_estimator.cpp b/src/share/stat/ob_stats_estimator.cpp index cf883a2bd..d0b911314 100644 --- a/src/share/stat/ob_stats_estimator.cpp +++ b/src/share/stat/ob_stats_estimator.cpp @@ -329,30 +329,37 @@ int ObStatsEstimator::do_estimate(uint64_t tenant_id, int ret = OB_SUCCESS; common::ObOracleSqlProxy oracle_proxy; // TODO, check the usage, is there any postprocess ObCommonSqlProxy *sql_proxy = ctx_.get_sql_proxy(); - if (OB_ISNULL(sql_proxy) || OB_ISNULL(ctx_.get_my_session()) || - OB_UNLIKELY(dst_opt_stats.empty() || raw_sql.empty())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected empty", K(ret), K(sql_proxy), K(dst_opt_stats.empty()), - 
K(ctx_.get_my_session()), K(raw_sql.empty())); - } else if (lib::is_oracle_mode()) { - if (OB_FAIL(oracle_proxy.init(ctx_.get_sql_proxy()->get_pool()))) { - LOG_WARN("failed to init oracle proxy", K(ret)); - } else { - sql_proxy = &oracle_proxy; + ObArenaAllocator tmp_alloc("OptStatGather", OB_MALLOC_NORMAL_BLOCK_SIZE, tenant_id); + sql::ObSQLSessionInfo::StmtSavedValue *session_value = NULL; + void *ptr = NULL; + ObSQLSessionInfo *session = ctx_.get_my_session(); + if (OB_ISNULL(ptr = tmp_alloc.alloc(sizeof(sql::ObSQLSessionInfo::StmtSavedValue)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for saved session value", K(ret)); + } else { + session_value = new(ptr)sql::ObSQLSessionInfo::StmtSavedValue(); + if (OB_ISNULL(sql_proxy) || OB_ISNULL(session) || + OB_UNLIKELY(dst_opt_stats.empty() || raw_sql.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected empty", K(ret), K(sql_proxy), K(dst_opt_stats.empty()), + K(session), K(raw_sql.empty())); + } else if (OB_FAIL(session->save_session(*session_value))) { + LOG_WARN("failed to save session", K(ret)); + } else if (lib::is_oracle_mode()) { + if (OB_FAIL(oracle_proxy.init(ctx_.get_sql_proxy()->get_pool()))) { + LOG_WARN("failed to init oracle proxy", K(ret)); + } else { + sql_proxy = &oracle_proxy; + } } } if (OB_SUCC(ret)) { observer::ObInnerSQLConnectionPool *pool = static_cast(sql_proxy->get_pool()); sqlclient::ObISQLConnection *conn = NULL; - bool is_inner = ctx_.get_my_session()->is_inner(); - ObSQLSessionInfo::SessionType session_type = ctx_.get_my_session()->get_session_type(); - ctx_.get_my_session()->set_inner_session(); SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) { sqlclient::ObMySQLResult *client_result = NULL; - if (lib::is_oracle_mode() && OB_FAIL(pool->acquire(ctx_.get_my_session(), conn, true))) { - LOG_WARN("failed to acquire inner connection", K(ret)); - } else if (lib::is_mysql_mode() && OB_FAIL(pool->acquire(tenant_id, conn, sql_proxy))) { + if 
(OB_FAIL(pool->acquire(session, conn, lib::is_oracle_mode()))) { LOG_WARN("failed to acquire inner connection", K(ret)); } else if (OB_ISNULL(conn)) { ret = OB_ERR_UNEXPECTED; @@ -401,12 +408,10 @@ int ObStatsEstimator::do_estimate(uint64_t tenant_id, ret = COVER_SUCC(tmp_ret); } } - //reset session type - if (is_inner) { - ctx_.get_my_session()->set_session_type(session_type); - } else { - ctx_.get_my_session()->set_user_session(); - ctx_.get_my_session()->set_session_type(session_type); + int tmp_ret = OB_SUCCESS; + if (session_value != NULL && OB_SUCCESS != (tmp_ret = session->restore_session(*session_value))) { + LOG_WARN("failed to restore session", K(tmp_ret)); + ret = COVER_SUCC(tmp_ret); } } return ret; diff --git a/src/sql/optimizer/ob_dynamic_sampling.cpp b/src/sql/optimizer/ob_dynamic_sampling.cpp index 72319979f..a35b55e9d 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.cpp +++ b/src/sql/optimizer/ob_dynamic_sampling.cpp @@ -505,6 +505,7 @@ int ObDynamicSampling::estimte_rowcount(int64_t max_ds_timeout, int64_t start_time = ObTimeUtility::current_time(); ObSQLSessionInfo *session_info = ctx_->get_session_info(); bool need_restore_session = false; + transaction::ObTxDesc *tx_desc = NULL; if (OB_FAIL(add_block_sample_info(sample_block_ratio_, seed_, sample_str))) { LOG_WARN("failed to add block sample info", K(ret)); } else if (OB_FAIL(add_basic_hint_info(basic_hint_str, max_ds_timeout, degree))) { @@ -512,7 +513,7 @@ int ObDynamicSampling::estimte_rowcount(int64_t max_ds_timeout, } else if (OB_FAIL(pack(raw_sql_str))) { LOG_WARN("failed to pack dynamic sampling", K(ret)); } else if (OB_FAIL(prepare_and_store_session(session_info, session_value, - nested_count, is_no_backslash_escapes))) { + nested_count, is_no_backslash_escapes, tx_desc))) { throw_ds_error = true;//here we must throw error, because the seesion may be unavailable. 
LOG_WARN("failed to prepare and store session", K(ret)); } else { @@ -527,7 +528,7 @@ int ObDynamicSampling::estimte_rowcount(int64_t max_ds_timeout, if (need_restore_session) { int tmp_ret = OB_SUCCESS; if (OB_SUCCESS != (tmp_ret = restore_session(session_info, session_value, - nested_count, is_no_backslash_escapes))) { + nested_count, is_no_backslash_escapes, tx_desc))) { throw_ds_error = true;//here we must throw error, because the seesion may be unavailable. ret = COVER_SUCC(tmp_ret); LOG_WARN("failed to restore session", K(tmp_ret)); @@ -1017,7 +1018,8 @@ int ObDynamicSampling::do_estimate_rowcount(ObSQLSessionInfo *session_info, int ObDynamicSampling::prepare_and_store_session(ObSQLSessionInfo *session, sql::ObSQLSessionInfo::StmtSavedValue *&session_value, int64_t &nested_count, - bool &is_no_backslash_escapes) + bool &is_no_backslash_escapes, + transaction::ObTxDesc *&tx_desc) { int ret = OB_SUCCESS; void *ptr = NULL; @@ -1034,11 +1036,17 @@ int ObDynamicSampling::prepare_and_store_session(ObSQLSessionInfo *session, } else { nested_count = session->get_nested_count(); IS_NO_BACKSLASH_ESCAPES(session->get_sql_mode(), is_no_backslash_escapes); + session->set_sql_mode(session->get_sql_mode() & ~SMO_NO_BACKSLASH_ESCAPES); session->set_query_start_time(ObTimeUtility::current_time()); session->set_inner_session(); session->set_nested_count(-1); //bug: session->set_autocommit(session_value->inc_autocommit_); + //ac is true, dynamic sampling select query no need tx desc. 
+ if (session_value->inc_autocommit_ && session->get_tx_desc() != NULL) { + tx_desc = session->get_tx_desc(); + session->get_tx_desc() = NULL; + } } } return ret; @@ -1047,7 +1055,8 @@ int ObDynamicSampling::prepare_and_store_session(ObSQLSessionInfo *session, int ObDynamicSampling::restore_session(ObSQLSessionInfo *session, sql::ObSQLSessionInfo::StmtSavedValue *session_value, int64_t nested_count, - bool is_no_backslash_escapes) + bool is_no_backslash_escapes, + transaction::ObTxDesc *tx_desc) { int ret = OB_SUCCESS; if (OB_ISNULL(session) || OB_ISNULL(session_value)) { @@ -1060,6 +1069,9 @@ int ObDynamicSampling::restore_session(ObSQLSessionInfo *session, if (is_no_backslash_escapes) { session->set_sql_mode(session->get_sql_mode() | SMO_NO_BACKSLASH_ESCAPES); } + if (tx_desc != NULL) {//reset origin tx desc. + session->get_tx_desc() = tx_desc; + } } return ret; } diff --git a/src/sql/optimizer/ob_dynamic_sampling.h b/src/sql/optimizer/ob_dynamic_sampling.h index 9b5f2f378..f239debba 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.h +++ b/src/sql/optimizer/ob_dynamic_sampling.h @@ -311,11 +311,13 @@ private: int prepare_and_store_session(ObSQLSessionInfo *session, sql::ObSQLSessionInfo::StmtSavedValue *&session_value, int64_t &nested_count, - bool &is_no_backslash_escapes); + bool &is_no_backslash_escapes, + transaction::ObTxDesc *&tx_desc); int restore_session(ObSQLSessionInfo *session, sql::ObSQLSessionInfo::StmtSavedValue *session_value, int64_t nested_count, - bool is_no_backslash_escapes); + bool is_no_backslash_escapes, + transaction::ObTxDesc *tx_desc); private: ObOptimizerContext *ctx_; diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index 9f624cc81..d12a1c173 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -11292,16 +11292,13 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, LOG_TRACE("init_est_sel_info_for_access_path", 
K(all_used_part_id), K(all_used_tablet_id), K(ref_table_id)); if (OB_SUCC(ret)) { // 1. try with statistics - int64_t table_row_count = 0; - int64_t part_size = 0; - int64_t avg_row_size = 0; - int64_t micro_block_count = 0; bool has_opt_stat = false; - int64_t stat_type = 0; + OptTableStatType stat_type = OptTableStatType::DEFAULT_TABLE_STAT; int64_t last_analyzed = 0; - const int64_t origin_part_cnt = all_used_part_id.count(); bool use_global = false; + ObSEArray global_part_ids; + double scale_ratio = 1.0; if (OPT_CTX.use_default_stat()) { // do nothing } else if (OB_ISNULL(OPT_CTX.get_opt_stat_manager())) { @@ -11315,7 +11312,7 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, LOG_WARN("failed to check use global stat", K(ret)); } else if (use_global) { has_opt_stat = true; - stat_type = 1; + stat_type = OptTableStatType::OPT_TABLE_STAT; } else if (OB_FAIL(OPT_CTX.get_opt_stat_manager()->check_opt_stat_validity(*(OPT_CTX.get_exec_ctx()), session_info->get_effective_tenant_id(), ref_table_id, @@ -11323,29 +11320,39 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, has_opt_stat))) { LOG_WARN("failed to check has opt stat", K(ret)); } else if (has_opt_stat) { - stat_type = 1; + stat_type = OptTableStatType::OPT_TABLE_STAT; + } else if (OB_FAIL(check_can_use_global_stat_instead(ref_table_id, + table_schema, + all_used_part_id, + all_used_tablet_id, + has_opt_stat, + global_part_ids, + scale_ratio))) { + LOG_WARN("failed to check can use global stat instead", K(ret)); + } else if (has_opt_stat) { + stat_type = OptTableStatType::OPT_TABLE_GLOBAL_STAT; } - LOG_TRACE("statistics (0: default, 1: user-gathered)", + LOG_TRACE("statistics (0: default, 1: user-gathered, 2: user_gathered_global_stat)", K(stat_type), K(ref_table_id), K(all_used_part_id)); // TODO, consider move the following codes into access_path_estimation if (OB_SUCC(ret) && has_opt_stat) { + ObGlobalTableStat stat; if 
(OB_FAIL(OPT_CTX.get_opt_stat_manager()->get_table_stat(session_info->get_effective_tenant_id(), ref_table_id, all_used_part_id, - &table_row_count, - &avg_row_size, - &part_size, - &micro_block_count, - &last_analyzed))) { + global_part_ids, + scale_ratio, + stat))) { LOG_WARN("failed to get table stats", K(ret)); } else { - table_meta_info_.table_row_count_ = table_row_count; - table_meta_info_.part_size_ = !use_global ? static_cast(part_size) : - static_cast(part_size * all_used_part_id.count()) + last_analyzed = stat.get_last_analyzed(); + table_meta_info_.table_row_count_ = stat.get_row_count(); + table_meta_info_.part_size_ = !use_global ? static_cast(stat.get_avg_data_size()) : + static_cast(stat.get_avg_data_size() * all_used_part_id.count()) / origin_part_cnt; - table_meta_info_.average_row_size_ = static_cast(avg_row_size); - table_meta_info_.micro_block_count_ = micro_block_count; + table_meta_info_.average_row_size_ = static_cast(stat.get_avg_row_size()); + table_meta_info_.micro_block_count_ = stat.get_micro_block_count(); table_meta_info_.has_opt_stat_ = has_opt_stat; LOG_INFO("total rowcount, use statistics", K(table_meta_info_.table_row_count_), K(table_meta_info_.average_row_size_), K(table_meta_info_.micro_block_count_), @@ -11394,6 +11401,8 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, all_used_tablet_id, column_ids, stat_type, + global_part_ids, + scale_ratio, last_analyzed))) { LOG_WARN("failed to add base table meta info", K(ret)); } @@ -11433,14 +11442,11 @@ int ObJoinOrder::init_est_info_for_index(const uint64_t index_id, } } if (OB_SUCC(ret)) { - int64_t table_row_count = 0; - int64_t part_size = 0; - int64_t avg_row_size = 0; - int64_t micro_block_count = 0; - int64_t stat_type = 0; - + OptTableStatType stat_type = OptTableStatType::DEFAULT_TABLE_STAT; const int64_t origin_part_cnt = all_used_part_id.count(); bool use_global = false; + ObSEArray global_part_ids; + double scale_ratio = 1.0; if 
(OPT_CTX.use_default_stat()) { // do nothing } else if (OB_ISNULL(OPT_CTX.get_opt_stat_manager())) { @@ -11454,7 +11460,7 @@ int ObJoinOrder::init_est_info_for_index(const uint64_t index_id, LOG_WARN("failed to check use global stat", K(ret)); } else if (use_global) { has_opt_stat = true; - stat_type = 1; + stat_type = OptTableStatType::OPT_TABLE_STAT; } else if (OB_FAIL(OPT_CTX.get_opt_stat_manager()->check_opt_stat_validity(*(OPT_CTX.get_exec_ctx()), session_info->get_effective_tenant_id(), index_id, @@ -11462,26 +11468,36 @@ int ObJoinOrder::init_est_info_for_index(const uint64_t index_id, has_opt_stat))) { LOG_WARN("failed to check has opt stat", K(ret)); } else if (has_opt_stat) { - stat_type = 1; + stat_type = OptTableStatType::OPT_TABLE_STAT; + } else if (OB_FAIL(check_can_use_global_stat_instead(index_id, + index_schema, + all_used_part_id, + all_used_tablet_id, + has_opt_stat, + global_part_ids, + scale_ratio))) { + LOG_WARN("failed to check can use global stat instead", K(ret)); + } else if (has_opt_stat) { + stat_type = OptTableStatType::OPT_TABLE_GLOBAL_STAT; } - LOG_TRACE("statistics (0: default, 1: merge-gathered, 2: user-gathered)", + LOG_TRACE("statistics (0: default, 1: user-gathered, 2: user_gathered_global_stat)", K(stat_type), K(index_id), K(all_used_part_id)); if (OB_SUCC(ret) && has_opt_stat) { + ObGlobalTableStat stat; if (OB_FAIL(OPT_CTX.get_opt_stat_manager()->get_table_stat(session_info->get_effective_tenant_id(), index_id, all_used_part_id, - &table_row_count, - &avg_row_size, - &part_size, - &micro_block_count))) { + global_part_ids, + scale_ratio, + stat))) { LOG_WARN("failed to get table stats", K(ret)); } else { - index_meta_info.index_part_size_ = !use_global ? static_cast(part_size) : - static_cast(part_size * all_used_part_id.count()) + index_meta_info.index_part_size_ = !use_global ? 
static_cast(stat.get_avg_data_size()) : + static_cast(stat.get_avg_data_size() * all_used_part_id.count()) / origin_part_cnt; - index_meta_info.index_micro_block_count_ = micro_block_count; - LOG_INFO("total rowcount, use statistics", K(index_meta_info)); + index_meta_info.index_micro_block_count_ = stat.get_micro_block_count(); + LOG_TRACE("index table, use statistics", K(index_meta_info), K(stat)); } } } @@ -13766,3 +13782,91 @@ int ObJoinOrder::extract_naaj_join_conditions(const ObIArray &join_q } return ret; } + + +int ObJoinOrder::check_can_use_global_stat_instead(const uint64_t ref_table_id, + const ObTableSchema &table_schema, + ObIArray &all_used_parts, + ObIArray &all_used_tablets, + bool &can_use, + ObIArray &global_part_ids, + double &scale_ratio) +{ + int ret = OB_SUCCESS; + bool is_global_stat_valid = false; + int64_t global_part_id = -1; + can_use = false; + scale_ratio = 1.0; + ObSQLSessionInfo *session_info = NULL; + if (OB_ISNULL(get_plan()) || + OB_ISNULL(session_info = get_plan()->get_optimizer_context().get_session_info()) || + OB_ISNULL(OPT_CTX.get_exec_ctx()) || + OB_ISNULL(OPT_CTX.get_opt_stat_manager())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (!table_schema.is_partitioned_table()) { + //do nothing + } else if (OB_FAIL(OPT_CTX.get_opt_stat_manager()->check_opt_stat_validity(*(OPT_CTX.get_exec_ctx()), + session_info->get_effective_tenant_id(), + ref_table_id, + global_part_id, + is_global_stat_valid))) { + LOG_WARN("failed to check stat version", K(ret)); + } else if (PARTITION_LEVEL_ONE == table_schema.get_part_level()) { + if (!is_global_stat_valid) { + //do nothing + } else if (OB_FAIL(global_part_ids.push_back(global_part_id))) { + LOG_WARN("failed to push back global partition id", K(ret)); + } else { + can_use = true; + scale_ratio = 1.0 * all_used_parts.count() / table_schema.get_all_part_num(); + } + } else if (PARTITION_LEVEL_TWO == table_schema.get_part_level()) { + int64_t 
total_subpart_cnt = 0; + bool is_opt_stat_valid = true; + for (int64_t i = 0; OB_SUCC(ret) && is_opt_stat_valid && i < all_used_tablets.count(); ++i) { + int64_t part_id = OB_INVALID_ID; + int64_t subpart_id = OB_INVALID_ID; + ObArray subpart_ids; + if (OB_FAIL(table_schema.get_part_id_by_tablet(all_used_tablets.at(i), part_id, subpart_id))) { + LOG_WARN("failed to get part id by tablet", K(ret), K(all_used_tablets.at(i))); + } else if (!ObOptimizerUtil::find_item(global_part_ids, part_id)) { + if (OB_FAIL(OPT_CTX.get_opt_stat_manager()->check_opt_stat_validity(*(OPT_CTX.get_exec_ctx()), + session_info->get_effective_tenant_id(), + ref_table_id, + part_id, + is_opt_stat_valid))) { + LOG_WARN("failed to get stat version", K(ret)); + } else if (!is_opt_stat_valid) { + //do nothing + } else if (OB_FAIL(global_part_ids.push_back(part_id))) { + LOG_WARN("failed to push back part id", K(ret)); + } else if (OB_FAIL(table_schema.get_subpart_ids(part_id, subpart_ids))) { + LOG_WARN("failed to get subpart ids", K(ret)); + } else { + total_subpart_cnt += subpart_ids.count(); + } + } + } + if (OB_SUCC(ret)) { + if (!is_opt_stat_valid || + (total_subpart_cnt == table_schema.get_all_part_num() && is_global_stat_valid)) { + global_part_ids.reset(); + if (!is_global_stat_valid) { + //do nothing + } else if (OB_FAIL(global_part_ids.push_back(global_part_id))) { + LOG_WARN("failed to push back global partition id", K(ret)); + } else { + can_use = true; + scale_ratio = 1.0 * all_used_parts.count() / table_schema.get_all_part_num(); + } + } else { + can_use = true; + scale_ratio = 1.0 * all_used_parts.count() / total_subpart_cnt; + } + } + } + LOG_TRACE("succeed to check can use global stat instead", K(all_used_parts), K(all_used_tablets), + K(can_use), K(global_part_ids), K(scale_ratio)); + return ret; +} diff --git a/src/sql/optimizer/ob_join_order.h b/src/sql/optimizer/ob_join_order.h index 5ae476df6..4b7dac874 100644 --- a/src/sql/optimizer/ob_join_order.h +++ 
b/src/sql/optimizer/ob_join_order.h @@ -2447,6 +2447,13 @@ struct NullAwareAntiJoinInfo { int add_deduced_expr(ObRawExpr *deduced_expr, ObRawExpr *deduce_from, bool is_persistent, ObExprEqualCheckContext &equal_ctx); int check_match_to_type(ObRawExpr *to_type, ObRawExpr *candi_expr, bool &is_same, ObExprEqualCheckContext &equal_ctx); + int check_can_use_global_stat_instead(const uint64_t ref_table_id, + const ObTableSchema &table_schema, + ObIArray &all_used_parts, + ObIArray &all_used_tablets, + bool &can_use, + ObIArray &global_part_ids, + double &scale_ratio); friend class ::test::TestJoinOrder_ob_join_order_param_check_Test; friend class ::test::TestJoinOrder_ob_join_order_src_Test; private: diff --git a/src/sql/optimizer/ob_opt_est_cost.cpp b/src/sql/optimizer/ob_opt_est_cost.cpp index 230b398b8..0ad6cec4b 100644 --- a/src/sql/optimizer/ob_opt_est_cost.cpp +++ b/src/sql/optimizer/ob_opt_est_cost.cpp @@ -452,21 +452,19 @@ int ObOptEstCost::estimate_width_for_table(const OptTableMetas &table_metas, column_expr->is_hidden_column()) { // do nothing } else { - int64_t avg_len = 0; + ObGlobalColumnStat stat; if (OB_NOT_NULL(table_meta) && table_meta->use_opt_stat() && OB_FAIL(ctx.get_opt_stat_manager()->get_column_stat(ctx.get_session_info()->get_effective_tenant_id(), table_meta->get_ref_table_id(), table_meta->get_all_used_parts(), column_expr->get_column_id(), - NULL, - NULL, - &avg_len, - NULL, - NULL, - &ctx.get_allocator()))) { + table_meta->get_all_used_global_parts(), + table_meta->get_rows(), + table_meta->get_scale_ratio(), + stat))) { LOG_WARN("failed to get column stat", K(ret)); - } else if (avg_len != 0) { - width += avg_len; + } else if (stat.avglen_val_ != 0) { + width += stat.avglen_val_; } else { width += get_estimate_width_from_type(column_expr->get_result_type()); } @@ -498,7 +496,7 @@ int ObOptEstCost::estimate_width_for_exprs(const OptTableMetas &table_metas, // column expr const ObColumnRefRawExpr* column_expr = static_cast(expr); uint64_t 
table_id = column_expr->get_table_id(); - int64_t avg_len = 0; + ObGlobalColumnStat stat; const OptTableMeta *table_meta = table_metas.get_table_meta_by_table_id(table_id); // base table column expr use statistic if (OB_NOT_NULL(table_meta) && table_meta->use_opt_stat() && @@ -506,15 +504,13 @@ int ObOptEstCost::estimate_width_for_exprs(const OptTableMetas &table_metas, table_meta->get_ref_table_id(), table_meta->get_all_used_parts(), column_expr->get_column_id(), - NULL, - NULL, - &avg_len, - NULL, - NULL, - &ctx.get_allocator()))) { + table_meta->get_all_used_global_parts(), + table_meta->get_rows(), + table_meta->get_scale_ratio(), + stat))) { LOG_WARN("failed to get column stat", K(ret)); - } else if (avg_len != 0) { - width += avg_len; + } else if (stat.avglen_val_ != 0) { + width += stat.avglen_val_; } else { // non base table column expr use estimation width += get_estimate_width_from_type(column_expr->get_result_type()); diff --git a/src/sql/optimizer/ob_opt_selectivity.cpp b/src/sql/optimizer/ob_opt_selectivity.cpp index 519491202..5b8982e22 100644 --- a/src/sql/optimizer/ob_opt_selectivity.cpp +++ b/src/sql/optimizer/ob_opt_selectivity.cpp @@ -87,11 +87,13 @@ int OptTableMeta::assign(const OptTableMeta &other) int OptTableMeta::init(const uint64_t table_id, const uint64_t ref_table_id, const int64_t rows, - const int64_t stat_type, + const OptTableStatType stat_type, ObSqlSchemaGuard &schema_guard, ObIArray &all_used_part_id, common::ObIArray &all_used_tablets, ObIArray &column_ids, + ObIArray &all_used_global_parts, + const double scale_ratio, const OptSelectivityCtx &ctx) { int ret = OB_SUCCESS; @@ -103,10 +105,13 @@ int OptTableMeta::init(const uint64_t table_id, ref_table_id_ = ref_table_id; rows_ = rows; stat_type_ = stat_type; + scale_ratio_ = scale_ratio; if (OB_FAIL(all_used_parts_.assign(all_used_part_id))) { LOG_WARN("failed to assign all used partition ids", K(ret)); } else if (OB_FAIL(all_used_tablets_.assign(all_used_tablets))) { 
LOG_WARN("failed to assign all used partition ids", K(ret)); + } else if (OB_FAIL(all_used_global_parts_.assign(all_used_global_parts))) { + LOG_WARN("failed to assign all used partition ids", K(ret)); } else if (OB_FAIL(schema_guard.get_table_schema(table_id_, ref_table_id_, ctx.get_stmt(), table_schema))) { LOG_WARN("failed to get table schmea", K(ret), K(ref_table_id_)); } else if (OB_ISNULL(table_schema)) { @@ -136,9 +141,8 @@ int OptTableMeta::init(const uint64_t table_id, column_id = column_ids.at(i); int64_t global_ndv = 0; int64_t num_null = 0; - int64_t avg_len = 0; bool is_single_pkey = (1 == pk_ids_.count() && pk_ids_.at(0) == column_id); - // get global llc bitmap + ObGlobalColumnStat stat; if (is_single_pkey) { global_ndv = rows_; num_null = 0; @@ -153,18 +157,23 @@ int OptTableMeta::init(const uint64_t table_id, ref_table_id_, all_used_part_id, column_id, - &global_ndv, - &num_null, - &avg_len))) { + all_used_global_parts, + rows, + scale_ratio, + stat))) { LOG_WARN("failed to get column stats", K(ret)); - } else if (0 == global_ndv && 0 == num_null) { + } else if (0 == stat.ndv_val_ && 0 == stat.null_val_) { global_ndv = std::min(rows, 100L); num_null = rows * EST_DEF_COL_NULL_RATIO; - } else if (0 == global_ndv && num_null > 0) { + } else if (0 == stat.ndv_val_ && stat.null_val_ > 0) { global_ndv = 1; + num_null = stat.null_val_; + } else { + global_ndv = stat.ndv_val_; + num_null = stat.null_val_; } if (OB_SUCC(ret)) { - column_metas_.at(i).init(column_id, global_ndv, num_null, avg_len); + column_metas_.at(i).init(column_id, global_ndv, num_null, stat.avglen_val_); } } return ret; @@ -213,7 +222,9 @@ int OptTableMetas::add_base_table_meta_info(OptSelectivityCtx &ctx, ObIArray &all_used_part_id, ObIArray &all_used_tablets, ObIArray &column_ids, - const int64_t stat_type, + const OptTableStatType stat_type, + ObIArray &all_used_global_parts, + const double scale_ratio, int64_t last_analyzed) { int ret = OB_SUCCESS; @@ -227,7 +238,7 @@ int 
OptTableMetas::add_base_table_meta_info(OptSelectivityCtx &ctx, LOG_WARN("failed to allocate place holder for table meta", K(ret)); } else if (OB_FAIL(table_meta->init(table_id, ref_table_id, rows, stat_type, *schema_guard, all_used_part_id, all_used_tablets, - column_ids, ctx))) { + column_ids, all_used_global_parts, scale_ratio, ctx))) { LOG_WARN("failed to init new tstat", K(ret)); } else { table_meta->set_version(last_analyzed); @@ -3299,6 +3310,9 @@ int ObOptSelectivity::get_column_min_max(const OptTableMetas &table_metas, const OptTableMeta *table_meta = table_metas.get_table_meta_by_table_id(table_id); OptColumnMeta *column_meta = const_cast( table_metas.get_column_meta_by_table_id(table_id, column_id)); + const ObTableSchema *table_schema = NULL; + ObSqlSchemaGuard *schema_guard = const_cast(ctx).get_sql_schema_guard(); + ObGlobalColumnStat stat; if (OB_NOT_NULL(table_meta) && OB_NOT_NULL(column_meta)) { if (column_meta->get_min_max_inited()) { min_obj = column_meta->get_min_value(); @@ -3312,15 +3326,16 @@ int ObOptSelectivity::get_column_min_max(const OptTableMetas &table_metas, table_meta->get_ref_table_id(), table_meta->get_all_used_parts(), column_id, - NULL, - NULL, - NULL, - &column_meta->get_min_value(), - &column_meta->get_max_value(), + table_meta->get_all_used_global_parts(), + table_meta->get_rows(), + table_meta->get_scale_ratio(), + stat, &ctx.get_allocator()))) { LOG_WARN("failed to get column stat", K(ret)); } else { column_meta->set_min_max_inited(true); + column_meta->set_min_value(stat.min_val_); + column_meta->set_max_value(stat.max_val_); min_obj = column_meta->get_min_value(); max_obj = column_meta->get_max_value(); LOG_TRACE("var basic stat min/max", K(min_obj), K(max_obj)); @@ -3459,7 +3474,9 @@ int ObOptSelectivity::get_histogram_by_column(const OptTableMetas &table_metas, const OptTableMeta *table_meta = table_metas.get_table_meta_by_table_id(table_id); if (OB_ISNULL(table_meta) || OB_INVALID_ID == table_meta->get_ref_table_id()) { 
// do nothing - } else if (NULL == ctx.get_opt_stat_manager() || !table_meta->use_opt_stat()) { + } else if (NULL == ctx.get_opt_stat_manager() || + !table_meta->use_opt_stat() || + table_meta->use_opt_global_stat()) { // do nothing } else if (table_meta->get_all_used_parts().count() != 1) { // consider to use the global histogram here diff --git a/src/sql/optimizer/ob_opt_selectivity.h b/src/sql/optimizer/ob_opt_selectivity.h index d8cff6474..b451c9a31 100644 --- a/src/sql/optimizer/ob_opt_selectivity.h +++ b/src/sql/optimizer/ob_opt_selectivity.h @@ -206,6 +206,13 @@ private: bool min_max_inited_; }; +enum OptTableStatType { + DEFAULT_TABLE_STAT = 0, //default table stat. + OPT_TABLE_STAT, //optimizer gather table stat. + OPT_TABLE_GLOBAL_STAT, //optimizer gather table global stat when no table part stat. + DS_TABLE_STAT //dynamic sampling table stat +}; + class OptTableMeta { public: @@ -213,24 +220,28 @@ public: table_id_(OB_INVALID_ID), ref_table_id_(OB_INVALID_ID), rows_(0), - stat_type_(0), + stat_type_(OptTableStatType::DEFAULT_TABLE_STAT), last_analyzed_(0), all_used_parts_(), all_used_tablets_(), pk_ids_(), column_metas_(), - ds_level_(ObDynamicSamplingLevel::NO_DYNAMIC_SAMPLING) + ds_level_(ObDynamicSamplingLevel::NO_DYNAMIC_SAMPLING), + all_used_global_parts_(), + scale_ratio_(1.0) {} int assign(const OptTableMeta &other); int init(const uint64_t table_id, const uint64_t ref_table_id, const int64_t rows, - const int64_t stat_type, + const OptTableStatType stat_type, ObSqlSchemaGuard &schema_guard, common::ObIArray &all_used_part_id, common::ObIArray &all_used_tablets, common::ObIArray &column_ids, + ObIArray &all_used_global_parts, + const double scale_ratio, const OptSelectivityCtx &ctx); // int update_stat(const double rows, const bool can_reduce, const bool can_enlarge); @@ -251,24 +262,28 @@ public: common::ObIArray &get_all_used_tablets() { return all_used_tablets_; } const common::ObIArray& get_pkey_ids() const { return pk_ids_; } 
common::ObIArray& get_column_metas() { return column_metas_; } + const common::ObIArray& get_all_used_global_parts() const { return all_used_global_parts_; } + common::ObIArray &get_all_used_global_parts() { return all_used_global_parts_; } + double get_scale_ratio() const { return scale_ratio_; } + void set_scale_ratio(const double scale_ratio) { scale_ratio_ = scale_ratio; } void set_ds_level(const int64_t ds_level) { ds_level_ = ds_level; } int64_t get_ds_level() const { return ds_level_; } - bool use_default_stat() const { return stat_type_ == 0; } - bool use_opt_stat() const { return stat_type_ == 1; } - bool use_ds_stat() const { return stat_type_ == 2; } - void set_use_ds_stat() { stat_type_ = 2; } + bool use_default_stat() const { return stat_type_ == OptTableStatType::DEFAULT_TABLE_STAT; } + bool use_opt_stat() const { return stat_type_ == OptTableStatType::OPT_TABLE_STAT || + stat_type_ == OptTableStatType::OPT_TABLE_GLOBAL_STAT; } + bool use_opt_global_stat() const { return stat_type_ == OptTableStatType::OPT_TABLE_GLOBAL_STAT; } + bool use_ds_stat() const { return stat_type_ == OptTableStatType::DS_TABLE_STAT; } + void set_use_ds_stat() { stat_type_ = OptTableStatType::DS_TABLE_STAT; } TO_STRING_KV(K_(table_id), K_(ref_table_id), K_(rows), K_(stat_type), K_(ds_level), - K_(all_used_parts), K_(all_used_tablets), K_(pk_ids), K_(column_metas)); + K_(all_used_parts), K_(all_used_tablets), K_(pk_ids), K_(column_metas), + K_(all_used_global_parts), K_(scale_ratio)); private: uint64_t table_id_; uint64_t ref_table_id_; double rows_; - /// 0 for default stat - /// 1 for optimizer-gathered stat - /// 2 for dynamic sampling stat, TODO, jiangxiu.wt - int64_t stat_type_; + OptTableStatType stat_type_; int64_t last_analyzed_; ObSEArray all_used_parts_; @@ -276,6 +291,8 @@ private: ObSEArray pk_ids_; ObSEArray column_metas_; int64_t ds_level_;//dynamic sampling level + ObSEArray all_used_global_parts_; + double scale_ratio_; }; struct OptSelectivityDSParam { @@ 
-302,7 +319,9 @@ public: common::ObIArray &all_used_part_id, common::ObIArray &all_used_tablets, common::ObIArray &column_ids, - const int64_t stat_type, + const OptTableStatType stat_type, + ObIArray &all_used_global_parts, + const double scale_ratio, int64_t last_analyzed); int add_set_child_stmt_meta_info(const ObDMLStmt *parent_stmt,