/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #ifndef _OB_DYNAMIC_SAMPLING_H_ #define _OB_DYNAMIC_SAMPLING_H_ #include "sql/resolver/expr/ob_raw_expr.h" #include "sql/printer/ob_raw_expr_printer.h" #include "sql/resolver/expr/ob_raw_expr_util.h" #include "sql/engine/ob_exec_context.h" #include "share/stat/ob_stat_define.h" #include "share/stat/ob_opt_ds_stat_cache.h" namespace oceanbase { namespace sql { class ObRawExpr; class ObEstSelInfo; class ObRawExprPrinter; class ObExecContext; class OptTableMeta; } // end of namespace sql namespace common { class ObServerConfig; class ObMySQLProxy; struct ObDSFailTabInfo { ObDSFailTabInfo () : table_id_(OB_INVALID_ID), part_ids_() {} uint64_t table_id_; ObSEArray part_ids_; TO_STRING_KV(K(table_id_), K(part_ids_)); }; struct ObDSTableParam { ObDSTableParam () : tenant_id_(0), table_id_(OB_INVALID_ID), db_name_(), table_name_(), alias_name_(), is_virtual_table_(false), ds_level_(ObDynamicSamplingLevel::NO_DYNAMIC_SAMPLING), sample_block_cnt_(0), max_ds_timeout_(0), degree_(1), need_specify_partition_(false), partition_infos_() {} bool is_valid() const { return tenant_id_ != 0 && table_id_ != OB_INVALID_ID && ds_level_ != ObDynamicSamplingLevel::NO_DYNAMIC_SAMPLING && max_ds_timeout_ > 0; } uint64_t tenant_id_; uint64_t table_id_; ObString db_name_; ObString table_name_; ObString alias_name_; bool is_virtual_table_; int64_t ds_level_; int64_t sample_block_cnt_; int64_t max_ds_timeout_; int64_t degree_; bool need_specify_partition_; ObSEArray partition_infos_; TO_STRING_KV(K(tenant_id_), K(table_id_), K(db_name_), K(table_name_), K(alias_name_), K(is_virtual_table_), K(ds_level_), K(max_ds_timeout_), K(degree_), K(sample_block_cnt_), K(need_specify_partition_), K(partition_infos_)); }; enum ObDSResultItemType { OB_DS_INVALID_STAT = -1, OB_DS_BASIC_STAT,//basic table stat, like table rowcount、column ndv、column num null OB_DS_OUTPUT_STAT, OB_DS_FILTER_OUTPUT_STAT//match filters output }; enum ObDSStatItemType { OB_DS_INVALID_TYPE = -1, OB_DS_ROWCOUNT, OB_DS_OUTPUT_COUNT, OB_DS_FILTER_OUTPUT, OB_DS_COLUMN_NUM_DISTINCT, OB_DS_COLUMN_NUM_NULL }; struct ObDSResultItem { ObDSResultItem(): type_(OB_DS_INVALID_STAT), index_id_(OB_INVALID_ID), exprs_(), stat_key_(), stat_handle_(), stat_(NULL) {} ObDSResultItem(ObDSResultItemType type, uint64_t index_id): type_(type), index_id_(index_id), exprs_(), stat_key_(), stat_handle_(), stat_(NULL) {} TO_STRING_KV(K(type_), K(index_id_), K(exprs_), K(stat_key_), KPC(stat_handle_.stat_), KPC(stat_)); ObDSResultItemType type_; uint64_t index_id_; ObSEArray exprs_; ObOptDSStat::Key stat_key_; ObOptDSStatHandle stat_handle_; ObOptDSStat *stat_; }; class ObDSStatItem { public: ObDSStatItem() : result_item_(NULL), filter_string_(), column_expr_(NULL), type_(OB_DS_INVALID_TYPE) {} ObDSStatItem(ObDSResultItem *result_item, const ObString &filter_string, ObDSStatItemType type) : result_item_(result_item), filter_string_(filter_string), column_expr_(NULL), type_(type) {} ObDSStatItem(ObDSResultItem *result_item, const ObString &filter_string, const ObColumnRefRawExpr *column_expr, ObDSStatItemType type) : result_item_(result_item), filter_string_(filter_string), column_expr_(column_expr), type_(type) {} void reset() { result_item_ = NULL; filter_string_.reset(); column_expr_ = NULL; type_ = OB_DS_INVALID_TYPE; } virtual ~ObDSStatItem() { reset(); } virtual bool is_needed() const { return true; }//TODO, need refine?? virtual int gen_expr(char *buf, const int64_t buf_len, int64_t &pos); virtual int decode(double sample_ratio, ObObj &obj); ObDSStatItemType get_type() { return type_; } int cast_int(const ObObj &obj, int64_t &ret_value); TO_STRING_KV(KPC(result_item_), K(filter_string_), KPC(column_expr_), K(type_)); ObDSResultItem *result_item_; ObString filter_string_; const ObColumnRefRawExpr *column_expr_; ObDSStatItemType type_; }; template static T *copy_ds_stat_item(ObIAllocator &allocator, const T &src) { T *ret = NULL; void *ptr = allocator.alloc(sizeof(T)); if (NULL != ptr) { ret = new (ptr) T(); *ret = src; } return ret; } const int64_t OB_DS_BASIC_SAMPLE_MICRO_CNT = 32; const int64_t OB_DS_MAX_FILTER_EXPR_COUNT = 10000; const int64_t OB_DS_MIN_QUERY_TIMEOUT = 1000;//Dynamic sampling requires a minimum timeout of 1ms. //const int64_t OB_OPT_DS_ADAPTIVE_SAMPLE_MICRO_CNT = 200; //const int64_t OB_OPT_DS_MAX_TIMES = 7; class ObDynamicSampling { public: explicit ObDynamicSampling(ObOptimizerContext &ctx, ObIAllocator &allocator) : ctx_(&ctx), allocator_(allocator), db_name_(), table_name_(), alias_name_(), partition_list_(), macro_block_num_(0), micro_block_num_(0), sstable_row_count_(0), memtable_row_count_(0), sample_block_ratio_(0.0), seed_(0), sample_block_(), basic_hints_(), where_conditions_(), ds_stat_items_(), results_() {} int estimate_table_rowcount(const ObDSTableParam ¶m, ObIArray &ds_result_items, bool &throw_ds_error); int add_table_info(const ObString &db_name, const ObString &table_name, const ObString &alias_name); int add_basic_hint_info(ObSqlString &basic_hint_str, int64_t query_timeout, int64_t degree); int add_block_sample_info(const double &sample_block_ratio, const int64_t seed, ObSqlString &sample_str); int add_filter_infos(const ObIArray &filter_exprs, bool only_column_namespace, ObSqlString &filter_sql_str, ObString &filter_str); int calc_table_sample_block_ratio(const ObDSTableParam ¶m); int add_partition_info(const ObIArray &partition_infos, ObSqlString &partition_sql_str, ObString &partition_str); static int print_filter_exprs(const ObSQLSessionInfo *session_info, ObSchemaGetterGuard *schema_guard, const ParamStore *param_store, const ObIArray &filter_exprs, bool only_column_namespace, ObSqlString &expr_str); static inline double revise_between_0_100(double num) { return num < 0 ? 0 : (num > 100.0 ? 100.0 : num); } const ObIArray &get_ds_items() const { return ds_stat_items_; } int64_t get_ds_item_size() const { return ds_stat_items_.count(); } int add_result(ObObj &obj) { return results_.push_back(obj); } int64_t get_micro_block_num() const { return micro_block_num_; } template int add_ds_stat_item(const T &item); private: int do_estimate_table_rowcount(const ObDSTableParam ¶m, bool &throw_ds_error); int get_ds_table_result_from_cache(const ObDSTableParam ¶m, ObOptDSStat::Key &key, ObOptDSStatHandle &ds_stat_handle, int64_t &cur_modified_dml_cnt); int do_estimate_rowcount(ObSQLSessionInfo *session_info, const ObSqlString &raw_sql); int estimte_rowcount(int64_t max_ds_timeout, int64_t degree, bool &throw_ds_error); int pack(ObSqlString &raw_sql_str); int gen_select_filed(ObSqlString &select_fields); int estimate_table_block_count_and_row_count(const ObDSTableParam ¶m); int get_all_tablet_id_and_object_id(const ObDSTableParam ¶m, ObIArray &tablet_ids, ObIArray &partition_ids); int decode(double sample_ratio); int construct_ds_stat_key(const ObDSTableParam ¶m, ObDSResultItemType type, const ObIArray &filter_exprs, ObOptDSStat::Key &key); int gen_partition_str(const ObIArray &partition_infos, ObSqlString &partition_str); int add_ds_stat_items_by_dml_info(const ObDSTableParam ¶m, const int64_t cur_modified_dml_cnt, const double stale_percent_threshold, ObIArray &ds_result_items); int do_add_ds_stat_item(const ObDSTableParam ¶m, ObDSResultItem &result_item, int64_t ds_column_cnt); int add_ds_col_stat_item(const ObDSTableParam ¶m, ObDSResultItem &result_item, int64_t ds_column_cnt); bool all_ds_col_stats_are_gathered(const ObDSTableParam ¶m, const ObIArray &column_exprs, const ObOptDSStat::DSColStats &ds_col_stats, int64_t &ds_column_cnt); int add_ds_table_stat_item(const ObIArray &filters, ObDSStatItemType stat_item_type, ObOptDSStat &ds_stat, ObSqlString &filters_str); int64_t get_dynamic_sampling_micro_block_num(const ObDSTableParam ¶m); int get_table_dml_info(const uint64_t tenant_id, const uint64_t table_id, int64_t &cur_modified_dml_cnt, double &stale_percent_threshold); int add_ds_result_cache(ObIArray &ds_result_items); int add_block_info_for_stat_items(); int get_ds_stat_items(const ObDSTableParam ¶m, ObIArray &ds_result_items); int prepare_and_store_session(ObSQLSessionInfo *session, sql::ObSQLSessionInfo::StmtSavedValue *&session_value, int64_t &nested_count, bool &is_no_backslash_escapes, transaction::ObTxDesc *&tx_desc); int restore_session(ObSQLSessionInfo *session, sql::ObSQLSessionInfo::StmtSavedValue *session_value, int64_t nested_count, bool is_no_backslash_escapes, transaction::ObTxDesc *tx_desc); private: ObOptimizerContext *ctx_; ObIAllocator &allocator_; ObString db_name_; ObString table_name_; ObString alias_name_; ObString partition_list_; int64_t macro_block_num_; int64_t micro_block_num_; int64_t sstable_row_count_; int64_t memtable_row_count_; double sample_block_ratio_; int64_t seed_; ObString sample_block_; ObString basic_hints_; ObString where_conditions_; ObSEArray ds_stat_items_; ObSEArray results_; //following members will be used for dynamic sampling join in the future //ObString join_type_; //ObString join_conditions_; //int64_t micro_total_count2_; //bool is_left_sample_; }; class ObDynamicSamplingUtils { public: static int get_valid_dynamic_sampling_level(const ObSQLSessionInfo *session_info, const ObTableDynamicSamplingHint *table_ds_hint, const int64_t global_ds_level, int64_t &ds_level, int64_t &sample_block_cnt, bool &specify_ds); static int get_ds_table_param(ObOptimizerContext &ctx, const ObLogPlan *log_plan, const OptTableMeta *table_meta, ObDSTableParam &ds_table_param, bool &specify_ds); static int check_ds_can_use_filters(const ObIArray &filters, bool &no_use); static const ObDSResultItem *get_ds_result_item(ObDSResultItemType type, uint64_t index_id, const ObIArray &ds_result_items); static int64_t get_dynamic_sampling_max_timeout(ObOptimizerContext &ctx); static int add_failed_ds_table_list(const uint64_t table_id, const common::ObIArray &used_part_id, common::ObIArray &failed_list); static bool is_ds_virtual_table(const int64_t table_id); static int get_ds_table_degree(ObOptimizerContext &ctx, const ObLogPlan *log_plan, const uint64_t table_id, const uint64_t ref_table_id, int64_t °ree); static bool check_is_failed_ds_table(const uint64_t table_id, const common::ObIArray &used_part_id, const common::ObIArray &failed_list); private: static int check_ds_can_use_filter(const ObRawExpr *filter, bool &no_use, int64_t &total_expr_cnt); static int get_ds_table_part_info(ObOptimizerContext &ctx, const uint64_t ref_table_id, const common::ObIArray &used_tablets, bool &need_specify_partition, ObIArray &partition_infos); } ; // struct ObOptDSJoinParam { // ObOptDSJoinParam() : // left_table_param_(), // right_table_param_(), // join_type_(UNKNOWN_JOIN), // max_ds_timeout_(0), // join_conditions_(NULL) // {} // bool is_valid() const { return left_table_param_.is_valid() && // right_table_param_.is_valid() && // join_type_ != UNKNOWN_JOIN && // max_ds_timeout_ > 0 && // join_conditions_ != NULL; } // ObOptDSBaseParam left_table_param_; // ObOptDSBaseParam right_table_param_; // ObJoinType join_type_; // int64_t max_ds_timeout_; // const ObIArray *join_conditions_; // TO_STRING_KV(K(left_table_param_), // K(right_table_param_), // K(join_type_), // K(max_ds_timeout_), // KPC(join_conditions_)); // }; } // end of namespace common } // end of namespace oceanbase #endif /* _OB_DYNAMIC_SAMPLING_H_ */