/**
 * Copyright (c) 2021 OceanBase
 * OceanBase CE is licensed under Mulan PubL v2.
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 * You may obtain a copy of Mulan PubL v2 at:
 *          http://license.coscl.org.cn/MulanPubL-2.0
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PubL v2 for more details.
 */

// Include guard; the matching #endif is the last directive of this header.
#ifndef _OB_DYNAMIC_SAMPLING_H_
#define _OB_DYNAMIC_SAMPLING_H_

#include "sql/resolver/expr/ob_raw_expr.h"
#include "sql/printer/ob_raw_expr_printer.h"
#include "sql/resolver/expr/ob_raw_expr_util.h"
#include "sql/engine/ob_exec_context.h"
#include "share/stat/ob_stat_define.h"
#include "share/stat/ob_opt_ds_stat_cache.h"

namespace oceanbase {
namespace sql {
// Forward declarations of SQL-layer classes.
class ObRawExpr;
class ObEstSelInfo;
class ObRawExprPrinter;
class ObExecContext;
class OptTableMeta;
} // end of namespace sql
namespace common {
// Forward declarations of common-layer classes.
class ObServerConfig;
class ObMySQLProxy;

struct ObDSFailTabInfo
|
|
{
|
|
ObDSFailTabInfo () : table_id_(OB_INVALID_ID), part_ids_() {}
|
|
uint64_t table_id_;
|
|
ObSEArray<int64_t, 1, common::ModulePageAllocator, true> part_ids_;
|
|
TO_STRING_KV(K(table_id_),
|
|
K(part_ids_));
|
|
};
|
|
|
|
struct ObDSTableParam
|
|
{
|
|
ObDSTableParam () :
|
|
tenant_id_(0),
|
|
table_id_(OB_INVALID_ID),
|
|
db_name_(),
|
|
table_name_(),
|
|
alias_name_(),
|
|
is_virtual_table_(false),
|
|
ds_level_(ObDynamicSamplingLevel::NO_DYNAMIC_SAMPLING),
|
|
sample_block_cnt_(0),
|
|
max_ds_timeout_(0),
|
|
degree_(1),
|
|
need_specify_partition_(false),
|
|
partition_infos_()
|
|
{}
|
|
|
|
bool is_valid() const { return tenant_id_ != 0 &&
|
|
table_id_ != OB_INVALID_ID &&
|
|
ds_level_ != ObDynamicSamplingLevel::NO_DYNAMIC_SAMPLING &&
|
|
max_ds_timeout_ > 0; }
|
|
uint64_t tenant_id_;
|
|
uint64_t table_id_;
|
|
ObString db_name_;
|
|
ObString table_name_;
|
|
ObString alias_name_;
|
|
bool is_virtual_table_;
|
|
int64_t ds_level_;
|
|
int64_t sample_block_cnt_;
|
|
int64_t max_ds_timeout_;
|
|
int64_t degree_;
|
|
bool need_specify_partition_;
|
|
ObSEArray<PartInfo, 4, common::ModulePageAllocator, true> partition_infos_;
|
|
|
|
TO_STRING_KV(K(tenant_id_),
|
|
K(table_id_),
|
|
K(db_name_),
|
|
K(table_name_),
|
|
K(alias_name_),
|
|
K(is_virtual_table_),
|
|
K(ds_level_),
|
|
K(max_ds_timeout_),
|
|
K(degree_),
|
|
K(sample_block_cnt_),
|
|
K(need_specify_partition_),
|
|
K(partition_infos_));
|
|
};
|
|
|
|
// Kind of result carried by an ObDSResultItem.
// Values are spelled out explicitly; they equal the implicit numbering
// that follows OB_DS_INVALID_STAT = -1.
enum ObDSResultItemType
{
  OB_DS_INVALID_STAT = -1,
  OB_DS_BASIC_STAT = 0,         // basic table stat, like table rowcount, column ndv, column num null
  OB_DS_OUTPUT_STAT = 1,
  OB_DS_FILTER_OUTPUT_STAT = 2  // match filters output
};

// Kind of statistic described by an ObDSStatItem.
// Values are spelled out explicitly; they equal the implicit numbering
// that follows OB_DS_INVALID_TYPE = -1.
enum ObDSStatItemType
{
  OB_DS_INVALID_TYPE = -1,
  OB_DS_ROWCOUNT = 0,
  OB_DS_OUTPUT_COUNT = 1,
  OB_DS_FILTER_OUTPUT = 2,
  OB_DS_COLUMN_NUM_DISTINCT = 3,
  OB_DS_COLUMN_NUM_NULL = 4
};

struct ObDSResultItem
|
|
{
|
|
ObDSResultItem():
|
|
type_(OB_DS_INVALID_STAT),
|
|
index_id_(OB_INVALID_ID),
|
|
exprs_(),
|
|
stat_key_(),
|
|
stat_handle_(),
|
|
stat_(NULL)
|
|
{}
|
|
ObDSResultItem(ObDSResultItemType type, uint64_t index_id):
|
|
type_(type),
|
|
index_id_(index_id),
|
|
exprs_(),
|
|
stat_key_(),
|
|
stat_handle_(),
|
|
stat_(NULL)
|
|
{}
|
|
TO_STRING_KV(K(type_),
|
|
K(index_id_),
|
|
K(exprs_),
|
|
K(stat_key_),
|
|
KPC(stat_handle_.stat_),
|
|
KPC(stat_));
|
|
ObDSResultItemType type_;
|
|
uint64_t index_id_;
|
|
ObSEArray<ObRawExpr*, 4, common::ModulePageAllocator, true> exprs_;
|
|
ObOptDSStat::Key stat_key_;
|
|
ObOptDSStatHandle stat_handle_;
|
|
ObOptDSStat *stat_;
|
|
};
|
|
|
|
class ObDSStatItem
|
|
{
|
|
public:
|
|
ObDSStatItem() :
|
|
result_item_(NULL),
|
|
filter_string_(),
|
|
column_expr_(NULL),
|
|
type_(OB_DS_INVALID_TYPE)
|
|
{}
|
|
ObDSStatItem(ObDSResultItem *result_item,
|
|
const ObString &filter_string,
|
|
ObDSStatItemType type) :
|
|
result_item_(result_item),
|
|
filter_string_(filter_string),
|
|
column_expr_(NULL),
|
|
type_(type)
|
|
{}
|
|
ObDSStatItem(ObDSResultItem *result_item,
|
|
const ObString &filter_string,
|
|
const ObColumnRefRawExpr *column_expr,
|
|
ObDSStatItemType type) :
|
|
result_item_(result_item),
|
|
filter_string_(filter_string),
|
|
column_expr_(column_expr),
|
|
type_(type)
|
|
{}
|
|
void reset() {
|
|
result_item_ = NULL;
|
|
filter_string_.reset();
|
|
column_expr_ = NULL;
|
|
type_ = OB_DS_INVALID_TYPE;
|
|
}
|
|
virtual ~ObDSStatItem() { reset(); }
|
|
virtual bool is_needed() const { return true; }//TODO, need refine??
|
|
virtual int gen_expr(char *buf, const int64_t buf_len, int64_t &pos);
|
|
virtual int decode(double sample_ratio, ObObj &obj);
|
|
ObDSStatItemType get_type() { return type_; }
|
|
int cast_int(const ObObj &obj, int64_t &ret_value);
|
|
TO_STRING_KV(KPC(result_item_),
|
|
K(filter_string_),
|
|
KPC(column_expr_),
|
|
K(type_));
|
|
ObDSResultItem *result_item_;
|
|
ObString filter_string_;
|
|
const ObColumnRefRawExpr *column_expr_;
|
|
ObDSStatItemType type_;
|
|
};
|
|
|
|
template <class T>
|
|
static T *copy_ds_stat_item(ObIAllocator &allocator, const T &src)
|
|
{
|
|
T *ret = NULL;
|
|
void *ptr = allocator.alloc(sizeof(T));
|
|
if (NULL != ptr) {
|
|
ret = new (ptr) T();
|
|
*ret = src;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
// Tunables for dynamic sampling.
const int64_t OB_DS_BASIC_SAMPLE_MICRO_CNT = 32;
const int64_t OB_DS_MAX_FILTER_EXPR_COUNT = 10000;
// Dynamic sampling requires a minimum timeout of 1ms.
const int64_t OB_DS_MIN_QUERY_TIMEOUT = 1000;
//const int64_t OB_OPT_DS_ADAPTIVE_SAMPLE_MICRO_CNT = 200;
//const int64_t OB_OPT_DS_MAX_TIMES = 7;

class ObDynamicSampling
|
|
{
|
|
public:
|
|
explicit ObDynamicSampling(ObOptimizerContext &ctx, ObIAllocator &allocator) :
|
|
ctx_(&ctx),
|
|
allocator_(allocator),
|
|
db_name_(),
|
|
table_name_(),
|
|
alias_name_(),
|
|
partition_list_(),
|
|
macro_block_num_(0),
|
|
micro_block_num_(0),
|
|
sstable_row_count_(0),
|
|
memtable_row_count_(0),
|
|
sample_block_ratio_(0.0),
|
|
seed_(0),
|
|
sample_block_(),
|
|
basic_hints_(),
|
|
where_conditions_(),
|
|
ds_stat_items_(),
|
|
results_()
|
|
{}
|
|
|
|
int estimate_table_rowcount(const ObDSTableParam ¶m,
|
|
ObIArray<ObDSResultItem> &ds_result_items,
|
|
bool &throw_ds_error);
|
|
int add_table_info(const ObString &db_name,
|
|
const ObString &table_name,
|
|
const ObString &alias_name);
|
|
int add_basic_hint_info(ObSqlString &basic_hint_str,
|
|
int64_t query_timeout,
|
|
int64_t degree);
|
|
int add_block_sample_info(const double &sample_block_ratio,
|
|
const int64_t seed,
|
|
ObSqlString &sample_str);
|
|
int add_filter_infos(const ObIArray<ObRawExpr*> &filter_exprs,
|
|
bool only_column_namespace,
|
|
ObSqlString &filter_sql_str,
|
|
ObString &filter_str);
|
|
int calc_table_sample_block_ratio(const ObDSTableParam ¶m);
|
|
int add_partition_info(const ObIArray<PartInfo> &partition_infos,
|
|
ObSqlString &partition_sql_str,
|
|
ObString &partition_str);
|
|
static int print_filter_exprs(const ObSQLSessionInfo *session_info,
|
|
ObSchemaGetterGuard *schema_guard,
|
|
const ParamStore *param_store,
|
|
const ObIArray<ObRawExpr*> &filter_exprs,
|
|
bool only_column_namespace,
|
|
ObSqlString &expr_str);
|
|
static inline double revise_between_0_100(double num) {
|
|
return num < 0 ? 0 : (num > 100.0 ? 100.0 : num); }
|
|
const ObIArray<ObDSStatItem*> &get_ds_items() const { return ds_stat_items_; }
|
|
int64_t get_ds_item_size() const { return ds_stat_items_.count(); }
|
|
int add_result(ObObj &obj) { return results_.push_back(obj); }
|
|
int64_t get_micro_block_num() const { return micro_block_num_; }
|
|
|
|
template <class T>
|
|
int add_ds_stat_item(const T &item);
|
|
private:
|
|
int do_estimate_table_rowcount(const ObDSTableParam ¶m, bool &throw_ds_error);
|
|
int get_ds_table_result_from_cache(const ObDSTableParam ¶m,
|
|
ObOptDSStat::Key &key,
|
|
ObOptDSStatHandle &ds_stat_handle,
|
|
int64_t &cur_modified_dml_cnt);
|
|
int do_estimate_rowcount(ObSQLSessionInfo *session_info, const ObSqlString &raw_sql);
|
|
int estimte_rowcount(int64_t max_ds_timeout, int64_t degree, bool &throw_ds_error);
|
|
int pack(ObSqlString &raw_sql_str);
|
|
int gen_select_filed(ObSqlString &select_fields);
|
|
int estimate_table_block_count_and_row_count(const ObDSTableParam ¶m);
|
|
int get_all_tablet_id_and_object_id(const ObDSTableParam ¶m,
|
|
ObIArray<ObTabletID> &tablet_ids,
|
|
ObIArray<ObObjectID> &partition_ids);
|
|
int decode(double sample_ratio);
|
|
int construct_ds_stat_key(const ObDSTableParam ¶m,
|
|
ObDSResultItemType type,
|
|
const ObIArray<ObRawExpr*> &filter_exprs,
|
|
ObOptDSStat::Key &key);
|
|
int gen_partition_str(const ObIArray<PartInfo> &partition_infos, ObSqlString &partition_str);
|
|
int add_ds_stat_items_by_dml_info(const ObDSTableParam ¶m,
|
|
const int64_t cur_modified_dml_cnt,
|
|
const double stale_percent_threshold,
|
|
ObIArray<ObDSResultItem> &ds_result_items);
|
|
int do_add_ds_stat_item(const ObDSTableParam ¶m,
|
|
ObDSResultItem &result_item,
|
|
int64_t ds_column_cnt);
|
|
int add_ds_col_stat_item(const ObDSTableParam ¶m,
|
|
ObDSResultItem &result_item,
|
|
int64_t ds_column_cnt);
|
|
bool all_ds_col_stats_are_gathered(const ObDSTableParam ¶m,
|
|
const ObIArray<ObRawExpr*> &column_exprs,
|
|
const ObOptDSStat::DSColStats &ds_col_stats,
|
|
int64_t &ds_column_cnt);
|
|
int add_ds_table_stat_item(const ObIArray<ObRawExpr*> &filters,
|
|
ObDSStatItemType stat_item_type,
|
|
ObOptDSStat &ds_stat,
|
|
ObSqlString &filters_str);
|
|
int64_t get_dynamic_sampling_micro_block_num(const ObDSTableParam ¶m);
|
|
int get_table_dml_info(const uint64_t tenant_id,
|
|
const uint64_t table_id,
|
|
int64_t &cur_modified_dml_cnt,
|
|
double &stale_percent_threshold);
|
|
int add_ds_result_cache(ObIArray<ObDSResultItem> &ds_result_items);
|
|
int add_block_info_for_stat_items();
|
|
int get_ds_stat_items(const ObDSTableParam ¶m,
|
|
ObIArray<ObDSResultItem> &ds_result_items);
|
|
int prepare_and_store_session(ObSQLSessionInfo *session,
|
|
sql::ObSQLSessionInfo::StmtSavedValue *&session_value,
|
|
int64_t &nested_count,
|
|
bool &is_no_backslash_escapes,
|
|
transaction::ObTxDesc *&tx_desc);
|
|
int restore_session(ObSQLSessionInfo *session,
|
|
sql::ObSQLSessionInfo::StmtSavedValue *session_value,
|
|
int64_t nested_count,
|
|
bool is_no_backslash_escapes,
|
|
transaction::ObTxDesc *tx_desc);
|
|
|
|
private:
|
|
ObOptimizerContext *ctx_;
|
|
ObIAllocator &allocator_;
|
|
ObString db_name_;
|
|
ObString table_name_;
|
|
ObString alias_name_;
|
|
ObString partition_list_;
|
|
int64_t macro_block_num_;
|
|
int64_t micro_block_num_;
|
|
int64_t sstable_row_count_;
|
|
int64_t memtable_row_count_;
|
|
double sample_block_ratio_;
|
|
int64_t seed_;
|
|
ObString sample_block_;
|
|
ObString basic_hints_;
|
|
ObString where_conditions_;
|
|
ObSEArray<ObDSStatItem *, 4, common::ModulePageAllocator, true> ds_stat_items_;
|
|
ObSEArray<ObObj, 4, common::ModulePageAllocator, true> results_;
|
|
//following members will be used for dynamic sampling join in the future
|
|
//ObString join_type_;
|
|
//ObString join_conditions_;
|
|
//int64_t micro_total_count2_;
|
|
//bool is_left_sample_;
|
|
};
|
|
|
|
class ObDynamicSamplingUtils
|
|
{
|
|
public:
|
|
static int get_valid_dynamic_sampling_level(const ObSQLSessionInfo *session_info,
|
|
const ObTableDynamicSamplingHint *table_ds_hint,
|
|
const int64_t global_ds_level,
|
|
int64_t &ds_level,
|
|
int64_t &sample_block_cnt,
|
|
bool &specify_ds);
|
|
|
|
static int get_ds_table_param(ObOptimizerContext &ctx,
|
|
const ObLogPlan *log_plan,
|
|
const OptTableMeta *table_meta,
|
|
ObDSTableParam &ds_table_param,
|
|
bool &specify_ds);
|
|
|
|
static int check_ds_can_use_filters(const ObIArray<ObRawExpr*> &filters,
|
|
bool &no_use);
|
|
|
|
static const ObDSResultItem *get_ds_result_item(ObDSResultItemType type,
|
|
uint64_t index_id,
|
|
const ObIArray<ObDSResultItem> &ds_result_items);
|
|
|
|
static int64_t get_dynamic_sampling_max_timeout(ObOptimizerContext &ctx);
|
|
|
|
static int add_failed_ds_table_list(const uint64_t table_id,
|
|
const common::ObIArray<int64_t> &used_part_id,
|
|
common::ObIArray<ObDSFailTabInfo> &failed_list);
|
|
|
|
static bool is_ds_virtual_table(const int64_t table_id);
|
|
|
|
static int get_ds_table_degree(ObOptimizerContext &ctx,
|
|
const ObLogPlan *log_plan,
|
|
const uint64_t table_id,
|
|
const uint64_t ref_table_id,
|
|
int64_t °ree);
|
|
|
|
static bool check_is_failed_ds_table(const uint64_t table_id,
|
|
const common::ObIArray<int64_t> &used_part_id,
|
|
const common::ObIArray<ObDSFailTabInfo> &failed_list);
|
|
|
|
private:
|
|
static int check_ds_can_use_filter(const ObRawExpr *filter,
|
|
bool &no_use,
|
|
int64_t &total_expr_cnt);
|
|
|
|
static int get_ds_table_part_info(ObOptimizerContext &ctx,
|
|
const uint64_t ref_table_id,
|
|
const common::ObIArray<ObTabletID> &used_tablets,
|
|
bool &need_specify_partition,
|
|
ObIArray<PartInfo> &partition_infos);
|
|
|
|
}
|
|
;
|
|
|
|
|
|
// NOTE(review): disabled draft of a join-sampling parameter struct, kept for
// reference. It refers to ObOptDSBaseParam, which is not declared in this header.
// struct ObOptDSJoinParam {
//   ObOptDSJoinParam() :
//     left_table_param_(),
//     right_table_param_(),
//     join_type_(UNKNOWN_JOIN),
//     max_ds_timeout_(0),
//     join_conditions_(NULL)
//   {}
//
//   bool is_valid() const { return left_table_param_.is_valid() &&
//                                  right_table_param_.is_valid() &&
//                                  join_type_ != UNKNOWN_JOIN &&
//                                  max_ds_timeout_ > 0 &&
//                                  join_conditions_ != NULL; }
//   ObOptDSBaseParam left_table_param_;
//   ObOptDSBaseParam right_table_param_;
//   ObJoinType join_type_;
//   int64_t max_ds_timeout_;
//   const ObIArray<ObRawExpr*> *join_conditions_;
//
//   TO_STRING_KV(K(left_table_param_),
//                K(right_table_param_),
//                K(join_type_),
//                K(max_ds_timeout_),
//                KPC(join_conditions_));
// };

} // end of namespace common
} // end of namespace oceanbase

#endif /* _OB_DYNAMIC_SAMPLING_H_ */