[CP] Fix the bug that the collation is ignored when calculating selectivity

This commit is contained in:
xianyu-w
2024-05-06 13:27:58 +00:00
committed by ob-robot
parent 2361062f7c
commit 777e08e9fa
7 changed files with 151 additions and 41 deletions

View File

@ -3307,7 +3307,7 @@ static struct VarsInit{
}();
[&] (){
ObSysVars[234].default_value_ = "4.3.0.0" ;
ObSysVars[234].default_value_ = "4.3.1.0" ;
ObSysVars[234].info_ = "enabling a series of optimizer features based on an OceanBase release number" ;
ObSysVars[234].name_ = "optimizer_features_enable" ;
ObSysVars[234].data_type_ = ObVarcharType ;

View File

@ -3343,7 +3343,7 @@
"optimizer_features_enable": {
"id": 10150,
"name": "optimizer_features_enable",
"default_value": "4.3.0.0",
"default_value": "4.3.1.0",
"base_value": "",
"data_type": "varchar",
"info": "enabling a series of optimizer features based on an OceanBase release number",

View File

@ -371,9 +371,10 @@ int ObOptEstUtils::columns_has_unique_subset(const ObIArray<uint64_t> &full,
return ret;
}
double ObOptEstObjToScalar::convert_obj_to_scalar(const ObObj *obj)
int ObOptEstObjToScalar::convert_obj_to_scalar(const ObObj *obj, double &scalar)
{
double scalar = 0.0;
int ret = OB_SUCCESS;
scalar = 0.0;
if (NULL == obj) {
//NULL obj means a double 0.0 as scalar to return
@ -454,24 +455,24 @@ double ObOptEstObjToScalar::convert_obj_to_scalar(const ObObj *obj)
case ObLongTextType:
case ObVarcharType: { // charset: utf-8, collation: utf8_general_ci
const ObString &str = obj->get_varchar();
scalar = convert_string_to_scalar(str);
ret = convert_string_to_scalar(obj->get_collation_type(), str, scalar);
break;
}
case ObCharType:
case ObNCharType:
case ObNVarchar2Type: { // charset: utf-8, collation: utf8_general_ci
const ObString &str = obj->get_string();
scalar = convert_string_to_scalar(str);
ret = convert_string_to_scalar(obj->get_collation_type(), str, scalar);
break;
}
case ObHexStringType: {
const ObString &str = obj->get_varbinary();
scalar = convert_string_to_scalar(str);
ret = convert_string_to_scalar(obj->get_collation_type(), str, scalar);
break;
}
case ObRawType: {
const ObString &str = obj->get_raw();
scalar = convert_string_to_scalar(str);
ret = convert_string_to_scalar(obj->get_collation_type(), str, scalar);
break;
}
case ObIntervalYMType: {
@ -493,7 +494,7 @@ double ObOptEstObjToScalar::convert_obj_to_scalar(const ObObj *obj)
}
}
return scalar;
return ret;
}
int ObOptEstObjToScalar::convert_obj_to_double(const ObObj *obj, double &num)
@ -524,8 +525,8 @@ int ObOptEstObjToScalar::convert_obj_to_double(const ObObj *obj, double &num)
LOG_WARN("failed to get double from number", K(ret));
}
}
} else {
num = convert_obj_to_scalar(obj);
} else if (OB_FAIL(convert_obj_to_scalar(obj, num))) {
LOG_WARN("failed to convert obj to scalar", K(ret));
}
return ret;
}
@ -581,7 +582,12 @@ int ObOptEstObjToScalar::convert_obj_to_scalar_obj(const common::ObObj* obj, com
break;
}
default: {
out->set_double(convert_obj_to_scalar(obj));
double num = 0.0;
if (OB_FAIL(convert_obj_to_scalar(obj, num))) {
LOG_WARN("failed to convert obj to scalar", K(ret));
} else {
out->set_double(num);
}
break;
}
}
@ -598,7 +604,8 @@ int ObOptEstObjToScalar::convert_objs_to_scalars(
ObObj *min_out,
ObObj *max_out,
ObObj *start_out,
ObObj *end_out)
ObObj *end_out,
bool convert2sortkey)
{
int ret = OB_SUCCESS;
const static int64_t START_POS = 0;
@ -639,18 +646,19 @@ int ObOptEstObjToScalar::convert_objs_to_scalars(
//Special case for All String : truncate common header and use dynamic base
ObString str;
ObSEArray<ObString, 4> strs;
ObSEArray<ObCollationType, 4> cs_type;
if (start->is_string_type()
&& OB_FAIL(add_to_string_conversion_array(*start, strs, str_conv_map, START_POS))) {
&& OB_FAIL(add_to_string_conversion_array(*start, cs_type, strs, str_conv_map, START_POS))) {
LOG_WARN("Failed to add start to convert array", K(ret));
} else if (end->is_string_type()
&& OB_FAIL(add_to_string_conversion_array(*end, strs, str_conv_map, END_POS))) {
&& OB_FAIL(add_to_string_conversion_array(*end, cs_type, strs, str_conv_map, END_POS))) {
LOG_WARN("Failed to add end to convert array", K(ret));
} else if (with_min_max) {
if (min->is_string_type()
&& OB_FAIL(add_to_string_conversion_array(*min, strs, str_conv_map, MIN_POS))) {
&& OB_FAIL(add_to_string_conversion_array(*min, cs_type, strs, str_conv_map, MIN_POS))) {
LOG_WARN("Failed to add min to convert array", K(ret));
} else if (max->is_string_type()
&& OB_FAIL(add_to_string_conversion_array(*max, strs, str_conv_map, MAX_POS))) {
&& OB_FAIL(add_to_string_conversion_array(*max, cs_type, strs, str_conv_map, MAX_POS))) {
LOG_WARN("Failed to add min to convert array", K(ret));
} else {
//do nothing
@ -658,7 +666,12 @@ int ObOptEstObjToScalar::convert_objs_to_scalars(
}
if (OB_SUCC(ret)) {
if (strs.count() > 0) {
if (OB_FAIL(convert_strings_to_scalar(strs, string_scalars))) {
if (!convert2sortkey) {
for (int64_t i = 0; i < cs_type.count(); i ++) {
cs_type.at(i) = CS_TYPE_BINARY;
}
}
if (OB_FAIL(convert_strings_to_scalar(cs_type, strs, string_scalars))) {
LOG_WARN("Failed to convert string scalar", K(ret));
} else if (string_scalars.count() != strs.count()) {
ret = OB_ERR_UNEXPECTED;
@ -711,6 +724,7 @@ int ObOptEstObjToScalar::convert_objs_to_scalars(
int ObOptEstObjToScalar::add_to_string_conversion_array(
const ObObj &strobj,
common::ObIArray<ObCollationType> &cs_type,
ObIArray<common::ObString> &arr,
uint64_t &convertable_map,
int64_t pos)
@ -727,6 +741,8 @@ int ObOptEstObjToScalar::add_to_string_conversion_array(
LOG_WARN("Failed to get string", K(ret));
} else if (OB_FAIL(arr.push_back(str))) {
LOG_WARN("Failed to push back", K(ret));
} else if (OB_FAIL(cs_type.push_back(strobj.get_collation_type()))) {
LOG_WARN("failed to push back", K(ret));
} else {
convertable_map |= (0x1 << pos);
}
@ -734,6 +750,7 @@ int ObOptEstObjToScalar::add_to_string_conversion_array(
}
int ObOptEstObjToScalar::convert_strings_to_scalar(
const common::ObIArray<ObCollationType> &cs_type,
const common::ObIArray<common::ObString> &origin_strs,
common::ObIArray<double> &scalars)
{
@ -742,13 +759,29 @@ int ObOptEstObjToScalar::convert_strings_to_scalar(
double base = 256.0;
uint8_t offset = 0;
int64_t common_prefix_length = 0;
if (OB_FAIL(find_common_prefix_len(origin_strs, common_prefix_length))) {
ObArenaAllocator tmp_alloc("ObOptEstUtils");
common::ObSEArray<common::ObString, 4> sort_keys;
if (OB_UNLIKELY(origin_strs.count() != cs_type.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected cs type", K(ret));
}
for (int64_t i = 0; OB_SUCC(ret) && i < origin_strs.count(); i ++)
{
ObString *sort_key = sort_keys.alloc_place_holder();
if (OB_ISNULL(sort_key)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("failed to allocate", K(ret));
} else if (OB_FAIL(get_string_sort_key(tmp_alloc, cs_type.at(i), origin_strs.at(i), *sort_key))) {
LOG_WARN("failed to get sort key", K(ret));
}
}
if (FAILEDx(find_common_prefix_len(sort_keys, common_prefix_length))) {
LOG_WARN("Failed to find common prefix length", K(ret));
} else if (OB_FAIL(find_string_scalar_offset_base(origin_strs, common_prefix_length, offset, base))) {
} else if (OB_FAIL(find_string_scalar_offset_base(sort_keys, common_prefix_length, offset, base))) {
LOG_WARN("Failed to find offset and base", K(ret));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < origin_strs.count(); ++i) {
double scalar = convert_string_to_scalar(origin_strs.at(i),
double scalar = convert_string_to_scalar(sort_keys.at(i),
common_prefix_length,
offset,
base);
@ -859,6 +892,42 @@ int ObOptEstObjToScalar::find_string_scalar_offset_base(
return ret;
}
// Builds the collation sort key of `str` so that strings can be compared
// byte-wise in the order defined by `cs_type`.
//
// @param alloc     allocator that backs the generated sort key buffer
// @param cs_type   collation used to derive the sort key
// @param str       input string
// @param sort_key  [out] `str` itself when no transformation is applied,
//                  otherwise a string pointing into a buffer taken from `alloc`
// @return OB_SUCCESS, or the error reported by the buffer allocation
//
// The input is passed through unchanged when the collation is a binary-sort
// collation, the string is empty, or no charset/collation handler is found.
// NOTE(review): the `is_valid_character` flag reported by ObCharset::sortkey
// is not inspected here - confirm that is intended for invalid input.
int ObOptEstObjToScalar::get_string_sort_key(ObIAllocator &alloc, ObCollationType cs_type,
                                             const common::ObString &str, common::ObString &sort_key)
{
  int ret = OB_SUCCESS;
  const ObCharsetInfo *charset = ObCharset::get_charset(cs_type);
  const bool keep_raw_bytes = ObCharset::is_bin_sort(cs_type)
                              || str.empty()
                              || NULL == charset
                              || NULL == charset->coll;
  if (keep_raw_bytes) {
    // Nothing to transform: hand back the original bytes.
    sort_key = str;
  } else {
    // Size the buffer from the collation's own estimate, scaled by mbmaxlen.
    const size_t key_capacity =
        charset->coll->strnxfrmlen(charset, str.length()) * charset->mbmaxlen;
    ObArrayWrap<char> key_buf;
    bool is_valid_character = false;
    if (OB_FAIL(key_buf.allocate_array(alloc, key_capacity))) {
      LOG_WARN("failed to allocate", K(ret));
    } else {
      const size_t key_len = ObCharset::sortkey(cs_type, str.ptr(), str.length(),
                                                key_buf.get_data(), key_capacity,
                                                is_valid_character);
      sort_key.assign_ptr(key_buf.get_data(), key_len);
    }
  }
  return ret;
}
// Converts `str` to a scalar using its collation: the string is first turned
// into a sort key for `cs_type`, and that key is then fed to the
// single-argument convert_string_to_scalar overload.
//
// @param cs_type  collation of `str`
// @param str      string to convert
// @param scalar   [out] resulting scalar; left untouched on failure
// @return OB_SUCCESS, or the error from get_string_sort_key
int ObOptEstObjToScalar::convert_string_to_scalar(ObCollationType cs_type, const common::ObString &str, double &scalar)
{
  int ret = OB_SUCCESS;
  ObString collation_key;
  // Scratch allocator for the sort key; the key is consumed before returning.
  ObArenaAllocator tmp_alloc("ObOptEstUtils");
  ret = get_string_sort_key(tmp_alloc, cs_type, str, collation_key);
  if (OB_SUCC(ret)) {
    scalar = convert_string_to_scalar(collation_key);
  } else {
    LOG_WARN("failed to get sort key", K(ret));
  }
  return ret;
}
double ObOptEstObjToScalar::convert_string_to_scalar(
const common::ObString &str,
int64_t prefix_len,

View File

@ -136,11 +136,12 @@ public:
common::ObObj *min_out,
common::ObObj *max_out,
common::ObObj *start_out,
common::ObObj *end_out);
common::ObObj *end_out,
bool convert2sortkey = true);
/////////////Start convert obj to scalar related function//////
// @param obj
// @return
static double convert_obj_to_scalar(const common::ObObj* obj);
static int convert_obj_to_scalar(const common::ObObj* obj, double &scalar_value);
// double type cannot represent min (max can be represented using Double.INF).
@ -152,13 +153,15 @@ public:
static int convert_string_to_scalar_for_number(const common::ObString &str, double &scala);
private:
static int add_to_string_conversion_array(const common::ObObj &strobj,
common::ObIArray<ObCollationType> &cs_type,
common::ObIArray<common::ObString> &arr,
uint64_t &convertable_map,
int64_t pos);
// 1, find common prefix length of strings
// 2, find dynamic base and offset of strings
// 3, use dynamic base and offset to convert strings to scalars
static int convert_strings_to_scalar(const common::ObIArray<common::ObString> &origin_strs,
static int convert_strings_to_scalar(const common::ObIArray<ObCollationType> &cs_type,
const common::ObIArray<common::ObString> &origin_strs,
common::ObIArray<double> &scalars);
static double convert_string_to_scalar(const common::ObString &str,
@ -166,6 +169,10 @@ private:
uint8_t offset = 0,
double base = 256.0);
static int get_string_sort_key(ObIAllocator &alloc, ObCollationType cs_type, const common::ObString &str, common::ObString &sort_key);
static int convert_string_to_scalar(ObCollationType cs_type, const common::ObString &str, double &scalar);
static int find_common_prefix_len(const common::ObIArray<common::ObString> &strs,
int64_t &length);
static inline void expand_range(uint8_t &min, uint8_t &max, uint8_t rmin, uint8_t rmax)

View File

@ -1151,7 +1151,8 @@ int ObOptSelectivity::get_column_range_sel(const OptTableMetas &table_metas,
double hist_scale = 0.0;
if (OB_FAIL(get_column_hist_scale(table_metas, ctx, col_expr, hist_scale))) {
LOG_WARN("failed to get columnn hist sample scale", K(ret));
} else if (OB_FAIL(get_range_sel_by_histogram(handler.stat_->get_histogram(),
} else if (OB_FAIL(get_range_sel_by_histogram(ctx,
handler.stat_->get_histogram(),
ranges,
true,
hist_scale,
@ -1340,6 +1341,10 @@ int ObOptSelectivity::calc_column_range_selectivity(const OptTableMetas &table_m
ObObj *new_start_obj = NULL;
ObObj *new_end_obj = NULL;
ObArenaAllocator tmp_alloc("ObOptSel");
bool convert2sortkey =
(ctx.get_compat_version() >= COMPAT_VERSION_4_2_1_BP5 && ctx.get_compat_version() < COMPAT_VERSION_4_2_2) ||
(ctx.get_compat_version() >= COMPAT_VERSION_4_2_4 && ctx.get_compat_version() < COMPAT_VERSION_4_3_0) ||
ctx.get_compat_version() >= COMPAT_VERSION_4_3_1;
if (OB_FAIL(ObDbmsStatsUtils::truncate_string_for_opt_stats(&start_obj, tmp_alloc, new_start_obj)) ||
OB_FAIL(ObDbmsStatsUtils::truncate_string_for_opt_stats(&end_obj, tmp_alloc, new_end_obj))) {
LOG_WARN("failed to convert valid obj for opt stats", K(ret), K(start_obj), K(end_obj),
@ -1352,7 +1357,8 @@ int ObOptSelectivity::calc_column_range_selectivity(const OptTableMetas &table_m
} else if (OB_FAIL(ObOptEstObjToScalar::convert_objs_to_scalars(&minobj, &maxobj,
new_start_obj, new_end_obj,
&minscalar, &maxscalar,
&startscalar, &endscalar))) {
&startscalar, &endscalar,
convert2sortkey))) {
LOG_WARN("failed to convert obj to scalars", K(ret));
} else if (OB_FAIL(do_calc_range_selectivity(minscalar.get_double(),
maxscalar.get_double(),
@ -1384,8 +1390,13 @@ int ObOptSelectivity::calc_column_range_selectivity(const OptTableMetas &table_m
//startobj and endobj cannot be min/max in this branch, no need to defend
ObObj startscalar;
ObObj endscalar;
bool convert2sortkey =
(ctx.get_compat_version() >= COMPAT_VERSION_4_2_1_BP5 && ctx.get_compat_version() < COMPAT_VERSION_4_2_2) ||
(ctx.get_compat_version() >= COMPAT_VERSION_4_2_4 && ctx.get_compat_version() < COMPAT_VERSION_4_3_0) ||
ctx.get_compat_version() >= COMPAT_VERSION_4_3_1;
if (OB_FAIL(ObOptEstObjToScalar::convert_objs_to_scalars(NULL, NULL, &start_obj, &end_obj,
NULL, NULL, &startscalar, &endscalar))) {
NULL, NULL, &startscalar, &endscalar,
convert2sortkey))) {
LOG_WARN("failed to convert objs to scalars", K(ret));
} else {
LOG_TRACE("range column est", K(start_obj), K(end_obj), K(startscalar), K(endscalar));
@ -1910,7 +1921,8 @@ int ObOptSelectivity::get_equal_pred_sel(const ObHistogram &histogram,
return ret;
}
int ObOptSelectivity::get_range_sel_by_histogram(const ObHistogram &histogram,
int ObOptSelectivity::get_range_sel_by_histogram(const OptSelectivityCtx &ctx,
const ObHistogram &histogram,
const ObQueryRangeArray &ranges,
bool no_whole_range,
const double sample_size_scale,
@ -1945,7 +1957,8 @@ int ObOptSelectivity::get_range_sel_by_histogram(const ObHistogram &histogram,
} else if (OB_ISNULL(new_startobj) || OB_ISNULL(new_endobj)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret), K(new_startobj), K(new_endobj));
} else if (OB_FAIL(get_range_pred_sel(histogram,
} else if (OB_FAIL(get_range_pred_sel(ctx,
histogram,
*new_startobj,
range->border_flag_.inclusive_start(),
*new_endobj,
@ -1984,7 +1997,8 @@ int ObOptSelectivity::get_range_sel_by_histogram(const ObHistogram &histogram,
* 2. b[count()-1].ev < maxv, all elements in the histograms satisfy the predicate
*
*/
int ObOptSelectivity::get_less_pred_sel(const ObHistogram &histogram,
int ObOptSelectivity::get_less_pred_sel(const OptSelectivityCtx &ctx,
const ObHistogram &histogram,
const ObObj &maxv,
const bool inclusive,
double &density)
@ -2015,9 +2029,14 @@ int ObOptSelectivity::get_less_pred_sel(const ObHistogram &histogram,
ObObj minobj(histogram.get(idx).endpoint_value_);
ObObj maxobj(histogram.get(idx+1).endpoint_value_);
ObObj startobj(minobj), endobj(maxv);
bool convert2sortkey =
(ctx.get_compat_version() >= COMPAT_VERSION_4_2_1_BP5 && ctx.get_compat_version() < COMPAT_VERSION_4_2_2) ||
(ctx.get_compat_version() >= COMPAT_VERSION_4_2_4 && ctx.get_compat_version() < COMPAT_VERSION_4_3_0) ||
ctx.get_compat_version() >= COMPAT_VERSION_4_3_1;
if (OB_FAIL(ObOptEstObjToScalar::convert_objs_to_scalars(
&minobj, &maxobj, &startobj, &endobj,
&minscalar, &maxscalar, &startscalar, &endscalar))) {
&minscalar, &maxscalar, &startscalar, &endscalar,
convert2sortkey))) {
LOG_WARN("failed to convert objs to scalars", K(ret));
} else if (maxscalar.get_double() - minscalar.get_double() > OB_DOUBLE_EPSINON) {
last_bucket_count = histogram.get(idx+1).endpoint_num_ -
@ -2043,14 +2062,16 @@ int ObOptSelectivity::get_less_pred_sel(const ObHistogram &histogram,
* II. f(minv <= col) can be converted as sample_size - f(col < minv)
*
*/
int ObOptSelectivity::get_greater_pred_sel(const ObHistogram &histogram,
int ObOptSelectivity::get_greater_pred_sel(const OptSelectivityCtx &ctx,
const ObHistogram &histogram,
const ObObj &minv,
const bool inclusive,
double &density)
{
int ret = OB_SUCCESS;
double less_sel = 0;
if (OB_FAIL(get_less_pred_sel(histogram,
if (OB_FAIL(get_less_pred_sel(ctx,
histogram,
minv,
!inclusive,
less_sel))) {
@ -2070,7 +2091,8 @@ int ObOptSelectivity::get_greater_pred_sel(const ObHistogram &histogram,
* the problem can be converted as f(col <(=) maxv) + f(minv <(=) col) - sample_size
*
*/
int ObOptSelectivity::get_range_pred_sel(const ObHistogram &histogram,
int ObOptSelectivity::get_range_pred_sel(const OptSelectivityCtx &ctx,
const ObHistogram &histogram,
const ObObj &minv,
const bool min_inclusive,
const ObObj &maxv,
@ -2080,9 +2102,9 @@ int ObOptSelectivity::get_range_pred_sel(const ObHistogram &histogram,
int ret = OB_SUCCESS;
double less_sel = 0;
double greater_sel = 0;
if (OB_FAIL(get_greater_pred_sel(histogram, minv, min_inclusive, greater_sel))) {
if (OB_FAIL(get_greater_pred_sel(ctx, histogram, minv, min_inclusive, greater_sel))) {
LOG_WARN("failed to get greater predicate selectivity", K(ret));
} else if (OB_FAIL(get_less_pred_sel(histogram, maxv, max_inclusive, less_sel))) {
} else if (OB_FAIL(get_less_pred_sel(ctx, histogram, maxv, max_inclusive, less_sel))) {
LOG_WARN("failed to get less predicate selectivity", K(ret));
} else {
density = less_sel + greater_sel - 1.0;

View File

@ -115,6 +115,11 @@ class OptSelectivityCtx
FilterDependencyType get_dependency_type() const { return dependency_type_; }
void set_dependency_type(FilterDependencyType type) { dependency_type_ = type; }
// Returns the optimizer_features_enable_version_ stored in the query context,
// or 0 when the optimizer context has no query context.
uint64_t get_compat_version() const {
  uint64_t compat_version = 0;
  if (!OB_ISNULL(opt_ctx_.get_query_ctx())) {
    compat_version = opt_ctx_.get_query_ctx()->optimizer_features_enable_version_;
  }
  return compat_version;
}
void init_op_ctx(const EqualSets *equal_sets, const double current_rows,
FilterDependencyType dependency_type = FilterDependencyType::INDEPENDENT)
{
@ -635,23 +640,27 @@ public:
const double sample_size_scale,
double &density);
static int get_range_sel_by_histogram(const common::ObHistogram &histogram,
static int get_range_sel_by_histogram(const OptSelectivityCtx &ctx,
const common::ObHistogram &histogram,
const ObQueryRangeArray &ranges,
bool no_whole_range,
const double sample_size_scale,
double &selectivity);
static int get_less_pred_sel(const ObHistogram &histogram,
static int get_less_pred_sel(const OptSelectivityCtx &ctx,
const ObHistogram &histogram,
const ObObj &maxv,
const bool inclusive,
double &density);
static int get_greater_pred_sel(const ObHistogram &histogram,
static int get_greater_pred_sel(const OptSelectivityCtx &ctx,
const ObHistogram &histogram,
const ObObj &minv,
const bool inclusive,
double &density);
static int get_range_pred_sel(const ObHistogram &histogram,
static int get_range_pred_sel(const OptSelectivityCtx &ctx,
const ObHistogram &histogram,
const ObObj &minv,
const bool min_inclusive,
const ObObj &maxv,

View File

@ -210,10 +210,13 @@ struct ObGlobalHint {
#define COMPAT_VERSION_4_2_1 (oceanbase::common::cal_version(4, 2, 1, 0))
//#define COMPAT_VERSION_4_2_1_BP3 (oceanbase::common::cal_version(4, 2, 1, 3))
#define COMPAT_VERSION_4_2_1_BP4 (oceanbase::common::cal_version(4, 2, 1, 4))
#define COMPAT_VERSION_4_2_1_BP5 (oceanbase::common::cal_version(4, 2, 1, 5))
#define COMPAT_VERSION_4_2_2 (oceanbase::common::cal_version(4, 2, 2, 0))
#define COMPAT_VERSION_4_2_3 (oceanbase::common::cal_version(4, 2, 3, 0))
#define COMPAT_VERSION_4_2_4 (oceanbase::common::cal_version(4, 2, 4, 0))
#define COMPAT_VERSION_4_3_0 (oceanbase::common::cal_version(4, 3, 0, 0))
#define LASTED_COMPAT_VERSION COMPAT_VERSION_4_3_0
#define COMPAT_VERSION_4_3_1 (oceanbase::common::cal_version(4, 3, 1, 0))
#define LASTED_COMPAT_VERSION COMPAT_VERSION_4_3_1
// A version is accepted when it lies in the closed range
// [COMPAT_VERSION_4_0, LASTED_COMPAT_VERSION].
static bool is_valid_opt_features_version(uint64_t version)
{
  const bool not_too_old = version >= COMPAT_VERSION_4_0;
  const bool not_too_new = version <= LASTED_COMPAT_VERSION;
  return not_too_old && not_too_new;
}