[CP] Fix null hash value mismatch by using datum hash_func_v2

This commit is contained in:
hezuojiao
2024-02-08 14:51:04 +00:00
committed by ob-robot
parent 696afcdea8
commit 844dfb0162
6 changed files with 55 additions and 15 deletions

View File

@ -471,6 +471,19 @@ bool ObDatumFuncs::is_geometry(const ObObjType type)
return (tc == ObGeometryTC);
}
/**
* This function is primarily responsible for handling inconsistent hash computations
* for null types and the null values of those types, such as string, float, double, etc.
* It ensures that the hashing process treats null values and null type representations
* consistently across such data types, avoiding discrepancies in hash results.
*/
bool ObDatumFuncs::is_null_aware_hash_type(const ObObjType type)
{
const ObObjTypeClass tc = OBJ_TYPE_TO_CLASS[type];
return is_string_type(type) || is_json(type) || is_geometry(type) ||
(tc == ObUserDefinedSQLTC) || (tc == ObFloatTC) || (tc == ObDoubleTC);
}
OB_SERIALIZE_MEMBER(ObCmpFunc, ser_cmp_func_);
OB_SERIALIZE_MEMBER(ObHashFunc, ser_hash_func_, ser_batch_hash_func_);

View File

@ -44,6 +44,7 @@ public:
// True for ObNVarchar2Type, and for ObVarcharType unless its collation is
// CS_TYPE_BINARY.
static bool is_varying_len_char_type(const ObObjType type, const ObCollationType cs_type) {
  if (ObNVarchar2Type == type) {
    return true;
  }
  return ObVarcharType == type && CS_TYPE_BINARY != cs_type;
}
static bool is_null_aware_hash_type(const ObObjType type);
static ObScale max_scale(const ObScale s1, const ObScale s2)
{
ObScale max_scale = SCALE_UNKNOWN_YET;

View File

@ -1907,6 +1907,9 @@ bool ObRelationalExprOperator::can_cmp_without_cast(ObExprResType type1,
if (ob_is_enum_or_set_type(type1.get_type())
&& ob_is_enum_or_set_type(type2.get_type())) {
need_no_cast = false;
} else if ((type1.is_null() && ObDatumFuncs::is_null_aware_hash_type(type2.get_type())) ||
(type2.is_null() && ObDatumFuncs::is_null_aware_hash_type(type1.get_type()))) {
need_no_cast = false;
} else {
if (ObDatumFuncs::is_string_type(type1.get_type())
&& ObDatumFuncs::is_string_type(type2.get_type())) {

View File

@ -1175,7 +1175,7 @@ int ObTransformConstPropagate::replace_internal(ObRawExpr *&cur_expr,
if (OB_FAIL(ret)) {
} else if (!can_replace) {
// do nothing
} else if (OB_FAIL(check_need_cast_when_replace(cur_expr, parent_exprs, need_cast))) {
} else if (OB_FAIL(check_need_cast_when_replace(cur_expr, const_expr, parent_exprs, need_cast))) {
LOG_WARN("failed to check need cast", K(ret));
} else if (need_cast && OB_FAIL(prepare_new_expr(expr_const_infos.at(i)))) {
LOG_WARN("failed to prepare new expr", K(ret));
@ -1298,7 +1298,7 @@ int ObTransformConstPropagate::do_remove_const_exec_param(ObRawExpr *&expr,
ObRawExpr *cast_expr = ref_expr;
bool need_cast = false;
trans_happened = true;
if (OB_FAIL(check_need_cast_when_replace(expr, parent_exprs, need_cast))) {
if (OB_FAIL(check_need_cast_when_replace(expr, ref_expr, parent_exprs, need_cast))) {
LOG_WARN("failed to check need cast", K(ret));
} else if (!need_cast && parent_exprs.count() != 0) {
expr = ref_expr;
@ -1341,6 +1341,7 @@ int ObTransformConstPropagate::do_remove_const_exec_param(ObRawExpr *&expr,
}
int ObTransformConstPropagate::check_need_cast_when_replace(ObRawExpr *expr,
ObRawExpr *const_expr,
ObIArray<ObRawExpr *> &parent_exprs,
bool &need_cast)
{
@ -1357,10 +1358,31 @@ int ObTransformConstPropagate::check_need_cast_when_replace(ObRawExpr *expr,
need_cast = true;
} else {
ObRawExpr *parent_expr = parent_exprs.at(parent_exprs.count() - 1);
need_cast = !(IS_COMPARISON_OP(parent_expr->get_expr_type()) ||
parent_expr->is_query_ref_expr() ||
parent_expr->is_win_func_expr() ||
T_OP_ROW == parent_expr->get_expr_type());
if (OB_ISNULL(parent_expr) || OB_ISNULL(const_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null", K(ret));
} else {
bool is_parent_cmp = IS_COMPARISON_OP(parent_expr->get_expr_type());
// To match the behavior of casting NULL values for hash comparison,
// a cast needs to be added above NULL when its parent expr is a CMP_OP.
bool need_cast_null = false;
if (is_parent_cmp && const_expr->get_expr_type() == T_NULL) {
for (int64_t i = 0; !need_cast_null && OB_SUCC(ret) &&
i < parent_expr->get_param_count(); ++i) {
const ObRawExpr *param_expr = parent_expr->get_param_expr(i);
if (OB_ISNULL(param_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("param expr is null");
} else if (ObDatumFuncs::is_null_aware_hash_type(param_expr->get_result_type().get_type())) {
need_cast_null = true;
}
}
}
need_cast = need_cast_null || !(is_parent_cmp ||
parent_expr->is_query_ref_expr() ||
parent_expr->is_win_func_expr() ||
T_OP_ROW == parent_expr->get_expr_type());
}
}
return ret;
}

View File

@ -275,6 +275,7 @@ private:
bool &trans_happened);
int check_need_cast_when_replace(ObRawExpr *expr,
ObRawExpr *const_expr,
ObIArray<ObRawExpr *> &parent_exprs,
bool &need_cast);

View File

@ -238,7 +238,7 @@ Query Plan
===============================================
Outputs & filters:
-------------------------------------
0 - output([NULL = t.float_t]), filter(nil), rowset=16
0 - output([cast(NULL, FLOAT(-1, -1)) = t.float_t]), filter(nil), rowset=16
access([t.float_t]), partitions(p0)
limit(1), offset(nil), is_index_back=false, is_global_index=false,
range_key([t.__pk_increment]), range(MIN ; MAX)always true
@ -257,7 +257,7 @@ Query Plan
===============================================
Outputs & filters:
-------------------------------------
0 - output([NULL = t.ufloat_t]), filter(nil), rowset=16
0 - output([cast(NULL, FLOAT UNSIGNED(-1, -1)) = t.ufloat_t]), filter(nil), rowset=16
access([t.ufloat_t]), partitions(p0)
limit(1), offset(nil), is_index_back=false, is_global_index=false,
range_key([t.__pk_increment]), range(MIN ; MAX)always true
@ -276,7 +276,7 @@ Query Plan
===============================================
Outputs & filters:
-------------------------------------
0 - output([NULL = t.double_t]), filter(nil), rowset=16
0 - output([cast(NULL, DOUBLE(-1, -1)) = t.double_t]), filter(nil), rowset=16
access([t.double_t]), partitions(p0)
limit(1), offset(nil), is_index_back=false, is_global_index=false,
range_key([t.__pk_increment]), range(MIN ; MAX)always true
@ -295,7 +295,7 @@ Query Plan
===============================================
Outputs & filters:
-------------------------------------
0 - output([NULL = t.udouble_t]), filter(nil), rowset=16
0 - output([cast(NULL, DOUBLE UNSIGNED(-1, -1)) = t.udouble_t]), filter(nil), rowset=16
access([t.udouble_t]), partitions(p0)
limit(1), offset(nil), is_index_back=false, is_global_index=false,
range_key([t.__pk_increment]), range(MIN ; MAX)always true
@ -447,7 +447,7 @@ Query Plan
===============================================
Outputs & filters:
-------------------------------------
0 - output([NULL = t.varchar_t]), filter(nil), rowset=16
0 - output([cast(NULL, VARCHAR(1048576)) = t.varchar_t]), filter(nil), rowset=16
access([t.varchar_t]), partitions(p0)
limit(1), offset(nil), is_index_back=false, is_global_index=false,
range_key([t.__pk_increment]), range(MIN ; MAX)always true
@ -466,7 +466,7 @@ Query Plan
===============================================
Outputs & filters:
-------------------------------------
0 - output([NULL = t.char_t]), filter(nil), rowset=16
0 - output([cast(NULL, CHAR(1048576)) = t.char_t]), filter(nil), rowset=16
access([t.char_t]), partitions(p0)
limit(1), offset(nil), is_index_back=false, is_global_index=false,
range_key([t.__pk_increment]), range(MIN ; MAX)always true
@ -485,7 +485,7 @@ Query Plan
===============================================
Outputs & filters:
-------------------------------------
0 - output([NULL = t.tinytext_t]), filter(nil), rowset=16
0 - output([cast(NULL, TINYTEXT(256)) = t.tinytext_t]), filter(nil), rowset=16
access([t.tinytext_t]), partitions(p0)
limit(1), offset(nil), is_index_back=false, is_global_index=false,
range_key([t.__pk_increment]), range(MIN ; MAX)always true
@ -504,7 +504,7 @@ Query Plan
===============================================
Outputs & filters:
-------------------------------------
0 - output([NULL = t.mediumtext_t]), filter(nil)
0 - output([cast(NULL, MEDIUMTEXT(16777216)) = t.mediumtext_t]), filter(nil)
access([t.mediumtext_t]), partitions(p0)
limit(1), offset(nil), is_index_back=false, is_global_index=false,
range_key([t.__pk_increment]), range(MIN ; MAX)always true
@ -523,7 +523,7 @@ Query Plan
===============================================
Outputs & filters:
-------------------------------------
0 - output([NULL = t.longtext_t]), filter(nil)
0 - output([cast(NULL, LONGTEXT(536870911)) = t.longtext_t]), filter(nil)
access([t.longtext_t]), partitions(p0)
limit(1), offset(nil), is_index_back=false, is_global_index=false,
range_key([t.__pk_increment]), range(MIN ; MAX)always true