diff --git a/src/share/datum/ob_datum_funcs.cpp b/src/share/datum/ob_datum_funcs.cpp index e3e4ed1a27..9e4b65d494 100644 --- a/src/share/datum/ob_datum_funcs.cpp +++ b/src/share/datum/ob_datum_funcs.cpp @@ -471,6 +471,19 @@ bool ObDatumFuncs::is_geometry(const ObObjType type) return (tc == ObGeometryTC); } +/** + * Returns true when `type` is a string, JSON, geometry, user-defined SQL, float or double type. + * For these types the hash of a null-typed value and the hash of a NULL value of the type itself + * can be computed inconsistently. Callers use this predicate to decide whether a NULL operand + * compared against such a type needs a cast, so that both sides are hashed consistently. + */ +bool ObDatumFuncs::is_null_aware_hash_type(const ObObjType type) +{ + const ObObjTypeClass tc = OBJ_TYPE_TO_CLASS[type]; + return is_string_type(type) || is_json(type) || is_geometry(type) || + (tc == ObUserDefinedSQLTC) || (tc == ObFloatTC) || (tc == ObDoubleTC); +} + OB_SERIALIZE_MEMBER(ObCmpFunc, ser_cmp_func_); OB_SERIALIZE_MEMBER(ObHashFunc, ser_hash_func_, ser_batch_hash_func_); diff --git a/src/share/datum/ob_datum_funcs.h b/src/share/datum/ob_datum_funcs.h index e98b2456ae..4e7726fff2 100644 --- a/src/share/datum/ob_datum_funcs.h +++ b/src/share/datum/ob_datum_funcs.h @@ -44,6 +44,7 @@ public: static bool is_varying_len_char_type(const ObObjType type, const ObCollationType cs_type) { return (type == ObNVarchar2Type || (type == ObVarcharType && cs_type != CS_TYPE_BINARY)); } + static bool is_null_aware_hash_type(const ObObjType type); static ObScale max_scale(const ObScale s1, const ObScale s2) { ObScale max_scale = SCALE_UNKNOWN_YET; diff --git a/src/sql/engine/expr/ob_expr_operator.cpp b/src/sql/engine/expr/ob_expr_operator.cpp index ad6e172b03..2d126ee868 100644 --- a/src/sql/engine/expr/ob_expr_operator.cpp +++ b/src/sql/engine/expr/ob_expr_operator.cpp @@ -1907,6 +1907,9 @@ bool ObRelationalExprOperator::can_cmp_without_cast(ObExprResType type1, if (ob_is_enum_or_set_type(type1.get_type()) && 
ob_is_enum_or_set_type(type2.get_type())) { need_no_cast = false; + } else if ((type1.is_null() && ObDatumFuncs::is_null_aware_hash_type(type2.get_type())) || + (type2.is_null() && ObDatumFuncs::is_null_aware_hash_type(type1.get_type()))) { + need_no_cast = false; } else { if (ObDatumFuncs::is_string_type(type1.get_type()) && ObDatumFuncs::is_string_type(type2.get_type())) { diff --git a/src/sql/rewrite/ob_transform_const_propagate.cpp b/src/sql/rewrite/ob_transform_const_propagate.cpp index f00633a00b..50cec561a3 100644 --- a/src/sql/rewrite/ob_transform_const_propagate.cpp +++ b/src/sql/rewrite/ob_transform_const_propagate.cpp @@ -1175,7 +1175,7 @@ int ObTransformConstPropagate::replace_internal(ObRawExpr *&cur_expr, if (OB_FAIL(ret)) { } else if (!can_replace) { // do nothing - } else if (OB_FAIL(check_need_cast_when_replace(cur_expr, parent_exprs, need_cast))) { + } else if (OB_FAIL(check_need_cast_when_replace(cur_expr, const_expr, parent_exprs, need_cast))) { LOG_WARN("failed to check need cast", K(ret)); } else if (need_cast && OB_FAIL(prepare_new_expr(expr_const_infos.at(i)))) { LOG_WARN("failed to prepare new expr", K(ret)); @@ -1298,7 +1298,7 @@ int ObTransformConstPropagate::do_remove_const_exec_param(ObRawExpr *&expr, ObRawExpr *cast_expr = ref_expr; bool need_cast = false; trans_happened = true; - if (OB_FAIL(check_need_cast_when_replace(expr, parent_exprs, need_cast))) { + if (OB_FAIL(check_need_cast_when_replace(expr, ref_expr, parent_exprs, need_cast))) { LOG_WARN("failed to check need cast", K(ret)); } else if (!need_cast && parent_exprs.count() != 0) { expr = ref_expr; @@ -1341,6 +1341,7 @@ int ObTransformConstPropagate::do_remove_const_exec_param(ObRawExpr *&expr, } int ObTransformConstPropagate::check_need_cast_when_replace(ObRawExpr *expr, + ObRawExpr *const_expr, ObIArray &parent_exprs, bool &need_cast) { @@ -1357,10 +1358,31 @@ int ObTransformConstPropagate::check_need_cast_when_replace(ObRawExpr *expr, need_cast = true; } else { ObRawExpr 
*parent_expr = parent_exprs.at(parent_exprs.count() - 1); - need_cast = !(IS_COMPARISON_OP(parent_expr->get_expr_type()) || - parent_expr->is_query_ref_expr() || - parent_expr->is_win_func_expr() || - T_OP_ROW == parent_expr->get_expr_type()); + if (OB_ISNULL(parent_expr) || OB_ISNULL(const_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + bool is_parent_cmp = IS_COMPARISON_OP(parent_expr->get_expr_type()); + // To match the behavior of casting NULL values for hash comparison, a cast must be added + // above NULL when its parent expr is a comparison op with a null-aware hash type param. + bool need_cast_null = false; + if (is_parent_cmp && const_expr->get_expr_type() == T_NULL) { + for (int64_t i = 0; !need_cast_null && OB_SUCC(ret) && + i < parent_expr->get_param_count(); ++i) { + const ObRawExpr *param_expr = parent_expr->get_param_expr(i); + if (OB_ISNULL(param_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param expr is null"); + } else if (ObDatumFuncs::is_null_aware_hash_type(param_expr->get_result_type().get_type())) { + need_cast_null = true; + } + } + } + need_cast = need_cast_null || !(is_parent_cmp || + parent_expr->is_query_ref_expr() || + parent_expr->is_win_func_expr() || + T_OP_ROW == parent_expr->get_expr_type()); + } } return ret; } diff --git a/src/sql/rewrite/ob_transform_const_propagate.h b/src/sql/rewrite/ob_transform_const_propagate.h index b8660eb49b..5de3c87b44 100644 --- a/src/sql/rewrite/ob_transform_const_propagate.h +++ b/src/sql/rewrite/ob_transform_const_propagate.h @@ -275,6 +275,7 @@ private: bool &trans_happened); int check_need_cast_when_replace(ObRawExpr *expr, + ObRawExpr *const_expr, ObIArray &parent_exprs, bool &need_cast); diff --git a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/static_engine_cmp_null.result b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/static_engine_cmp_null.result index 0caf57d02f..8133e6558e 100644 --- 
a/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/static_engine_cmp_null.result +++ b/tools/deploy/mysql_test/test_suite/static_engine/r/mysql/static_engine_cmp_null.result @@ -238,7 +238,7 @@ Query Plan =============================================== Outputs & filters: ------------------------------------- - 0 - output([NULL = t.float_t]), filter(nil), rowset=16 + 0 - output([cast(NULL, FLOAT(-1, -1)) = t.float_t]), filter(nil), rowset=16 access([t.float_t]), partitions(p0) limit(1), offset(nil), is_index_back=false, is_global_index=false, range_key([t.__pk_increment]), range(MIN ; MAX)always true @@ -257,7 +257,7 @@ Query Plan =============================================== Outputs & filters: ------------------------------------- - 0 - output([NULL = t.ufloat_t]), filter(nil), rowset=16 + 0 - output([cast(NULL, FLOAT UNSIGNED(-1, -1)) = t.ufloat_t]), filter(nil), rowset=16 access([t.ufloat_t]), partitions(p0) limit(1), offset(nil), is_index_back=false, is_global_index=false, range_key([t.__pk_increment]), range(MIN ; MAX)always true @@ -276,7 +276,7 @@ Query Plan =============================================== Outputs & filters: ------------------------------------- - 0 - output([NULL = t.double_t]), filter(nil), rowset=16 + 0 - output([cast(NULL, DOUBLE(-1, -1)) = t.double_t]), filter(nil), rowset=16 access([t.double_t]), partitions(p0) limit(1), offset(nil), is_index_back=false, is_global_index=false, range_key([t.__pk_increment]), range(MIN ; MAX)always true @@ -295,7 +295,7 @@ Query Plan =============================================== Outputs & filters: ------------------------------------- - 0 - output([NULL = t.udouble_t]), filter(nil), rowset=16 + 0 - output([cast(NULL, DOUBLE UNSIGNED(-1, -1)) = t.udouble_t]), filter(nil), rowset=16 access([t.udouble_t]), partitions(p0) limit(1), offset(nil), is_index_back=false, is_global_index=false, range_key([t.__pk_increment]), range(MIN ; MAX)always true @@ -447,7 +447,7 @@ Query Plan 
=============================================== Outputs & filters: ------------------------------------- - 0 - output([NULL = t.varchar_t]), filter(nil), rowset=16 + 0 - output([cast(NULL, VARCHAR(1048576)) = t.varchar_t]), filter(nil), rowset=16 access([t.varchar_t]), partitions(p0) limit(1), offset(nil), is_index_back=false, is_global_index=false, range_key([t.__pk_increment]), range(MIN ; MAX)always true @@ -466,7 +466,7 @@ Query Plan =============================================== Outputs & filters: ------------------------------------- - 0 - output([NULL = t.char_t]), filter(nil), rowset=16 + 0 - output([cast(NULL, CHAR(1048576)) = t.char_t]), filter(nil), rowset=16 access([t.char_t]), partitions(p0) limit(1), offset(nil), is_index_back=false, is_global_index=false, range_key([t.__pk_increment]), range(MIN ; MAX)always true @@ -485,7 +485,7 @@ Query Plan =============================================== Outputs & filters: ------------------------------------- - 0 - output([NULL = t.tinytext_t]), filter(nil), rowset=16 + 0 - output([cast(NULL, TINYTEXT(256)) = t.tinytext_t]), filter(nil), rowset=16 access([t.tinytext_t]), partitions(p0) limit(1), offset(nil), is_index_back=false, is_global_index=false, range_key([t.__pk_increment]), range(MIN ; MAX)always true @@ -504,7 +504,7 @@ Query Plan =============================================== Outputs & filters: ------------------------------------- - 0 - output([NULL = t.mediumtext_t]), filter(nil) + 0 - output([cast(NULL, MEDIUMTEXT(16777216)) = t.mediumtext_t]), filter(nil) access([t.mediumtext_t]), partitions(p0) limit(1), offset(nil), is_index_back=false, is_global_index=false, range_key([t.__pk_increment]), range(MIN ; MAX)always true @@ -523,7 +523,7 @@ Query Plan =============================================== Outputs & filters: ------------------------------------- - 0 - output([NULL = t.longtext_t]), filter(nil) + 0 - output([cast(NULL, LONGTEXT(536870911)) = t.longtext_t]), filter(nil) 
access([t.longtext_t]), partitions(p0) limit(1), offset(nil), is_index_back=false, is_global_index=false, range_key([t.__pk_increment]), range(MIN ; MAX)always true