Fix regexp function bug

This commit is contained in:
obdev
2023-04-13 09:05:30 +00:00
committed by ob-robot
parent 48c57d0564
commit c1ac596d14
13 changed files with 146 additions and 32 deletions

View File

@ -76,10 +76,19 @@ int ObExprRegexp::calc_result_type2(ObExprResType &type,
} else if (OB_UNLIKELY(!is_type_valid(type1.get_type()) || !is_type_valid(type2.get_type()))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("the param is not castable", K(ret), K(type1), K(type2));
} else if (OB_FAIL(ObCharset::aggregate_collation(type1.get_calc_collation_level(),
type1.get_calc_collation_type(),
type2.get_calc_collation_level(),
type2.get_calc_collation_type(),
} else if ((ObExprRegexContext::is_binary_string(type1) || ObExprRegexContext::is_binary_string(type2))
&& (!ObExprRegexContext::is_binary_compatible(type1) || !ObExprRegexContext::is_binary_compatible(type2))) {
const char *coll_name1 = ObCharset::collation_name(type1.get_collation_type());
const char *coll_name2 = ObCharset::collation_name(type2.get_collation_type());
ObString collation1 = ObString::make_string(coll_name1);
ObString collation2 = ObString::make_string(coll_name2);
ret = OB_ERR_MYSQL_CHARACTER_SET_MISMATCH;
LOG_USER_ERROR(OB_ERR_MYSQL_CHARACTER_SET_MISMATCH, collation1.length(), collation1.ptr(), collation2.length(), collation2.ptr());
LOG_WARN("If one of the params is binary string, all of the params should be implicitly castable to binary charset.", K(ret), K(type1), K(type2));
} else if (OB_FAIL(ObCharset::aggregate_collation(type1.get_collation_level(),
type1.get_collation_type(),
type2.get_collation_level(),
type2.get_collation_type(),
res_cs_level,
res_cs_type))) {
LOG_WARN("fail to aggregate collation", K(ret), K(type1), K(type2));

View File

@ -860,5 +860,33 @@ int ObExprRegexContext::check_need_utf8(ObRawExpr *expr, bool &need_utf8)
return ret;
}
int ObExprRegexContext::check_binary_compatible(const ObExprResType *types, int64_t num) {
int ret = OB_SUCCESS;
if (OB_ISNULL(types)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null", K(ret));
} else {
int64_t binary_param_idx = -1;
int64_t nobinary_param_idx = -1;
for (int64_t i = 0; i < num; ++i) {
if (ObExprRegexContext::is_binary_string(types[i])) {
binary_param_idx = i;
} else if (!ObExprRegexContext::is_binary_compatible(types[i])) {
nobinary_param_idx = i;
}
}
if (-1 != binary_param_idx && -1 != nobinary_param_idx) {
const char *coll_name1 = ObCharset::collation_name(types[binary_param_idx].get_collation_type());
const char *coll_name2 = ObCharset::collation_name(types[nobinary_param_idx].get_collation_type());
ObString collation1 = ObString::make_string(coll_name1);
ObString collation2 = ObString::make_string(coll_name2);
ret = OB_ERR_MYSQL_CHARACTER_SET_MISMATCH;
LOG_USER_ERROR(OB_ERR_MYSQL_CHARACTER_SET_MISMATCH, collation1.length(), collation1.ptr(), collation2.length(), collation2.ptr());
LOG_WARN("If one of the params is binary string, all of the params should be implicitly castable to binary charset.", K(ret), K(*types));
}
}
return ret;
}
}
}

View File

@ -104,8 +104,18 @@ public:
uint32_t &flags);
static int check_need_utf8(ObRawExpr *expr, bool &is_nstring);
// Returns true when `type` carries the binary collation (CS_TYPE_BINARY) and its
// object type is accepted by ob_is_string_tc().
static inline bool is_binary_string(const ObExprResType &type) {
  bool bret = false;
  if (type.get_collation_type() == CS_TYPE_BINARY) {
    bret = ob_is_string_tc(type.get_type());
  }
  return bret;
}
// Returns true when `type` either carries the binary collation (CS_TYPE_BINARY)
// or is not a string/lob type according to ob_is_string_or_lob_type().
static inline bool is_binary_compatible(const ObExprResType &type) {
  bool bret = true;
  if (type.get_collation_type() != CS_TYPE_BINARY) {
    bret = !ob_is_string_or_lob_type(type.get_type());
  }
  return bret;
}
TO_STRING_KV(K_(inited));
static int check_binary_compatible(const ObExprResType *types, int64_t num);
private:
int preprocess_pattern(common::ObExprStringBuf &string_buf,
const common::ObString &origin_pattern,

View File

@ -50,14 +50,24 @@ int ObExprRegexpInstr::calc_result_typeN(ObExprResType &type,
ret = OB_ERR_PARAM_SIZE;
LOG_WARN("param number of regexp_instr at least 2 and at most 7", K(ret), K(param_num));
} else {
bool is_case_sensitive = types[0].get_calc_collation_type();
for (int i = 0; OB_SUCC(ret) && i < param_num; i++) {
if (!types[i].is_null() && !is_type_valid(types[i].get_type())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("the parameter is not castable", K(ret), K(i));
}
}
if (OB_SUCC(ret) && is_mysql_mode()) {
ObExprResType cmp_type;
if (OB_FAIL(ObExprRegexContext::check_binary_compatible(types, 2))) {
LOG_WARN("types are not compatible with binary.", K(ret));
} else if (OB_FAIL(aggregate_charsets_for_comparison(cmp_type, types, 2, type_ctx.get_coll_type()))) {
LOG_WARN("fail to aggregate charsets for comparison");
} else {
is_case_sensitive = ObCharset::is_bin_sort(cmp_type.get_calc_collation_type());
}
}
if (OB_SUCC(ret)) {
bool is_case_sensitive = ObCharset::is_bin_sort(types[0].get_calc_collation_type());
bool need_utf8 = false;
switch (param_num) {
case 7/*subexpr*/:

View File

@ -51,12 +51,23 @@ int ObExprRegexpLike::calc_result_typeN(ObExprResType &type,
ret = OB_ERR_PARAM_SIZE;
LOG_WARN("param number of regexp_replace at least 2 and at most 3", K(ret), K(param_num));
} else {
bool is_case_sensitive = ObCharset::is_bin_sort(types[0].get_collation_type());
for (int i = 0; OB_SUCC(ret) && i < param_num; i++) {
if (!types[i].is_null() && !is_type_valid(types[i].get_type())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("the parameter is not castable", K(ret), K(i));
}
}
if (OB_SUCC(ret) && is_mysql_mode()) {
ObExprResType cmp_type;
if (OB_FAIL(ObExprRegexContext::check_binary_compatible(types, 2))) {
LOG_WARN("types are not compatible with binary.", K(ret));
} else if (OB_FAIL(aggregate_charsets_for_comparison(cmp_type, types, 2, type_ctx.get_coll_type()))) {
LOG_WARN("fail to aggregate charsets for comparison");
} else {
is_case_sensitive = ObCharset::is_bin_sort(cmp_type.get_calc_collation_type());
}
}
if (OB_SUCC(ret)) {
if (param_num == 3) {/*match type*/
types[2].set_calc_type(ObVarcharType);
@ -66,7 +77,6 @@ int ObExprRegexpLike::calc_result_typeN(ObExprResType &type,
//we set the calc collation type to utf8 and convert it to utf16 in the execution stage, because the ICU regexp engine uses utf16;
//we need to convert it to the needed collation in advance, so there is no need to think about it in regexp.
//lob TODO,jiangxiu.wt
bool is_case_sensitive = ObCharset::is_bin_sort(types[0].get_calc_collation_type());
bool need_utf8 = false;
types[1].set_calc_type(ObVarcharType);
types[1].set_calc_collation_level(CS_LEVEL_IMPLICIT);

View File

@ -49,7 +49,9 @@ int ObExprRegexpReplace::calc_result_typeN(ObExprResType &type,
ObRawExpr * raw_expr = type_ctx.get_raw_expr();
CK(NULL != type_ctx.get_raw_expr());
int64_t max_allowed_packet = 0;
const ObRawExpr *real_expr = NULL;
const ObRawExpr *real_text = NULL;
const ObRawExpr *real_pattern = NULL;
bool is_case_sensitive = false;
if (OB_FAIL(ret)) {
} else if (OB_UNLIKELY(param_num < 2 || param_num > 6)) {
ret = OB_ERR_PARAM_SIZE;
@ -59,13 +61,15 @@ int ObExprRegexpReplace::calc_result_typeN(ObExprResType &type,
LOG_WARN("get unexpected null", K(ret), K(type_ctx.get_session()));
} else if (OB_FAIL(type_ctx.get_session()->get_max_allowed_packet(max_allowed_packet))) {
LOG_WARN("failed to get max allowed packet", K(ret));
} else if (OB_FAIL(ObRawExprUtils::get_real_expr_without_cast(raw_expr->get_param_expr(0), real_expr))) {
} else if (OB_FAIL(ObRawExprUtils::get_real_expr_without_cast(raw_expr->get_param_expr(0), real_text))) {
LOG_WARN("fail to get real expr without cast", K(ret));
} else if (OB_ISNULL(real_expr)) {
} else if (OB_FAIL(ObRawExprUtils::get_real_expr_without_cast(raw_expr->get_param_expr(1), real_pattern))) {
LOG_WARN("fail to get real expr without cast", K(ret));
} else if (OB_ISNULL(real_text) || OB_ISNULL(real_pattern)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("real expr is invalid", K(ret), K(real_expr));
LOG_WARN("real expr is invalid", K(ret), K(real_text), K(real_pattern));
} else {
const ObExprResType &text = real_expr->get_result_type();
const ObExprResType &text = real_text->get_result_type();
for (int i = 0; OB_SUCC(ret) && i < param_num; i++) {
if (!types[i].is_null() && !is_type_valid(types[i].get_type())) {
ret = OB_INVALID_ARGUMENT;
@ -86,23 +90,32 @@ int ObExprRegexpReplace::calc_result_typeN(ObExprResType &type,
*type_ctx.get_session(), input_params, type, PREFER_VAR_LEN_CHAR));
OZ(deduce_string_param_calc_type_and_charset(*type_ctx.get_session(), type, input_params));
OX(type.set_length_semantics(type_ctx.get_session()->get_actual_nls_length_semantics()));
is_case_sensitive = ObCharset::is_bin_sort(types[0].get_collation_type());
} else {
const ObExprResType &pattern = real_pattern->get_result_type();
const common::ObLengthSemantics default_length_semantics = (OB_NOT_NULL(type_ctx.get_session())
? type_ctx.get_session()->get_actual_nls_length_semantics()
: common::LS_BYTE);
if (text.is_lob()) {
type.set_type(text.get_type());
ObObjMeta real_types[2] = {text, pattern};
if (text.is_blob()) {
type.set_blob();
} else if (pattern.is_blob()) {
type.set_blob();
} else {
type.set_clob();
type.set_length_semantics(text.is_varchar_or_char() ? text.get_length_semantics() : default_length_semantics);
}
//maximum length of a column when creating a table
type.set_length(max_allowed_packet);
ret = aggregate_charsets_for_string_result(type, &text, 1, type_ctx.get_coll_type());
if (OB_FAIL(ObExprRegexContext::check_binary_compatible(types, 3))) {
LOG_WARN("types are not compatible with binary.", K(ret));
} else {
ret = aggregate_charsets_for_string_result(type, real_types, 2, type_ctx.get_coll_type());
is_case_sensitive = ObCharset::is_bin_sort(type.get_collation_type());
}
}
}
if (OB_SUCC(ret)) {
bool is_case_sensitive = ObCharset::is_bin_sort(types[0].get_calc_collation_type());
bool need_utf8 = false;
switch (param_num) {
case 6/*match type*/:

View File

@ -46,19 +46,24 @@ int ObExprRegexpSubstr::calc_result_typeN(ObExprResType &type,
UNUSED(type_ctx);
int ret = OB_SUCCESS;
ObRawExpr * raw_expr = type_ctx.get_raw_expr();
const ObRawExpr * real_expr = NULL;
const ObRawExpr * real_text = NULL;
const ObRawExpr * real_pattern = NULL;
CK(NULL != type_ctx.get_raw_expr());
if (OB_FAIL(ret)) {
} else if (OB_UNLIKELY(param_num < 2 || param_num > 6)) {
ret = OB_ERR_PARAM_SIZE;
LOG_WARN("param number of regexp_substr at least 2 and at most 6", K(ret), K(param_num));
} else if (OB_FAIL(ObRawExprUtils::get_real_expr_without_cast(raw_expr->get_param_expr(0), real_expr))) {
} else if (OB_FAIL(ObRawExprUtils::get_real_expr_without_cast(raw_expr->get_param_expr(0), real_text))) {
LOG_WARN("fail to get real expr without cast", K(ret));
} else if (OB_ISNULL(real_expr)) {
} else if (OB_FAIL(ObRawExprUtils::get_real_expr_without_cast(raw_expr->get_param_expr(1), real_pattern))) {
LOG_WARN("fail to get real expr without cast", K(ret));
} else if (OB_ISNULL(real_text) || OB_ISNULL(real_pattern)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("real expr is invalid", K(ret), K(real_expr));
LOG_WARN("real expr is invalid", K(ret), K(real_text), K(real_pattern));
} else {
const ObExprResType &text = real_expr->get_result_type();
const ObExprResType &text = real_text->get_result_type();
const ObExprResType &pattern = real_pattern->get_result_type();
bool is_case_sensitive = false;
for (int i = 0; OB_SUCC(ret) && i < param_num; i++) {
if (!types[i].is_null() && !is_type_valid(types[i].get_type())) {
ret = OB_INVALID_ARGUMENT;
@ -71,6 +76,7 @@ int ObExprRegexpSubstr::calc_result_typeN(ObExprResType &type,
// set max length.
type.set_length(static_cast<common::ObLength>(text.get_length()));
auto str_params = make_const_carray(const_cast<ObExprResType*>(&text));
is_case_sensitive = ObCharset::is_bin_sort(types[0].get_collation_type());
OZ(aggregate_string_type_and_charset_oracle(*type_ctx.get_session(),
str_params,
type,
@ -81,13 +87,22 @@ int ObExprRegexpSubstr::calc_result_typeN(ObExprResType &type,
const common::ObLengthSemantics default_length_semantics = (OB_NOT_NULL(type_ctx.get_session())
? type_ctx.get_session()->get_actual_nls_length_semantics()
: common::LS_BYTE);
type.set_varchar();
ObObjMeta real_types[2] = {text, pattern};
if (text.is_blob()) {
type.set_blob();
} else {
type.set_varchar();
type.set_length_semantics(text.is_varchar_or_char() ? text.get_length_semantics() : default_length_semantics);
}
type.set_length(text.get_length());
type.set_length_semantics(text.is_varchar_or_char() ? text.get_length_semantics() : default_length_semantics);
ret = aggregate_charsets_for_string_result(type, types, 1, type_ctx.get_coll_type());
if (OB_FAIL(ObExprRegexContext::check_binary_compatible(types, 2))) {
LOG_WARN("types are not compatible with binary.", K(ret));
} else {
ret = aggregate_charsets_for_string_result(type, real_types, 2, type_ctx.get_coll_type());
is_case_sensitive = ObCharset::is_bin_sort(type.get_collation_type());
}
}
if (OB_SUCC(ret)) {
bool is_case_sensitive = ObCharset::is_bin_sort(types[0].get_calc_collation_type());
bool need_utf8 = false;
switch (param_num) {
case 6/*subexpr*/: