fix regexp expr bug: should consider collations of input params
This commit is contained in:
@ -66,43 +66,51 @@ int ObExprRegexp::calc_result_type2(ObExprResType &type,
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObRawExpr * raw_expr = type_ctx.get_raw_expr();
|
||||
ObCollationType res_cs_type = CS_TYPE_INVALID;
|
||||
ObCollationLevel res_cs_level = CS_LEVEL_INVALID;
|
||||
CK(NULL != type_ctx.get_raw_expr());
|
||||
if (type1.is_null() || type2.is_null()) {
|
||||
type.set_int32();
|
||||
type.set_precision(DEFAULT_PRECISION_FOR_BOOL);
|
||||
type.set_scale(DEFAULT_SCALE_FOR_INTEGER);
|
||||
} else if (OB_UNLIKELY(!is_type_valid(type1.get_type()) || !is_type_valid(type2.get_type()))) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("the param is not castable", K(ret), K(type1), K(type2));
|
||||
} else if (OB_FAIL(ObCharset::aggregate_collation(type1.get_calc_collation_level(),
|
||||
type1.get_calc_collation_type(),
|
||||
type2.get_calc_collation_level(),
|
||||
type2.get_calc_collation_type(),
|
||||
res_cs_level,
|
||||
res_cs_type))) {
|
||||
LOG_WARN("fail to aggregate collation", K(ret), K(type1), K(type2));
|
||||
} else {
|
||||
if (OB_UNLIKELY(!is_type_valid(type1.get_type()) || !is_type_valid(type2.get_type()))) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("the param is not castable", K(ret), K(type1), K(type2));
|
||||
type.set_int32();
|
||||
type.set_precision(DEFAULT_PRECISION_FOR_BOOL);
|
||||
type.set_scale(DEFAULT_SCALE_FOR_INTEGER);
|
||||
//why we set the calc collation type to utf16: the ICU regexp engine uses utf16,
|
||||
//so we convert to the needed collation in advance, and the regexp code need not think about it.
|
||||
bool is_case_sensitive = ObCharset::is_bin_sort(res_cs_type);
|
||||
bool need_utf8 = false;
|
||||
type1.set_calc_type(ObVarcharType);
|
||||
type1.set_calc_collation_level(type.get_collation_level());
|
||||
type2.set_calc_type(ObVarcharType);
|
||||
type2.set_calc_collation_level(type.get_collation_level());
|
||||
if (OB_FAIL(ObExprRegexContext::check_need_utf8(raw_expr->get_param_expr(1), need_utf8))) {
|
||||
LOG_WARN("fail to check need utf8", K(ret));
|
||||
} else if (need_utf8) {
|
||||
type2.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF8MB4_BIN : CS_TYPE_UTF8MB4_GENERAL_CI);
|
||||
} else {
|
||||
type.set_int32();
|
||||
type.set_precision(DEFAULT_PRECISION_FOR_BOOL);
|
||||
type.set_scale(DEFAULT_SCALE_FOR_INTEGER);
|
||||
//why we set the calc collation type to utf16: the ICU regexp engine uses utf16,
|
||||
//so we convert to the needed collation in advance, and the regexp code need not think about it.
|
||||
bool is_case_sensitive = ObCharset::is_bin_sort(type1.get_calc_collation_type());
|
||||
bool need_utf8 = false;
|
||||
type1.set_calc_type(ObVarcharType);
|
||||
type1.set_calc_collation_level(type.get_collation_level());
|
||||
type2.set_calc_type(ObVarcharType);
|
||||
type2.set_calc_collation_level(type.get_collation_level());
|
||||
if (OB_FAIL(ObExprRegexContext::check_need_utf8(raw_expr->get_param_expr(1), need_utf8))) {
|
||||
LOG_WARN("fail to check need utf8", K(ret));
|
||||
} else if (need_utf8) {
|
||||
type2.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF8MB4_BIN : CS_TYPE_UTF8MB4_GENERAL_CI);
|
||||
} else {
|
||||
type2.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
|
||||
}
|
||||
need_utf8 = false;
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(ObExprRegexContext::check_need_utf8(raw_expr->get_param_expr(0), need_utf8))) {
|
||||
LOG_WARN("fail to check need utf8", K(ret));
|
||||
} else if (need_utf8) {
|
||||
type1.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF8MB4_BIN : CS_TYPE_UTF8MB4_GENERAL_CI);
|
||||
} else {
|
||||
type1.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
|
||||
}
|
||||
type2.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
|
||||
}
|
||||
|
||||
need_utf8 = false;
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(ObExprRegexContext::check_need_utf8(raw_expr->get_param_expr(0), need_utf8))) {
|
||||
LOG_WARN("fail to check need utf8", K(ret));
|
||||
} else if (need_utf8) {
|
||||
type1.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF8MB4_BIN : CS_TYPE_UTF8MB4_GENERAL_CI);
|
||||
} else {
|
||||
type1.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
@ -6328,3 +6328,49 @@ select * from t1 where c1 regexp '^U';
|
||||
c1
|
||||
UNPRESS123
|
||||
UNPRESS456
|
||||
set names gbk;
|
||||
select 'a' collate gbk_bin regexp 'A';
|
||||
'a' collate gbk_bin regexp 'A'
|
||||
0
|
||||
select 'a' collate gbk_chinese_ci regexp 'A';
|
||||
'a' collate gbk_chinese_ci regexp 'A'
|
||||
1
|
||||
select 'a' regexp 'A' collate gbk_chinese_ci;
|
||||
'a' regexp 'A' collate gbk_chinese_ci
|
||||
1
|
||||
select 'a' regexp 'A' collate gbk_bin;
|
||||
'a' regexp 'A' collate gbk_bin
|
||||
0
|
||||
select 'a' collate gbk_bin regexp 'A' collate gbk_bin;
|
||||
'a' collate gbk_bin regexp 'A' collate gbk_bin
|
||||
0
|
||||
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_chinese_ci;
|
||||
'a' collate gbk_chinese_ci regexp 'A' collate gbk_chinese_ci
|
||||
1
|
||||
select 'a' collate gbk_bin regexp 'A' collate gbk_chinese_ci;
|
||||
ERROR HY000: Illegal mix of collations
|
||||
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_bin;
|
||||
ERROR HY000: Illegal mix of collations
|
||||
set names latin1;
|
||||
select 'a' collate latin1_bin regexp 'A';
|
||||
'a' collate latin1_bin regexp 'A'
|
||||
0
|
||||
select 'a' collate latin1_swedish_ci regexp 'A';
|
||||
'a' collate latin1_swedish_ci regexp 'A'
|
||||
1
|
||||
select 'a' regexp 'A' collate latin1_swedish_ci;
|
||||
'a' regexp 'A' collate latin1_swedish_ci
|
||||
1
|
||||
select 'a' regexp 'A' collate latin1_bin;
|
||||
'a' regexp 'A' collate latin1_bin
|
||||
0
|
||||
select 'a' collate latin1_bin regexp 'A' collate latin1_bin;
|
||||
'a' collate latin1_bin regexp 'A' collate latin1_bin
|
||||
0
|
||||
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_swedish_ci;
|
||||
'a' collate latin1_swedish_ci regexp 'A' collate latin1_swedish_ci
|
||||
1
|
||||
select 'a' collate latin1_bin regexp 'A' collate latin1_swedish_ci;
|
||||
ERROR HY000: Illegal mix of collations
|
||||
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_bin;
|
||||
ERROR HY000: Illegal mix of collations
|
||||
|
@ -455,3 +455,30 @@ create table t1(c1 blob);
|
||||
insert into t1 values('UNPRESS123');
|
||||
insert into t1 values('UNPRESS456');
|
||||
select * from t1 where c1 regexp '^U';
|
||||
|
||||
|
||||
|
||||
##bug48378677
|
||||
set names gbk;
|
||||
select 'a' collate gbk_bin regexp 'A';
|
||||
select 'a' collate gbk_chinese_ci regexp 'A';
|
||||
select 'a' regexp 'A' collate gbk_chinese_ci;
|
||||
select 'a' regexp 'A' collate gbk_bin;
|
||||
select 'a' collate gbk_bin regexp 'A' collate gbk_bin;
|
||||
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_chinese_ci;
|
||||
--error 1267
|
||||
select 'a' collate gbk_bin regexp 'A' collate gbk_chinese_ci;
|
||||
--error 1267
|
||||
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_bin;
|
||||
|
||||
set names latin1;
|
||||
select 'a' collate latin1_bin regexp 'A';
|
||||
select 'a' collate latin1_swedish_ci regexp 'A';
|
||||
select 'a' regexp 'A' collate latin1_swedish_ci;
|
||||
select 'a' regexp 'A' collate latin1_bin;
|
||||
select 'a' collate latin1_bin regexp 'A' collate latin1_bin;
|
||||
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_swedish_ci;
|
||||
--error 1267
|
||||
select 'a' collate latin1_bin regexp 'A' collate latin1_swedish_ci;
|
||||
--error 1267
|
||||
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_bin;
|
Reference in New Issue
Block a user