Fix regexp expr bug: the collations of both input params must be considered
This commit is contained in:
@ -66,22 +66,30 @@ int ObExprRegexp::calc_result_type2(ObExprResType &type,
|
|||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
ObRawExpr * raw_expr = type_ctx.get_raw_expr();
|
ObRawExpr * raw_expr = type_ctx.get_raw_expr();
|
||||||
|
ObCollationType res_cs_type = CS_TYPE_INVALID;
|
||||||
|
ObCollationLevel res_cs_level = CS_LEVEL_INVALID;
|
||||||
CK(NULL != type_ctx.get_raw_expr());
|
CK(NULL != type_ctx.get_raw_expr());
|
||||||
if (type1.is_null() || type2.is_null()) {
|
if (type1.is_null() || type2.is_null()) {
|
||||||
type.set_int32();
|
type.set_int32();
|
||||||
type.set_precision(DEFAULT_PRECISION_FOR_BOOL);
|
type.set_precision(DEFAULT_PRECISION_FOR_BOOL);
|
||||||
type.set_scale(DEFAULT_SCALE_FOR_INTEGER);
|
type.set_scale(DEFAULT_SCALE_FOR_INTEGER);
|
||||||
} else {
|
} else if (OB_UNLIKELY(!is_type_valid(type1.get_type()) || !is_type_valid(type2.get_type()))) {
|
||||||
if (OB_UNLIKELY(!is_type_valid(type1.get_type()) || !is_type_valid(type2.get_type()))) {
|
|
||||||
ret = OB_INVALID_ARGUMENT;
|
ret = OB_INVALID_ARGUMENT;
|
||||||
LOG_WARN("the param is not castable", K(ret), K(type1), K(type2));
|
LOG_WARN("the param is not castable", K(ret), K(type1), K(type2));
|
||||||
|
} else if (OB_FAIL(ObCharset::aggregate_collation(type1.get_calc_collation_level(),
|
||||||
|
type1.get_calc_collation_type(),
|
||||||
|
type2.get_calc_collation_level(),
|
||||||
|
type2.get_calc_collation_type(),
|
||||||
|
res_cs_level,
|
||||||
|
res_cs_type))) {
|
||||||
|
LOG_WARN("fail to aggregate collation", K(ret), K(type1), K(type2));
|
||||||
} else {
|
} else {
|
||||||
type.set_int32();
|
type.set_int32();
|
||||||
type.set_precision(DEFAULT_PRECISION_FOR_BOOL);
|
type.set_precision(DEFAULT_PRECISION_FOR_BOOL);
|
||||||
type.set_scale(DEFAULT_SCALE_FOR_INTEGER);
|
type.set_scale(DEFAULT_SCALE_FOR_INTEGER);
|
||||||
//The calc collation type is set to utf16 because the ICU regexp engine works on utf16,
|
//The calc collation type is set to utf16 because the ICU regexp engine works on utf16,
|
||||||
//so we convert the params to the needed collation in advance and the regexp code need not handle it.
|
//so we convert the params to the needed collation in advance and the regexp code need not handle it.
|
||||||
bool is_case_sensitive = ObCharset::is_bin_sort(type1.get_calc_collation_type());
|
bool is_case_sensitive = ObCharset::is_bin_sort(res_cs_type);
|
||||||
bool need_utf8 = false;
|
bool need_utf8 = false;
|
||||||
type1.set_calc_type(ObVarcharType);
|
type1.set_calc_type(ObVarcharType);
|
||||||
type1.set_calc_collation_level(type.get_collation_level());
|
type1.set_calc_collation_level(type.get_collation_level());
|
||||||
@ -94,6 +102,7 @@ int ObExprRegexp::calc_result_type2(ObExprResType &type,
|
|||||||
} else {
|
} else {
|
||||||
type2.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
|
type2.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
|
||||||
}
|
}
|
||||||
|
|
||||||
need_utf8 = false;
|
need_utf8 = false;
|
||||||
if (OB_FAIL(ret)) {
|
if (OB_FAIL(ret)) {
|
||||||
} else if (OB_FAIL(ObExprRegexContext::check_need_utf8(raw_expr->get_param_expr(0), need_utf8))) {
|
} else if (OB_FAIL(ObExprRegexContext::check_need_utf8(raw_expr->get_param_expr(0), need_utf8))) {
|
||||||
@ -104,7 +113,6 @@ int ObExprRegexp::calc_result_type2(ObExprResType &type,
|
|||||||
type1.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
|
type1.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6328,3 +6328,49 @@ select * from t1 where c1 regexp '^U';
|
|||||||
c1
|
c1
|
||||||
UNPRESS123
|
UNPRESS123
|
||||||
UNPRESS456
|
UNPRESS456
|
||||||
|
set names gbk;
|
||||||
|
select 'a' collate gbk_bin regexp 'A';
|
||||||
|
'a' collate gbk_bin regexp 'A'
|
||||||
|
0
|
||||||
|
select 'a' collate gbk_chinese_ci regexp 'A';
|
||||||
|
'a' collate gbk_chinese_ci regexp 'A'
|
||||||
|
1
|
||||||
|
select 'a' regexp 'A' collate gbk_chinese_ci;
|
||||||
|
'a' regexp 'A' collate gbk_chinese_ci
|
||||||
|
1
|
||||||
|
select 'a' regexp 'A' collate gbk_bin;
|
||||||
|
'a' regexp 'A' collate gbk_bin
|
||||||
|
0
|
||||||
|
select 'a' collate gbk_bin regexp 'A' collate gbk_bin;
|
||||||
|
'a' collate gbk_bin regexp 'A' collate gbk_bin
|
||||||
|
0
|
||||||
|
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_chinese_ci;
|
||||||
|
'a' collate gbk_chinese_ci regexp 'A' collate gbk_chinese_ci
|
||||||
|
1
|
||||||
|
select 'a' collate gbk_bin regexp 'A' collate gbk_chinese_ci;
|
||||||
|
ERROR HY000: Illegal mix of collations
|
||||||
|
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_bin;
|
||||||
|
ERROR HY000: Illegal mix of collations
|
||||||
|
set names latin1;
|
||||||
|
select 'a' collate latin1_bin regexp 'A';
|
||||||
|
'a' collate latin1_bin regexp 'A'
|
||||||
|
0
|
||||||
|
select 'a' collate latin1_swedish_ci regexp 'A';
|
||||||
|
'a' collate latin1_swedish_ci regexp 'A'
|
||||||
|
1
|
||||||
|
select 'a' regexp 'A' collate latin1_swedish_ci;
|
||||||
|
'a' regexp 'A' collate latin1_swedish_ci
|
||||||
|
1
|
||||||
|
select 'a' regexp 'A' collate latin1_bin;
|
||||||
|
'a' regexp 'A' collate latin1_bin
|
||||||
|
0
|
||||||
|
select 'a' collate latin1_bin regexp 'A' collate latin1_bin;
|
||||||
|
'a' collate latin1_bin regexp 'A' collate latin1_bin
|
||||||
|
0
|
||||||
|
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_swedish_ci;
|
||||||
|
'a' collate latin1_swedish_ci regexp 'A' collate latin1_swedish_ci
|
||||||
|
1
|
||||||
|
select 'a' collate latin1_bin regexp 'A' collate latin1_swedish_ci;
|
||||||
|
ERROR HY000: Illegal mix of collations
|
||||||
|
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_bin;
|
||||||
|
ERROR HY000: Illegal mix of collations
|
||||||
|
@ -455,3 +455,30 @@ create table t1(c1 blob);
|
|||||||
insert into t1 values('UNPRESS123');
|
insert into t1 values('UNPRESS123');
|
||||||
insert into t1 values('UNPRESS456');
|
insert into t1 values('UNPRESS456');
|
||||||
select * from t1 where c1 regexp '^U';
|
select * from t1 where c1 regexp '^U';
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##bug48378677
|
||||||
|
set names gbk;
|
||||||
|
select 'a' collate gbk_bin regexp 'A';
|
||||||
|
select 'a' collate gbk_chinese_ci regexp 'A';
|
||||||
|
select 'a' regexp 'A' collate gbk_chinese_ci;
|
||||||
|
select 'a' regexp 'A' collate gbk_bin;
|
||||||
|
select 'a' collate gbk_bin regexp 'A' collate gbk_bin;
|
||||||
|
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_chinese_ci;
|
||||||
|
--error 1267
|
||||||
|
select 'a' collate gbk_bin regexp 'A' collate gbk_chinese_ci;
|
||||||
|
--error 1267
|
||||||
|
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_bin;
|
||||||
|
|
||||||
|
set names latin1;
|
||||||
|
select 'a' collate latin1_bin regexp 'A';
|
||||||
|
select 'a' collate latin1_swedish_ci regexp 'A';
|
||||||
|
select 'a' regexp 'A' collate latin1_swedish_ci;
|
||||||
|
select 'a' regexp 'A' collate latin1_bin;
|
||||||
|
select 'a' collate latin1_bin regexp 'A' collate latin1_bin;
|
||||||
|
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_swedish_ci;
|
||||||
|
--error 1267
|
||||||
|
select 'a' collate latin1_bin regexp 'A' collate latin1_swedish_ci;
|
||||||
|
--error 1267
|
||||||
|
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_bin;
|
Reference in New Issue
Block a user