[fix](regex) Fix regex matching failure for strings containing Chinese characters (#20493)

This commit is contained in:
Jerry Hu
2023-06-07 07:27:47 +08:00
committed by GitHub
parent 3691372054
commit 49f8f20fb1
3 changed files with 12 additions and 2 deletions

View File

@@ -437,8 +437,9 @@ Status FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state,
Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expression,
hs_database_t** database, hs_scratch_t** scratch) {
hs_compile_error_t* compile_err;
auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY, HS_MODE_BLOCK, nullptr,
database, &compile_err);
auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8,
HS_MODE_BLOCK, nullptr, database, &compile_err);
if (res != HS_SUCCESS) {
*database = nullptr;
if (context) {

View File

@@ -73,6 +73,12 @@ a-b c
-- !sql --
a <b> b
-- !sql_utf1 --
true
-- !sql_utf2 --
true
-- !sql_regexp_null --
\N
\N

View File

@@ -63,6 +63,9 @@ suite("test_string_function_regexp") {
qt_sql "SELECT regexp_replace_one('a b c', \" \", \"-\");"
qt_sql "SELECT regexp_replace_one('a b b','(b)','<\\\\1>');"
qt_sql_utf1 """ select '皖12345' REGEXP '^[皖][0-9]{5}\$'; """
qt_sql_utf2 """ select '皖 12345' REGEXP '^[皖] [0-9]{5}\$'; """
// bug fix
sql """
INSERT INTO ${tbName} VALUES