diff --git a/src/common/backend/catalog/gs_utf8_collation.cpp b/src/common/backend/catalog/gs_utf8_collation.cpp
index b51e35d7e..dc7858178 100644
--- a/src/common/backend/catalog/gs_utf8_collation.cpp
+++ b/src/common/backend/catalog/gs_utf8_collation.cpp
@@ -787,6 +787,14 @@ static int get_current_char_sorted_value(const unsigned char* cur_str, const uns
     GS_UINT32* next_word, const GS_UNICASE_INFO *uni_plane)
 {
     int word_bytes = mb_wc_utf8mb4(cur_str, str_end, next_word);
+    if (word_bytes <= 0) {
+        if (word_bytes == GS_ERR_TOOSMALL) {
+            return 0;
+        }
+        word_bytes = 1;
+        *next_word = *cur_str;
+        return word_bytes;
+    }
     sort_by_unicode(uni_plane->sort_page, next_word);
     return word_bytes;
 }
\ No newline at end of file
diff --git a/src/common/backend/utils/adt/varlena.cpp b/src/common/backend/utils/adt/varlena.cpp
index 2e02808fb..d81bddaa7 100644
--- a/src/common/backend/utils/adt/varlena.cpp
+++ b/src/common/backend/utils/adt/varlena.cpp
@@ -6410,16 +6410,32 @@ Datum group_concat_transfn(PG_FUNCTION_ARGS)
  */
 Datum group_concat_finalfn(PG_FUNCTION_ARGS)
 {
-    StringInfo state;
-
     /* cannot be called directly because of internal-type argument */
     Assert(AggCheckCallContext(fcinfo, NULL));
     if (!PG_ARGISNULL(0)) {
         /* result not null */
-        state = (StringInfo)PG_GETARG_POINTER(0);
+        StringInfo state = (StringInfo)PG_GETARG_POINTER(0);
+        int cur_charset = get_valid_charset_by_collation(PG_GET_COLLATION());
+        int valid_len = 0;
+        /*
+         * The accumulated buffer may end in the middle of a multibyte character
+         * (e.g. after truncation to group_concat_max_len). Walk whole characters
+         * from the start of the buffer and drop a trailing partial character.
+         * Starting at state->len - maxmblen instead would read before the buffer
+         * whenever the result is shorter than one maximum-width character.
+         */
+        while (valid_len < state->len) {
+            int cur_len = pg_wchar_table[cur_charset].mblen((const unsigned char*)(state->data + valid_len));
+            if (cur_len <= 0 || valid_len + cur_len > state->len) {
+                break;
+            }
+            valid_len += cur_len;
+        }
+        state->len = valid_len;
         PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));
-    } else
+    } else {
         PG_RETURN_NULL();
+    }
 }
 
 /*
diff --git 
a/src/test/regress/expected/test_b_format_collate.out b/src/test/regress/expected/test_b_format_collate.out index 918ea7830..dc67f4406 100644 --- a/src/test/regress/expected/test_b_format_collate.out +++ b/src/test/regress/expected/test_b_format_collate.out @@ -2455,6 +2455,22 @@ select distinct c3 from test_utf8mb4_bin; fxlP7sW8vA9hcYdKqRHLwDzRSaAjV1VrMZFYRsmjb9JpsIPdGu7Gpi6OzaOqmR (1 row) +set group_concat_max_len = 2; +drop table if exists t1; +create table t1(a char(32) character set 'utf8' collate utf8_general_ci) character set 'utf8' collate 'utf8_general_ci'; +insert into t1 values('律师事务部中心(中文汉字匹配)'); +select * from (select group_concat(a) ab from t1) where ab like '%中文%'; + ab +---- +(0 rows) + +set group_concat_max_len = default; +select * from (select group_concat(a) ab from t1) where ab like '%中文%'; + ab +------------------------------ + 律师事务部中心(中文汉字匹配) +(1 row) + -- test alter table convert to SET b_format_behavior_compat_options = 'enable_multi_charset'; drop table if exists test_convert_to; diff --git a/src/test/regress/sql/test_b_format_collate.sql b/src/test/regress/sql/test_b_format_collate.sql index 3a888680c..3c30dfcab 100644 --- a/src/test/regress/sql/test_b_format_collate.sql +++ b/src/test/regress/sql/test_b_format_collate.sql @@ -579,6 +579,14 @@ select count(*) from test_utf8mb4_bin group by c2, c3; select distinct c2 from test_utf8mb4_bin; select distinct c3 from test_utf8mb4_bin; +set group_concat_max_len = 2; +drop table if exists t1; +create table t1(a char(32) character set 'utf8' collate utf8_general_ci) character set 'utf8' collate 'utf8_general_ci'; +insert into t1 values('律师事务部中心(中文汉字匹配)'); +select * from (select group_concat(a) ab from t1) where ab like '%中文%'; +set group_concat_max_len = default; +select * from (select group_concat(a) ab from t1) where ab like '%中文%'; + -- test alter table convert to SET b_format_behavior_compat_options = 'enable_multi_charset'; drop table if exists test_convert_to;