!5127 【5.0.2代码回合】修复group_concat和字符序like问题

Merge pull request !5127 from zhangxubo/5.0.0
2024-04-17 02:21:32 +00:00
parent 491925f147 3f751bde8a
commit b7835d4e5f
4 changed files with 48 additions and 4 deletions
--- a/src/common/backend/catalog/gs_utf8_collation.cpp
+++ b/src/common/backend/catalog/gs_utf8_collation.cpp
@@ -787,6 +787,14 @@ static int get_current_char_sorted_value(const unsigned char* cur_str, const uns
                                         GS_UINT32* next_word, const GS_UNICASE_INFO *uni_plane)
 {
    int word_bytes = mb_wc_utf8mb4(cur_str, str_end, next_word);
+    if (word_bytes <= 0) {
+        if (word_bytes == GS_ERR_TOOSMALL) {
+            return 0;
+        }
+        word_bytes = 1;
+        *next_word = *cur_str;
+        return word_bytes;
+    }
    sort_by_unicode(uni_plane->sort_page, next_word);
    return word_bytes;
 }
--- a/src/common/backend/utils/adt/varlena.cpp
+++ b/src/common/backend/utils/adt/varlena.cpp
@@ -6410,16 +6410,28 @@ Datum group_concat_transfn(PG_FUNCTION_ARGS)
 */
 Datum group_concat_finalfn(PG_FUNCTION_ARGS)
 {
-    StringInfo state;
-
    /* cannot be called directly because of internal-type argument */
    Assert(AggCheckCallContext(fcinfo, NULL));

    if (!PG_ARGISNULL(0)) { /* result not null */
-        state = (StringInfo)PG_GETARG_POINTER(0);
+        StringInfo state = (StringInfo)PG_GETARG_POINTER(0);
+        int cur_charset = get_valid_charset_by_collation(PG_GET_COLLATION());
+        int valid_len = 0;
+        /* Drop a trailing partial multi-byte character. Scan whole characters from the start: */
+        /* an offset of len - maxmblen may point before data or into the middle of a character. */
+        while (valid_len < state->len) {
+            int cur_len = pg_wchar_table[cur_charset].mblen((const unsigned char*)(state->data + valid_len));
+            /* stop at a character that does not fit in the buffer, or on a non-positive length */
+            if (cur_len <= 0 || cur_len > state->len - valid_len) {
+                break;
+            }
+            valid_len += cur_len;
+        }
+        state->len = valid_len;
        PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));
-    } else
+    } else {
        PG_RETURN_NULL();
+    }
 }

 /*
--- a/src/test/regress/expected/test_b_format_collate.out
+++ b/src/test/regress/expected/test_b_format_collate.out
@@ -2455,6 +2455,22 @@ select distinct c3 from test_utf8mb4_bin;
 fxlP7sW8vA9hcYdKqRHLwDzRSaAjV1VrMZFYRsmjb9JpsIPdGu7Gpi6OzaOqmR                                      
 (1 row)

+set group_concat_max_len = 2;
+drop table if exists t1;
+create table t1(a char(32) character set 'utf8' collate utf8_general_ci) character set 'utf8' collate 'utf8_general_ci';
+insert into t1 values('律师事务部中心(中文汉字匹配)');
+select * from (select group_concat(a) ab from t1) where ab like '%中文%';
+ ab 
+----
+(0 rows)
+
+set group_concat_max_len = default;
+select * from (select group_concat(a) ab from t1) where ab like '%中文%';
+              ab              
+------------------------------
+ 律师事务部中心(中文汉字匹配)
+(1 row)
+
 -- test alter table convert to
 SET b_format_behavior_compat_options = 'enable_multi_charset';
 drop table if exists test_convert_to;
--- a/src/test/regress/sql/test_b_format_collate.sql
+++ b/src/test/regress/sql/test_b_format_collate.sql
@@ -579,6 +579,14 @@ select count(*) from test_utf8mb4_bin group by c2, c3;
 select distinct c2 from test_utf8mb4_bin;
 select distinct c3 from test_utf8mb4_bin;

+set group_concat_max_len = 2;
+drop table if exists t1;
+create table t1(a char(32) character set 'utf8' collate utf8_general_ci) character set 'utf8' collate 'utf8_general_ci';
+insert into t1 values('律师事务部中心(中文汉字匹配)');
+select * from (select group_concat(a) ab from t1) where ab like '%中文%';
+set group_concat_max_len = default;
+select * from (select group_concat(a) ab from t1) where ab like '%中文%';
+
 -- test alter table convert to
 SET b_format_behavior_compat_options = 'enable_multi_charset';
 drop table if exists test_convert_to;