diff --git a/src/common/backend/catalog/gs_utf8_collation.cpp b/src/common/backend/catalog/gs_utf8_collation.cpp index 56f690557..3fb98efbc 100644 --- a/src/common/backend/catalog/gs_utf8_collation.cpp +++ b/src/common/backend/catalog/gs_utf8_collation.cpp @@ -491,6 +491,8 @@ Datum hash_utf8mb4_bin_pad_space(const unsigned char *key); static int get_current_char_sorted_value(const unsigned char* cur_str, const unsigned char* str_end, GS_UINT32* next_word, const GS_UNICASE_INFO *uni_plane); bool is_b_format_collation(Oid collation); +static int strnncoll_binary(const unsigned char* arg1, size_t len1, + const unsigned char* arg2, size_t len2); /* binary collation only support binary string types, such as : blob. */ void check_binary_collation(Oid collation, Oid type_oid) @@ -507,31 +509,6 @@ void check_binary_collation(Oid collation, Oid type_oid) } } -Oid binary_need_transform_typeid(Oid typeoid, Oid* collation) -{ - Oid new_typid = typeoid; - if (*collation == BINARY_COLLATION_OID) { - /* use switch case stmt for extension in feature */ - switch (typeoid) { - /* binary type no need to transform */ - case BLOBOID: - break; - /* string type need to transform to binary type */ - case TEXTOID: - new_typid = BLOBOID; - break; - default: - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Un-support feature"), - errdetail("type %s cannot be set to binary collation currently", get_typename(typeoid)))); - break;; - } - /* binary collation in attribute level collation no need to be set. 
*/ - *collation = InvalidOid; - } - return new_typid; -} - bool is_support_b_format_collation(Oid collation) { if (is_b_format_collation(collation) && !DB_IS_CMPT(B_FORMAT)) { @@ -579,11 +556,17 @@ int varstr_cmp_by_builtin_collations(char* arg1, int len1, char* arg2, int len2, switch (collid) { case UTF8MB4_GENERAL_CI_COLLATION_OID: case UTF8MB4_UNICODE_CI_COLLATION_OID: + case UTF8_GENERAL_CI_COLLATION_OID: + case UTF8_UNICODE_CI_COLLATION_OID: result = strnncoll_utf8mb4_general_pad_space((unsigned char*)arg1, len1, (unsigned char*)arg2, len2); break; case UTF8MB4_BIN_COLLATION_OID: + case UTF8_BIN_COLLATION_OID: result = strnncoll_utf8mb4_bin_pad_space((unsigned char*)arg1, len1, (unsigned char*)arg2, len2); break; + case BINARY_COLLATION_OID: + result = strnncoll_binary((unsigned char*)arg1, len1, (unsigned char*)arg2, len2); + break; default: ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Un-support feature"), @@ -600,9 +583,12 @@ Datum hash_text_by_builtin_colltions(const unsigned char *key, size_t len, Oid c switch (collid) { case UTF8MB4_GENERAL_CI_COLLATION_OID: case UTF8MB4_UNICODE_CI_COLLATION_OID: + case UTF8_GENERAL_CI_COLLATION_OID: + case UTF8_UNICODE_CI_COLLATION_OID: result = hash_utf8mb4_general_pad_space((unsigned char*)key, len); break; case UTF8MB4_BIN_COLLATION_OID: + case UTF8_BIN_COLLATION_OID: result = hash_utf8mb4_bin_pad_space((unsigned char*)key); break; default: @@ -615,6 +601,19 @@ Datum hash_text_by_builtin_colltions(const unsigned char *key, size_t len, Oid c return result; } +/* + * Byte-wise comparison for the binary collation: memcmp over the common prefix, + * then the shorter input sorts first. The tie-break compares the two lengths + * instead of casting their size_t difference to int, which can wrap to the wrong sign. + */ +static int strnncoll_binary(const unsigned char* arg1, size_t len1, + const unsigned char* arg2, size_t len2) +{ + size_t len = len1 < len2 ? len1 : len2; + int res = memcmp(arg1, arg2, len); + return res != 0 ? res : (len1 > len2) - (len1 < len2); +} + /* * When UTF8 fails to convert Unicode, * the sorted result is returned by comparing each byte. 
diff --git a/src/common/backend/catalog/namespace.cpp b/src/common/backend/catalog/namespace.cpp index f97e7826a..28d6d5cd3 100644 --- a/src/common/backend/catalog/namespace.cpp +++ b/src/common/backend/catalog/namespace.cpp @@ -4152,7 +4152,7 @@ Oid get_collation_oid(List* name, bool missing_ok) } if (DB_IS_CMPT(B_FORMAT)) { - colloid = get_collation_oid_with_lower_name(collation_name, dbencoding); + colloid = get_collation_oid_with_lower_name(collation_name, PG_INVALID_ENCODING); if (OidIsValid(colloid) && is_support_b_format_collation(colloid)) { return colloid; } diff --git a/src/common/backend/parser/parse_expr.cpp b/src/common/backend/parser/parse_expr.cpp index decc1e754..62bd32045 100644 --- a/src/common/backend/parser/parse_expr.cpp +++ b/src/common/backend/parser/parse_expr.cpp @@ -2962,7 +2962,6 @@ static Node* transformCollateClause(ParseState* pstate, CollateClause* c) } newc->collOid = LookupCollation(pstate, c->collname, c->location); newc->location = c->location; - check_binary_collation(newc->collOid, argtype); return (Node*)newc; } diff --git a/src/common/backend/parser/parse_type.cpp b/src/common/backend/parser/parse_type.cpp index 0b7ce96df..d700f775f 100644 --- a/src/common/backend/parser/parse_type.cpp +++ b/src/common/backend/parser/parse_type.cpp @@ -657,7 +657,7 @@ Oid LookupCollation(ParseState* pstate, List* collnames, int location) static Oid get_column_def_collation_b_format(ColumnDef* coldef, Oid typeOid, Oid typcollation, bool is_bin_type, Oid rel_coll_oid) { - if (coldef->typname->charset != PG_INVALID_ENCODING && !IsSupportCharsetType(typeOid)) { + if (coldef->typname->charset != PG_INVALID_ENCODING && !IsSupportCharsetType(typeOid) && !type_is_enum(typeOid)) { ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("type %s not support set charset", format_type_be(typeOid)))); } @@ -1591,3 +1591,9 @@ Oid LookupTypeInPackage(List* typeNames, const char* typeName, Oid pkgOid, Oid n } + +/* Report whether typid is a binary string type; only BLOBOID qualifies. */ +bool IsBinaryType(Oid typid) +{ + return 
typid == BLOBOID; +} \ No newline at end of file diff --git a/src/common/backend/utils/adt/like.cpp b/src/common/backend/utils/adt/like.cpp index 32be6bd64..faef75f5a 100644 --- a/src/common/backend/utils/adt/like.cpp +++ b/src/common/backend/utils/adt/like.cpp @@ -156,7 +156,7 @@ int GenericMatchText(char* s, int slen, char* p, int plen) int generic_match_text_with_collation(char* s, int slen, char* p, int plen, Oid collation) { - if (collation == UTF8MB4_GENERAL_CI_COLLATION_OID || collation == UTF8MB4_UNICODE_CI_COLLATION_OID) { + if (IS_UTF8_GENERAL_COLLATION(collation)) { return matchtext_utf8mb4((unsigned char*)s, slen, (unsigned char*)p, plen); } diff --git a/src/common/backend/utils/adt/varlena.cpp b/src/common/backend/utils/adt/varlena.cpp index fb72c9bc3..98c2e12ab 100644 --- a/src/common/backend/utils/adt/varlena.cpp +++ b/src/common/backend/utils/adt/varlena.cpp @@ -667,6 +667,31 @@ Datum bytea_string_agg_finalfn(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } +Oid binary_need_transform_typeid(Oid typeoid, Oid* collation) +{ + Oid new_typid = typeoid; + if (*collation == BINARY_COLLATION_OID) { + /* a switch is used so that more types can be added in the future */ + switch (typeoid) { + /* binary types need no transformation */ + case BLOBOID: + break; + /* string types are transformed to the binary type */ + case TEXTOID: + new_typid = BLOBOID; + break; + default: + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Un-support feature"), + errdetail("type %s cannot be set to binary collation currently", get_typename(typeoid)))); + break; + } + /* the binary collation does not need to be set as an attribute-level collation */ + *collation = InvalidOid; + } + return new_typid; +} + /* * textin - converts "..." 
to internal representation */ diff --git a/src/gausskernel/storage/access/common/reloptions.cpp b/src/gausskernel/storage/access/common/reloptions.cpp index 922bf7977..662f40695 100644 --- a/src/gausskernel/storage/access/common/reloptions.cpp +++ b/src/gausskernel/storage/access/common/reloptions.cpp @@ -2679,7 +2679,7 @@ void check_collate_in_options(List *user_options) (errmsg("Un-support feature"), errdetail("Forbid to set or change \"%s\" in non-B format", "collate")))); - if (!COLLATION_IN_B_FORMAT(collate)) + if (!COLLATION_IN_B_FORMAT(collate) && collate != DEFAULT_COLLATION_OID) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("this collation only cannot be specified here"))); tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collate)); diff --git a/src/include/catalog/gs_utf8_collation.h b/src/include/catalog/gs_utf8_collation.h index 918b3d914..6aa774a48 100644 --- a/src/include/catalog/gs_utf8_collation.h +++ b/src/include/catalog/gs_utf8_collation.h @@ -31,4 +31,10 @@ Oid binary_need_transform_typeid(Oid typeoid, Oid* collation); int matchtext_utf8mb4(unsigned char* t, int tlen, unsigned char* p, int plen); bool is_b_format_collation(Oid collation); +/* True if colloid is one of the utf8/utf8mb4 general_ci or unicode_ci collation OIDs; colloid may be evaluated more than once. */ +#define IS_UTF8_GENERAL_COLLATION(colloid) \ + (((colloid) == UTF8MB4_GENERAL_CI_COLLATION_OID) || \ + ((colloid) == UTF8MB4_UNICODE_CI_COLLATION_OID) || \ + ((colloid) == UTF8_GENERAL_CI_COLLATION_OID) || \ + ((colloid) == UTF8_UNICODE_CI_COLLATION_OID)) #endif /* GS_UTF8_COLLATION_H */ \ No newline at end of file diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index 3dff57ed2..8ced78a2a 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -99,7 +99,17 @@ DESCR("utf8mb4_unicode_ci collation"); DATA(insert OID = 1539 (utf8mb4_bin PGNSP PGUID 7 "utf8mb4_bin" "utf8mb4_bin" "PAD SPACE" _null_)); DESCR("utf8mb4_bin collation"); #define UTF8MB4_BIN_COLLATION_OID 1539 -/* GB10830's start with 1536 */ + +DATA(insert OID = 1551 
(utf8_general_ci PGNSP PGUID 7 "utf8_general_ci" "utf8_general_ci" "PAD SPACE" _null_)); +DESCR("utf8_general_ci collation"); +#define UTF8_GENERAL_CI_COLLATION_OID 1551 +DATA(insert OID = 1552 (utf8_unicode_ci PGNSP PGUID 7 "utf8_unicode_ci" "utf8_unicode_ci" "PAD SPACE" _null_)); +DESCR("utf8_unicode_ci collation"); +#define UTF8_UNICODE_CI_COLLATION_OID 1552 +DATA(insert OID = 1553 (utf8_bin PGNSP PGUID 7 "utf8_bin" "utf8_bin" "PAD SPACE" _null_)); +DESCR("utf8_bin collation"); +#define UTF8_BIN_COLLATION_OID 1553 +/* GB18030 collations start at 1792 */ #define B_FORMAT_COLLATION_OID_MAX 10000 diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index 898874457..2409238f3 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -860,9 +860,6 @@ DATA(insert OID = 3272 ( anyset PGNSP PGUID -1 f s H t t \054 0 0 0 anyset_in ((typid) == BYTEAWITHOUTORDERCOLOID || \ (typid) == BYTEAWITHOUTORDERWITHEQUALCOLOID) -#define IsBinaryType(typid) \ - ((typid) == BLOBOID) - #define IsSupportCharsetType(typid) \ (((typid) == TEXTOID) || \ ((typid) == VARCHAROID) || \ diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_844.sql b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_844.sql index cbab56539..572eecbbc 100644 --- a/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_844.sql +++ b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_844.sql @@ -10,7 +10,7 @@ query_str_nodes text; BEGIN query_str_nodes := 'select * from dbe_perf.node_name'; FOR row_name IN EXECUTE(query_str_nodes) LOOP - delete from pg_catalog.pg_collation where collname in ('utf8mb4_general_ci', 'utf8mb4_unicode_ci', 'utf8mb4_bin', 'binary'); + delete from pg_catalog.pg_collation where collname in ('utf8mb4_general_ci', 'utf8mb4_unicode_ci', 'utf8mb4_bin', 'binary', 'utf8_general_ci', 'utf8_unicode_ci', 
'utf8_bin'); END LOOP; return; END; diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_844.sql b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_844.sql index cbab56539..572eecbbc 100644 --- a/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_844.sql +++ b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_844.sql @@ -10,7 +10,7 @@ query_str_nodes text; BEGIN query_str_nodes := 'select * from dbe_perf.node_name'; FOR row_name IN EXECUTE(query_str_nodes) LOOP - delete from pg_catalog.pg_collation where collname in ('utf8mb4_general_ci', 'utf8mb4_unicode_ci', 'utf8mb4_bin', 'binary'); + delete from pg_catalog.pg_collation where collname in ('utf8mb4_general_ci', 'utf8mb4_unicode_ci', 'utf8mb4_bin', 'binary', 'utf8_general_ci', 'utf8_unicode_ci', 'utf8_bin'); END LOOP; return; END; diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_844.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_844.sql index a44bd4c18..8d26cdc60 100644 --- a/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_844.sql +++ b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_844.sql @@ -56,6 +56,15 @@ select pg_catalog.Insert_pg_collation_temp('utf8mb4_unicode_ci', 11, 10, 7, 'utf SET LOCAL inplace_upgrade_next_system_object_oids = IUO_GENERAL, 1539; select pg_catalog.Insert_pg_collation_temp('utf8mb4_bin', 11, 10, 7, 'utf8mb4_bin', 'utf8mb4_bin', 'PAD SPACE', null); +SET LOCAL inplace_upgrade_next_system_object_oids = IUO_GENERAL, 1551; +select pg_catalog.Insert_pg_collation_temp('utf8_general_ci', 11, 10, 7, 'utf8_general_ci', 'utf8_general_ci', 'PAD SPACE', null); + +SET LOCAL inplace_upgrade_next_system_object_oids = IUO_GENERAL, 1552; +select 
pg_catalog.Insert_pg_collation_temp('utf8_unicode_ci', 11, 10, 7, 'utf8_unicode_ci', 'utf8_unicode_ci', 'PAD SPACE', null); + +SET LOCAL inplace_upgrade_next_system_object_oids = IUO_GENERAL, 1553; +select pg_catalog.Insert_pg_collation_temp('utf8_bin', 11, 10, 7, 'utf8_bin', 'utf8_bin', 'PAD SPACE', null); + DROP FUNCTION pg_catalog.Insert_pg_collation_temp; SET LOCAL inplace_upgrade_next_system_object_oids = IUO_CATALOG, false, true, 0, 0, 0, 3147; diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_844.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_844.sql index a44bd4c18..8d26cdc60 100644 --- a/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_844.sql +++ b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_92_844.sql @@ -56,6 +56,15 @@ select pg_catalog.Insert_pg_collation_temp('utf8mb4_unicode_ci', 11, 10, 7, 'utf SET LOCAL inplace_upgrade_next_system_object_oids = IUO_GENERAL, 1539; select pg_catalog.Insert_pg_collation_temp('utf8mb4_bin', 11, 10, 7, 'utf8mb4_bin', 'utf8mb4_bin', 'PAD SPACE', null); +SET LOCAL inplace_upgrade_next_system_object_oids = IUO_GENERAL, 1551; +select pg_catalog.Insert_pg_collation_temp('utf8_general_ci', 11, 10, 7, 'utf8_general_ci', 'utf8_general_ci', 'PAD SPACE', null); + +SET LOCAL inplace_upgrade_next_system_object_oids = IUO_GENERAL, 1552; +select pg_catalog.Insert_pg_collation_temp('utf8_unicode_ci', 11, 10, 7, 'utf8_unicode_ci', 'utf8_unicode_ci', 'PAD SPACE', null); + +SET LOCAL inplace_upgrade_next_system_object_oids = IUO_GENERAL, 1553; +select pg_catalog.Insert_pg_collation_temp('utf8_bin', 11, 10, 7, 'utf8_bin', 'utf8_bin', 'PAD SPACE', null); + DROP FUNCTION pg_catalog.Insert_pg_collation_temp; SET LOCAL inplace_upgrade_next_system_object_oids = IUO_CATALOG, false, true, 0, 0, 0, 3147; diff --git a/src/include/parser/parse_type.h 
b/src/include/parser/parse_type.h index aca30d082..0280ee644 100644 --- a/src/include/parser/parse_type.h +++ b/src/include/parser/parse_type.h @@ -58,5 +58,6 @@ extern bool IsTypeTableInInstallationGroup(const Type type_tup); extern HeapTuple FindPkgVariableType(ParseState* pstate, const TypeName* typname, int32* typmod_p); extern char* CastPackageTypeName(const char* typName, Oid pkgOid, bool isPackage, bool isPublic = true); #define ISCOMPLEX(typeid) (typeidTypeRelid(typeid) != InvalidOid) +extern bool IsBinaryType(Oid typid); #endif /* PARSE_TYPE_H */ diff --git a/src/test/regress/expected/test_b_format_collate.out b/src/test/regress/expected/test_b_format_collate.out index 05bd77ccc..1fb53a07e 100644 --- a/src/test/regress/expected/test_b_format_collate.out +++ b/src/test/regress/expected/test_b_format_collate.out @@ -1802,6 +1802,382 @@ select * from pg_get_tabledef('test25'); WITH (orientation=row, compression=no); (1 row) +--test utf8 collate +select 'abCdEf' = 'abcdef' collate "utf8_general_ci"; + ?column? +---------- + t +(1 row) + +select 'abCdEf' != 'abcdef' collate "utf8_general_ci"; + ?column? +---------- + f +(1 row) + +select 'abCdEf' > 'abcdef' collate "utf8_general_ci"; + ?column? +---------- + f +(1 row) + +select 'abCdEf' < 'abcdef' collate "utf8_general_ci"; + ?column? +---------- + f +(1 row) + +select 'abCdEf' = 'abcdef' collate "utf8_unicode_ci"; + ?column? +---------- + t +(1 row) + +select 'abCdEf' != 'abcdef' collate "utf8_unicode_ci"; + ?column? +---------- + f +(1 row) + +select 'abCdEf' > 'abcdef' collate "utf8_unicode_ci"; + ?column? +---------- + f +(1 row) + +select 'abCdEf' < 'abcdef' collate "utf8_unicode_ci"; + ?column? +---------- + f +(1 row) + +select 'abCdEf' = 'abcdef' collate "utf8_bin"; + ?column? +---------- + f +(1 row) + +select 'abCdEf' > 'abcdef' collate "utf8_bin"; + ?column? +---------- + f +(1 row) + +select 'abCdEf' < 'abcdef' collate "utf8_bin"; + ?column? 
+---------- + t +(1 row) + +drop table if exists column_collate; +create table column_collate(f1 text collate "utf8_general_ci", f2 char(15) collate "utf8_bin", f3 text collate 'utf8_unicode_ci'); +insert into column_collate values('S','S','S'),('s','s','s'),('ś','ś','ś'),('Š','Š','Š'),('z','z','z'),('Z','Z','Z'),('c','c','c'),('A','A','A'),('C','C','C'); +insert into column_collate values('AaA','AaA','AaA'),('bb','bb','bb'),('aAA','aAA','aAA'),('Bb','Bb','Bb'),('dD','dd','dd'),('Cc','Cc','Cc'),('AAA','AAA','AAA'); +insert into column_collate values('A1中文','A1中文','A1中文'), ('b1中文','b1中文','b1中文'), ('a2中文','a2中文','a2中文'), +('B2中文','B2中文','B2中文'), ('中文d1','中文d1','中文d1'), ('中文C1','中文C1','中文C1'), ('中文A3','中文A3','中文A3'); +-- test where clause +select f1 from column_collate where f1 = 's'; + f1 +---- + S + s + ś + Š +(4 rows) + +select f1 from column_collate where f1 = 'aaa'; + f1 +----- + AaA + aAA + AAA +(3 rows) + +select f2 from column_collate where f2 = 's'; + f2 +----------------- + s +(1 row) + +select f2 from column_collate where f2 = 'aaa'; + f2 +---- +(0 rows) + +select f2 from column_collate where f3 = 's'; + f2 +----------------- + S + s + ś + Š +(4 rows) + +select f2 from column_collate where f3 = 'aaa'; + f2 +----------------- + AaA + aAA + AAA +(3 rows) + +-- test order by clause +select f1 from column_collate order by f1; + f1 +-------- + A + A1中文 + a2中文 + AaA + AAA + aAA + b1中文 + B2中文 + Bb + bb + c + C + Cc + dD + S + s + ś + Š + Z + z + 中文A3 + 中文C1 + 中文d1 +(23 rows) + +select f2 from column_collate order by f2; + f2 +----------------- + A + A1中文 + AAA + AaA + B2中文 + Bb + C + Cc + S + Z + a2中文 + aAA + b1中文 + bb + c + dd + s + z + ś + Š + 中文A3 + 中文C1 + 中文d1 +(23 rows) + +select f3 from column_collate order by f3; + f3 +-------- + A + A1中文 + a2中文 + AaA + AAA + aAA + b1中文 + B2中文 + Bb + bb + c + C + Cc + dd + S + s + ś + Š + Z + z + 中文A3 + 中文C1 + 中文d1 +(23 rows) + +-- test distinct clause +insert into column_collate values ('AbcdEf','AbcdEf','AbcdEf'), 
('abcdEF','abcdEF','abcdEF'), ('中文AbCdEFG','中文AbCdEFG','中文AbCdEFG'), +('中文abcdEFG','中文abcdEFG','中文abcdEFG'), ('中文Ab','中文Ab','中文Ab'), ('中文ab','中文ab','中文ab'); +select distinct f1 from column_collate order by f1 limit 10; + f1 +-------- + A + A1中文 + a2中文 + AaA + AbcdEf + b1中文 + B2中文 + bb + c + Cc +(10 rows) + +select distinct f2 from column_collate order by f2 limit 10; + f2 +----------------- + A + A1中文 + AAA + AaA + AbcdEf + B2中文 + Bb + C + Cc + S +(10 rows) + +select distinct f3 from column_collate order by f3 limit 10; + f3 +-------- + A + A1中文 + a2中文 + AaA + AbcdEf + b1中文 + B2中文 + bb + c + Cc +(10 rows) + +-- test group by +select count(f1),f1 from column_collate group by f1 order by f1 limit 10; + count | f1 +-------+-------- + 1 | A + 1 | A1中文 + 1 | a2中文 + 3 | AaA + 2 | AbcdEf + 1 | b1中文 + 1 | B2中文 + 2 | bb + 2 | c + 1 | Cc +(10 rows) + +select count(f2),f2 from column_collate group by f2 order by f2 limit 10; + count | f2 +-------+----------------- + 1 | A + 1 | A1中文 + 1 | AAA + 1 | AaA + 1 | AbcdEf + 1 | B2中文 + 1 | Bb + 1 | C + 1 | Cc + 1 | S +(10 rows) + +select count(f3),f3 from column_collate group by f3 order by f3 limit 10; + count | f3 +-------+-------- + 1 | A + 1 | A1中文 + 1 | a2中文 + 3 | AaA + 2 | AbcdEf + 1 | b1中文 + 1 | B2中文 + 2 | bb + 2 | c + 1 | Cc +(10 rows) + +-- test like +select f1 from column_collate where f1 like 'A_%'; + f1 +-------- + AaA + aAA + AAA + A1中文 + a2中文 + AbcdEf + abcdEF +(7 rows) + +select f1 from column_collate where f1 like '%s%'; + f1 +---- + S + s + ś + Š +(4 rows) + +select f1 from column_collate where f1 like 'A%f'; + f1 +-------- + AbcdEf + abcdEF +(2 rows) + +select f2 from column_collate where f2 like 'A_%'; + f2 +----------------- + A + AaA + AAA + A1中文 + AbcdEf +(5 rows) + +select f2 from column_collate where f2 like '%s%'; + f2 +----------------- + s +(1 row) + +select f2 from column_collate where f2 like 'A%f'; + f2 +---- +(0 rows) + +select f3 from column_collate where f3 like 'A_%'; + f3 +-------- + AaA + aAA + AAA 
+ A1中文 + a2中文 + AbcdEf + abcdEF +(7 rows) + +select f3 from column_collate where f3 like '%s%'; + f3 +---- + S + s + ś + Š +(4 rows) + +select f3 from column_collate where f3 like 'A%f'; + f3 +-------- + AbcdEf + abcdEF +(2 rows) + \c regression clean connection to all force for database test_collate_A; clean connection to all force for database test_collate_B; diff --git a/src/test/regress/sql/test_b_format_collate.sql b/src/test/regress/sql/test_b_format_collate.sql index acc282d1a..f7aeae451 100644 --- a/src/test/regress/sql/test_b_format_collate.sql +++ b/src/test/regress/sql/test_b_format_collate.sql @@ -486,6 +486,60 @@ select * from pg_get_tabledef('test24'); create table test25 as select @v1 collate 'utf8mb4_bin', @v2; select * from pg_get_tabledef('test25'); +--test utf8 collate +select 'abCdEf' = 'abcdef' collate "utf8_general_ci"; +select 'abCdEf' != 'abcdef' collate "utf8_general_ci"; +select 'abCdEf' > 'abcdef' collate "utf8_general_ci"; +select 'abCdEf' < 'abcdef' collate "utf8_general_ci"; +select 'abCdEf' = 'abcdef' collate "utf8_unicode_ci"; +select 'abCdEf' != 'abcdef' collate "utf8_unicode_ci"; +select 'abCdEf' > 'abcdef' collate "utf8_unicode_ci"; +select 'abCdEf' < 'abcdef' collate "utf8_unicode_ci"; +select 'abCdEf' = 'abcdef' collate "utf8_bin"; +select 'abCdEf' > 'abcdef' collate "utf8_bin"; +select 'abCdEf' < 'abcdef' collate "utf8_bin"; +drop table if exists column_collate; +create table column_collate(f1 text collate "utf8_general_ci", f2 char(15) collate "utf8_bin", f3 text collate 'utf8_unicode_ci'); +insert into column_collate values('S','S','S'),('s','s','s'),('ś','ś','ś'),('Š','Š','Š'),('z','z','z'),('Z','Z','Z'),('c','c','c'),('A','A','A'),('C','C','C'); +insert into column_collate values('AaA','AaA','AaA'),('bb','bb','bb'),('aAA','aAA','aAA'),('Bb','Bb','Bb'),('dD','dd','dd'),('Cc','Cc','Cc'),('AAA','AAA','AAA'); +insert into column_collate values('A1中文','A1中文','A1中文'), ('b1中文','b1中文','b1中文'), ('a2中文','a2中文','a2中文'), 
+('B2中文','B2中文','B2中文'), ('中文d1','中文d1','中文d1'), ('中文C1','中文C1','中文C1'), ('中文A3','中文A3','中文A3'); +-- test where clause +select f1 from column_collate where f1 = 's'; +select f1 from column_collate where f1 = 'aaa'; +select f2 from column_collate where f2 = 's'; +select f2 from column_collate where f2 = 'aaa'; +select f2 from column_collate where f3 = 's'; +select f2 from column_collate where f3 = 'aaa'; + +-- test order by clause +select f1 from column_collate order by f1; +select f2 from column_collate order by f2; +select f3 from column_collate order by f3; + +-- test distinct clause +insert into column_collate values ('AbcdEf','AbcdEf','AbcdEf'), ('abcdEF','abcdEF','abcdEF'), ('中文AbCdEFG','中文AbCdEFG','中文AbCdEFG'), +('中文abcdEFG','中文abcdEFG','中文abcdEFG'), ('中文Ab','中文Ab','中文Ab'), ('中文ab','中文ab','中文ab'); +select distinct f1 from column_collate order by f1 limit 10; +select distinct f2 from column_collate order by f2 limit 10; +select distinct f3 from column_collate order by f3 limit 10; + +-- test group by +select count(f1),f1 from column_collate group by f1 order by f1 limit 10; +select count(f2),f2 from column_collate group by f2 order by f2 limit 10; +select count(f3),f3 from column_collate group by f3 order by f3 limit 10; + +-- test like +select f1 from column_collate where f1 like 'A_%'; +select f1 from column_collate where f1 like '%s%'; +select f1 from column_collate where f1 like 'A%f'; +select f2 from column_collate where f2 like 'A_%'; +select f2 from column_collate where f2 like '%s%'; +select f2 from column_collate where f2 like 'A%f'; +select f3 from column_collate where f3 like 'A_%'; +select f3 from column_collate where f3 like '%s%'; +select f3 from column_collate where f3 like 'A%f'; + \c regression clean connection to all force for database test_collate_A; clean connection to all force for database test_collate_B;