From 8fc9d804790efdd6d97a112ecb58a2bc42bd65ed Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Wed, 21 Feb 2024 16:40:26 +0800 Subject: [PATCH] [compatibility](MySQL) update charset to utf8mb4, collation to utf8mb4_0900_bin (#31046) Doris's behaviour is more like utf8mb4 and utf8mb4_0900_bin than utf8 and utf8_general_ci --- .../schema_scanner/schema_charsets_scanner.cpp | 2 +- .../schema_scanner/schema_collations_scanner.cpp | 2 +- .../schema_scanner/schema_schemata_scanner.cpp | 4 ++-- .../java/org/apache/doris/qe/SessionVariable.java | 14 +++++++------- .../java/org/apache/doris/qe/ShowExecutor.java | 10 +++++----- .../org/apache/doris/analysis/SelectStmtTest.java | 2 +- .../show_p0/test_show_create_table_and_views.out | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/be/src/exec/schema_scanner/schema_charsets_scanner.cpp b/be/src/exec/schema_scanner/schema_charsets_scanner.cpp index 1b2b8a1558..9bd7ad7919 100644 --- a/be/src/exec/schema_scanner/schema_charsets_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_charsets_scanner.cpp @@ -39,7 +39,7 @@ std::vector SchemaCharsetsScanner::_s_css_columns = { }; SchemaCharsetsScanner::CharsetStruct SchemaCharsetsScanner::_s_charsets[] = { - {"utf8", "utf8_general_ci", "UTF-8 Unicode", 3}, + {"utf8mb4", "utf8mb4_0900_bin", "UTF-8 Unicode", 4}, {nullptr, nullptr, nullptr, 0}, }; diff --git a/be/src/exec/schema_scanner/schema_collations_scanner.cpp b/be/src/exec/schema_scanner/schema_collations_scanner.cpp index 377cff6948..812a8cff18 100644 --- a/be/src/exec/schema_scanner/schema_collations_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_collations_scanner.cpp @@ -41,7 +41,7 @@ std::vector SchemaCollationsScanner::_s_cols_columns }; SchemaCollationsScanner::CollationStruct SchemaCollationsScanner::_s_collations[] = { - {"utf8_general_ci", "utf8", 33, "Yes", "Yes", 1}, + {"utf8mb4_0900_bin", "utf8mb4", 309, "Yes", "Yes", 1}, {nullptr, nullptr, 0, nullptr, nullptr, 0}, }; diff --git a/be/src/exec/schema_scanner/schema_schemata_scanner.cpp b/be/src/exec/schema_scanner/schema_schemata_scanner.cpp index a465ab6550..d6e82f611e 100644 --- a/be/src/exec/schema_scanner/schema_schemata_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_schemata_scanner.cpp @@ -127,7 +127,7 @@ Status SchemaSchemataScanner::_fill_block_impl(vectorized::Block* block) { } // DEFAULT_CHARACTER_SET_NAME { - std::string src = "utf8"; + std::string src = "utf8mb4"; StringRef str = StringRef(src.c_str(), src.size()); for (int i = 0; i < dbs_num; ++i) { datas[i] = &str; @@ -136,7 +136,7 @@ Status SchemaSchemataScanner::_fill_block_impl(vectorized::Block* block) { } // DEFAULT_COLLATION_NAME { - std::string src = "utf8_general_ci"; + std::string src = "utf8mb4_0900_bin"; StringRef str = StringRef(src.c_str(), src.size()); for (int i = 0; i < dbs_num; ++i) { datas[i] = &str; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 4c8a500682..4b4d108650 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -650,20 +650,20 @@ public class SessionVariable implements Serializable, Writable { // this is used to make c3p0 library happy @VariableMgr.VarAttr(name = CHARACTER_SET_CLIENT) - public String charsetClient = "utf8"; + public String charsetClient = "utf8mb4"; @VariableMgr.VarAttr(name = CHARACTER_SET_CONNNECTION) - public String charsetConnection = "utf8"; + public String charsetConnection = "utf8mb4"; @VariableMgr.VarAttr(name = CHARACTER_SET_RESULTS) - public String charsetResults = "utf8"; + public String charsetResults = "utf8mb4"; @VariableMgr.VarAttr(name = CHARACTER_SET_SERVER) - public String charsetServer = "utf8"; + public String charsetServer = "utf8mb4"; @VariableMgr.VarAttr(name = COLLATION_CONNECTION) - public String collationConnection = "utf8_general_ci"; + public String collationConnection = "utf8mb4_0900_bin"; @VariableMgr.VarAttr(name = COLLATION_DATABASE) - public String collationDatabase = "utf8_general_ci"; + public String collationDatabase = "utf8mb4_0900_bin"; @VariableMgr.VarAttr(name = COLLATION_SERVER) - public String collationServer = "utf8_general_ci"; + public String collationServer = "utf8mb4_0900_bin"; // this is used to make c3p0 library happy @VariableMgr.VarAttr(name = SQL_AUTO_IS_NULL) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index c030dfbc66..58b96cde26 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -1035,7 +1035,7 @@ public class ShowExecutor { } if (table instanceof View) { - rows.add(Lists.newArrayList(table.getName(), createTableStmt.get(0), "utf8", "utf8_general_ci")); + rows.add(Lists.newArrayList(table.getName(), createTableStmt.get(0), "utf8mb4", "utf8mb4_0900_bin")); resultSet = new ShowResultSet(ShowCreateTableStmt.getViewMetaData(), rows); } else { if (showStmt.isView()) { @@ -1661,10 +1661,10 @@ public class ShowExecutor { ShowCollationStmt showStmt = (ShowCollationStmt) stmt; List> rows = Lists.newArrayList(); List row = Lists.newArrayList(); - // | utf8_general_ci | utf8 | 33 | Yes | Yes | 1 | - row.add("utf8_general_ci"); - row.add("utf8"); - row.add("33"); + // | utf8mb4_0900_bin | utf8mb4 | 309 | Yes | Yes | 1 | + row.add("utf8mb4_0900_bin"); + row.add("utf8mb4"); + row.add("309"); row.add("Yes"); row.add("Yes"); row.add("1"); diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java index 76bbdec5fd..4fa49376f7 100755 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java @@ -479,7 +479,7 @@ public class SelectStmtTest { + "character_set_name,\n" - + "is_default collate utf8_general_ci = 'Yes' as is_default\n" + + "is_default collate utf8mb4_0900_bin = 'Yes' as is_default\n" + "from information_schema.collations"; dorisAssert.query(sql).explainQuery(); } diff --git a/regression-test/data/show_p0/test_show_create_table_and_views.out b/regression-test/data/show_p0/test_show_create_table_and_views.out index fe4d8ea9fc..c6b261e5b6 100644 --- a/regression-test/data/show_p0/test_show_create_table_and_views.out +++ b/regression-test/data/show_p0/test_show_create_table_and_views.out @@ -24,7 +24,7 @@ show_create_table_and_views_table CREATE TABLE `show_create_table_and_views_tabl 3 1 -- !show -- -show_create_table_and_views_view CREATE VIEW `show_create_table_and_views_view` COMMENT 'VIEW' AS SELECT `user_id` AS `user_id`, `cost` AS `cost` FROM `show_create_table_and_views_db`.`show_create_table_and_views_table` WHERE (`good_id` = 2); utf8 utf8_general_ci +show_create_table_and_views_view CREATE VIEW `show_create_table_and_views_view` COMMENT 'VIEW' AS SELECT `user_id` AS `user_id`, `cost` AS `cost` FROM `show_create_table_and_views_db`.`show_create_table_and_views_table` WHERE (`good_id` = 2); utf8mb4 utf8mb4_0900_bin -- !select -- 1 47