diff --git a/be/src/exec/odbc_connector.cpp b/be/src/exec/odbc_connector.cpp index 5ca74080df..169b626726 100644 --- a/be/src/exec/odbc_connector.cpp +++ b/be/src/exec/odbc_connector.cpp @@ -48,14 +48,9 @@ static constexpr uint32_t BIG_COLUMN_SIZE_BUFFER = 65535; // Default max buffer size use in insert to: 50MB, normally a batch is smaller than the size static constexpr uint32_t INSERT_BUFFER_SIZE = 1024l * 1024 * 50; -static doris::Status utf8_to_wstring(const std::string& str, std::u16string& out) { - std::wstring_convert, char16_t> utf8_ucs2_cvt; - try { - out = utf8_ucs2_cvt.from_bytes(str); - } catch (std::range_error& e) { - return doris::Status::InternalError("UNICODE out of supported range"); - } - return doris::Status::OK(); +/* Converts the UTF-8 bytes in [first, last) to UTF-16. NOTE(review): the removed utf8_to_wstring caught std::range_error from from_bytes and returned a Status; this helper has no try/catch, so such an exception would propagate to the caller - confirm that is intended. */ static std::u16string utf8_to_u16string(const char* first, const char* last) { + std::wstring_convert, char16_t> utf8_utf16_cvt; + return utf8_utf16_cvt.from_bytes(first, last); } namespace doris { @@ -133,8 +128,7 @@ Status ODBCConnector::query() { "alloc statement"); // Translate utf8 string to utf16 to use unicode encoding - std::u16string wquery; - RETURN_IF_ERROR(utf8_to_wstring(_sql_str, wquery)); + auto wquery = utf8_to_u16string(_sql_str.c_str(), _sql_str.c_str() + _sql_str.length()); ODBC_DISPOSE(_stmt, SQL_HANDLE_STMT, SQLExecDirectW(_stmt, (SQLWCHAR*)(wquery.c_str()), SQL_NTS), "exec direct"); @@ -313,10 +307,8 @@ Status ODBCConnector::append(const std::string& table_name, RowBatch* batch, } } // Translate utf8 string to utf16 to use unicode encodeing - RETURN_IF_ERROR(utf8_to_wstring( - std::string(_insert_stmt_buffer.data(), - _insert_stmt_buffer.data() + _insert_stmt_buffer.size()), - insert_stmt)); + insert_stmt = utf8_to_u16string(_insert_stmt_buffer.data(), + _insert_stmt_buffer.data() + _insert_stmt_buffer.size()); } { @@ -499,10 +491,8 @@ Status ODBCConnector::append(const std::string& table_name, vectorized::Block* b } } // Translate utf8 string to utf16 to use unicode encodeing - RETURN_IF_ERROR(utf8_to_wstring( 
- std::string(_insert_stmt_buffer.data(), - _insert_stmt_buffer.data() + _insert_stmt_buffer.size()), - insert_stmt)); + insert_stmt = utf8_to_u16string(_insert_stmt_buffer.data(), + _insert_stmt_buffer.data() + _insert_stmt_buffer.size()); } { diff --git a/docs/en/docs/ecosystem/external-table/odbc-of-doris.md b/docs/en/docs/ecosystem/external-table/odbc-of-doris.md index 2d850f8073..5c3c35595b 100644 --- a/docs/en/docs/ecosystem/external-table/odbc-of-doris.md +++ b/docs/en/docs/ecosystem/external-table/odbc-of-doris.md @@ -381,5 +381,8 @@ This is the compatibility problem between MySQL database ODBC driver and existin Connection to the database fails. The` Err: part` represents the error of different database connection failures. This is usually a configuration problem. You should check whether the IP address, port or account password are mismatched. - + 11. Garbled characters appear when reading and writing emoji in MySQL ODBC tables + + The default encoding used by Doris when connecting to ODBC tables is utf8. Because the default utf8 encoding in MySQL is utf8mb3, it cannot represent emoji, which require 4-byte encoding. Set `charset`=`utf8mb4` when creating the MySQL ODBC table, and emoji can then be read and written normally 😀. + diff --git a/docs/zh-CN/docs/ecosystem/external-table/odbc-of-doris.md b/docs/zh-CN/docs/ecosystem/external-table/odbc-of-doris.md index 8d1df916d6..05cb3d81a1 100644 --- a/docs/zh-CN/docs/ecosystem/external-table/odbc-of-doris.md +++ b/docs/zh-CN/docs/ecosystem/external-table/odbc-of-doris.md @@ -371,3 +371,7 @@ sudo alien -i oracle-instantclient19.13-sqlplus-19.13.0.0.0-2.x86_64.rpm 10. 报错`driver connect Err: xxx` 通常是连接数据库失败,Err部分代表了不同的数据库连接失败的报错。这种情况通常是配置存在问题。可以检查是否错配了ip地址,端口或账号密码。 + +11. 
读写mysql外表的emoji表情出现乱码 + + Doris进行odbc外表连接时,默认采用的编码为utf8,由于mysql之中默认的utf8编码为utf8mb3,无法表示需要4字节编码的emoji表情。这里需要在建立mysql外表时设置`charset`=`utf8mb4`,便可以正常读写emoji表情😀。 diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 53ddeab48d..75208057c5 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -2954,6 +2954,7 @@ public class Env { sb.append("\"password\" = \"").append(hidePassword ? "" : odbcTable.getPasswd()).append("\",\n"); sb.append("\"driver\" = \"").append(odbcTable.getOdbcDriver()).append("\",\n"); sb.append("\"odbc_type\" = \"").append(odbcTable.getOdbcTableTypeName()).append("\",\n"); + sb.append("\"charset\" = \"").append(odbcTable.getCharset()).append("\",\n"); /* key is spelled "charset" to match the property name documented for ODBC tables */ } else { sb.append("\"odbc_catalog_resource\" = \"").append(odbcTable.getOdbcCatalogResourceName()) .append("\",\n");