[improvement](meta) make database,table,column names to support unicode (replace PR #13467 with this) (#14531)

Make database, table, column and other names support Unicode by changing the LABEL_REGEX, COMMON_NAME_REGEX, COMMON_TABLE_NAME_REGEX and COLUMN_NAME_REGEX regular expressions in class FeNameFormat.

P.S. @SharpRay has transferred PR #13467 to me, and I'm responsible for the task now. There will be some modifications during the review period, so I have created a new PR and the original #13467 can be closed. Thanks.
This commit is contained in:
lvliang
2023-02-28 18:50:36 +08:00
committed by GitHub
parent dd4bd3f360
commit 34813bae13
8 changed files with 159 additions and 18 deletions

View File

@ -60,6 +60,7 @@
#include "util/thrift_rpc_helper.h"
#include "util/time.h"
#include "util/uid_util.h"
#include "util/url_coding.h"
namespace doris {
using namespace ErrorCode;
@ -227,8 +228,8 @@ int StreamLoadAction::on_header(HttpRequest* req) {
ctx->load_type = TLoadType::MANUL_LOAD;
ctx->load_src_type = TLoadSourceType::RAW;
ctx->db = req->param(HTTP_DB_KEY);
ctx->table = req->param(HTTP_TABLE_KEY);
url_decode(req->param(HTTP_DB_KEY), &ctx->db);
url_decode(req->param(HTTP_TABLE_KEY), &ctx->table);
ctx->label = req->header(HTTP_LABEL_KEY);
if (ctx->label.empty()) {
ctx->label = generate_uuid_string();

View File

@ -21,35 +21,43 @@ import org.apache.doris.alter.SchemaChangeHandler;
import org.apache.doris.analysis.CreateMaterializedViewStmt;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.mysql.privilege.Role;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.VariableMgr;
import com.google.common.base.Strings;
public class FeNameFormat {
private static final String LABEL_REGEX = "^[-_A-Za-z0-9]{1,128}$";
private static final String COMMON_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9_]{0,63}$";
private static final String COMMON_TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9_]*$";
private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9_]*$";
private static final String COLUMN_NAME_REGEX = "^[_a-zA-Z@0-9][.a-zA-Z0-9_+-/><?@#$%^&*]{0,255}$";
private static final String UNICODE_LABEL_REGEX = "^[-_A-Za-z0-9\\p{L}]{1,128}$";
private static final String UNICODE_COMMON_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9_\\p{L}]{0,63}$";
private static final String UNICODE_TABLE_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9_\\p{L}]*$";
private static final String UNICODE_COLUMN_NAME_REGEX
= "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/><?@#$%^&*\\p{L}]{0,255}$";
public static final String FORBIDDEN_PARTITION_NAME = "placeholder_";
public static void checkCatalogName(String catalogName) throws AnalysisException {
if (!InternalCatalog.INTERNAL_CATALOG_NAME.equals(catalogName) && (Strings.isNullOrEmpty(catalogName)
|| !catalogName.matches(COMMON_NAME_REGEX))) {
|| !catalogName.matches(getCommonNameRegex()))) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_CATALOG_NAME, catalogName);
}
}
public static void checkDbName(String dbName) throws AnalysisException {
if (Strings.isNullOrEmpty(dbName) || !dbName.matches(COMMON_NAME_REGEX)) {
if (Strings.isNullOrEmpty(dbName) || !dbName.matches(getCommonNameRegex())) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_DB_NAME, dbName);
}
}
public static void checkTableName(String tableName) throws AnalysisException {
if (Strings.isNullOrEmpty(tableName)
|| !tableName.matches(COMMON_TABLE_NAME_REGEX)) {
|| !tableName.matches(getTableNameRegex())) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_TABLE_NAME, tableName,
COMMON_TABLE_NAME_REGEX);
getTableNameRegex());
}
if (tableName.length() > Config.table_name_length_limit) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLE_NAME_LENGTH_LIMIT, tableName,
@ -58,7 +66,7 @@ public class FeNameFormat {
}
public static void checkPartitionName(String partitionName) throws AnalysisException {
if (Strings.isNullOrEmpty(partitionName) || !partitionName.matches(COMMON_NAME_REGEX)) {
if (Strings.isNullOrEmpty(partitionName) || !partitionName.matches(getCommonNameRegex())) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_PARTITION_NAME, partitionName);
}
@ -68,13 +76,13 @@ public class FeNameFormat {
}
public static void checkColumnName(String columnName) throws AnalysisException {
if (Strings.isNullOrEmpty(columnName) || !columnName.matches(COLUMN_NAME_REGEX)) {
if (Strings.isNullOrEmpty(columnName) || !columnName.matches(getColumnNameRegex())) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
columnName, FeNameFormat.COLUMN_NAME_REGEX);
columnName, getColumnNameRegex());
}
if (columnName.startsWith(SchemaChangeHandler.SHADOW_NAME_PREFIX)) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
columnName, FeNameFormat.COLUMN_NAME_REGEX);
columnName, getColumnNameRegex());
}
if (columnName.startsWith(CreateMaterializedViewStmt.MATERIALIZED_VIEW_NAME_PREFIX)) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
@ -87,19 +95,19 @@ public class FeNameFormat {
}
public static void checkLabel(String label) throws AnalysisException {
if (Strings.isNullOrEmpty(label) || !label.matches(LABEL_REGEX)) {
throw new AnalysisException("Label format error. regex: " + LABEL_REGEX + ", label: " + label);
if (Strings.isNullOrEmpty(label) || !label.matches(getLabelRegex())) {
throw new AnalysisException("Label format error. regex: " + getLabelRegex() + ", label: " + label);
}
}
public static void checkUserName(String userName) throws AnalysisException {
if (Strings.isNullOrEmpty(userName) || !userName.matches(COMMON_NAME_REGEX)) {
if (Strings.isNullOrEmpty(userName) || !userName.matches(getCommonNameRegex())) {
throw new AnalysisException("invalid user name: " + userName);
}
}
public static void checkRoleName(String role, boolean canBeAdmin, String errMsg) throws AnalysisException {
if (Strings.isNullOrEmpty(role) || !role.matches(COMMON_NAME_REGEX)) {
if (Strings.isNullOrEmpty(role) || !role.matches(getCommonNameRegex())) {
throw new AnalysisException("invalid role format: " + role);
}
@ -121,12 +129,50 @@ public class FeNameFormat {
}
public static void checkCommonName(String type, String name) throws AnalysisException {
if (Strings.isNullOrEmpty(name) || !name.matches(COMMON_NAME_REGEX)) {
if (Strings.isNullOrEmpty(name) || !name.matches(getCommonNameRegex())) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_NAME_FORMAT, type, name);
}
}
/**
 * Tells whether Unicode names are currently allowed.
 *
 * <p>Reads the flag from the session variable of the current {@link ConnectContext}
 * when one is available; otherwise falls back to the default session variable
 * held by {@link VariableMgr}.
 *
 * @return {@code true} if the Unicode name support switch is on
 */
private static boolean isEnableUnicodeNameSupport() {
    return ConnectContext.get() != null
            ? ConnectContext.get().getSessionVariable().isEnableUnicodeNameSupport()
            : VariableMgr.getDefaultSessionVariable().isEnableUnicodeNameSupport();
}
public static String getColumnNameRegex() {
return COLUMN_NAME_REGEX;
if (FeNameFormat.isEnableUnicodeNameSupport()) {
return UNICODE_COLUMN_NAME_REGEX;
} else {
return COLUMN_NAME_REGEX;
}
}
/**
 * Returns the regular expression used to validate table names.
 *
 * @return the Unicode-aware pattern when Unicode name support is enabled,
 *         otherwise the ASCII-only pattern
 */
public static String getTableNameRegex() {
    return FeNameFormat.isEnableUnicodeNameSupport() ? UNICODE_TABLE_NAME_REGEX : TABLE_NAME_REGEX;
}
/**
 * Returns the regular expression used to validate labels.
 *
 * @return the Unicode-aware pattern when Unicode name support is enabled,
 *         otherwise the ASCII-only pattern
 */
public static String getLabelRegex() {
    return FeNameFormat.isEnableUnicodeNameSupport() ? UNICODE_LABEL_REGEX : LABEL_REGEX;
}
/**
 * Returns the regular expression used to validate common names
 * (catalog, database, partition, user, role and other generic names).
 *
 * @return the Unicode-aware pattern when Unicode name support is enabled,
 *         otherwise the ASCII-only pattern
 */
public static String getCommonNameRegex() {
    return FeNameFormat.isEnableUnicodeNameSupport() ? UNICODE_COMMON_NAME_REGEX : COMMON_NAME_REGEX;
}
}

View File

@ -258,6 +258,9 @@ public class SessionVariable implements Serializable, Writable {
public static final String ENABLE_SHARE_HASH_TABLE_FOR_BROADCAST_JOIN
= "enable_share_hash_table_for_broadcast_join";
// support unicode in label, table, column, common name check
public static final String ENABLE_UNICODE_NAME_SUPPORT = "enable_unicode_name_support";
public static final String REPEAT_MAX_NUM = "repeat_max_num";
public static final String GROUP_CONCAT_MAX_LEN = "group_concat_max_len";
@ -700,7 +703,11 @@ public class SessionVariable implements Serializable, Writable {
@VariableMgr.VarAttr(name = ENABLE_SHARE_HASH_TABLE_FOR_BROADCAST_JOIN, fuzzy = true)
public boolean enableShareHashTableForBroadcastJoin = true;
@VariableMgr.VarAttr(name = ENABLE_UNICODE_NAME_SUPPORT)
public boolean enableUnicodeNameSupport = false;
@VariableMgr.VarAttr(name = REPEAT_MAX_NUM, needForward = true)
public int repeatMaxNum = 10000;
@VariableMgr.VarAttr(name = GROUP_CONCAT_MAX_LEN)
@ -1501,6 +1508,14 @@ public class SessionVariable implements Serializable, Writable {
this.fragmentTransmissionCompressionCodec = codec;
}
/**
 * Returns the current value of the {@code enable_unicode_name_support} session variable.
 *
 * @return {@code true} if Unicode name support is enabled for this session
 */
public boolean isEnableUnicodeNameSupport() {
return enableUnicodeNameSupport;
}
/**
 * Sets the {@code enable_unicode_name_support} session variable.
 *
 * @param enableUnicodeNameSupport {@code true} to enable Unicode name support for this session
 */
public void setEnableUnicodeNameSupport(boolean enableUnicodeNameSupport) {
this.enableUnicodeNameSupport = enableUnicodeNameSupport;
}
public boolean isDropTableIfCtasFailed() {
return dropTableIfCtasFailed;
}

View File

@ -19,6 +19,7 @@ package org.apache.doris.analysis;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.ExceptionChecker;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.utframe.TestWithFeService;
import org.junit.jupiter.api.Test;
@ -42,7 +43,7 @@ public class CreateViewStmtTest extends TestWithFeService {
connectContext.setDatabase("default_cluster:test");
String createViewStr1 = "create view 1view1 as select k1,k2 from test.table1;";
ExceptionChecker.expectThrowsWithMsg(AnalysisException.class,
"Incorrect table name '1view1'. Table name regex is '^[a-zA-Z][a-zA-Z0-9_]*$'",
String.format("Incorrect table name '1view1'. Table name regex is '%s'", FeNameFormat.getTableNameRegex()),
() -> parseAndAnalyzeStmt(createViewStr1, connectContext));
String createViewStr2 = "create view view2 as select k1,k2 from test.table1;";

View File

@ -23,6 +23,9 @@ public class FeNameFormatTest {
@Test
public void testCheckColumnName() {
// checkLabel must use the label regex, which (unlike the other name regexes) allows a leading '-'
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkLabel("-lable"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("_id"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("__id"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("___id"));
@ -40,6 +43,13 @@ public class FeNameFormatTest {
// length 70
String largeTblName = "test_sys_partition_list_basic_test_list_partition_bigint_tb_uniq_large";
ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkTableName(largeTblName));
// check table name use correct regex, not begin with '-'
ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkTableName("-" + tblName));
// check common name use correct regex, length 65
ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkCommonName("fakeType", tblName + "t"));
ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkCommonName("fakeType", "_commonName"));
}
}

View File

@ -30,6 +30,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.mysql.privilege.Privilege;
import org.apache.doris.mysql.privilege.Role;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.SessionVariable;
import mockit.Expectations;
import mockit.Mocked;
@ -55,6 +56,9 @@ public class LdapPrivsCheckerTest {
@Mocked
private ConnectContext context;
@Mocked
private SessionVariable sessionVariable;
@Mocked
private Env env;
@ -128,8 +132,15 @@ public class LdapPrivsCheckerTest {
context.getCurrentUserIdentity();
minTimes = 0;
result = userIdentity;
context.getSessionVariable();
minTimes = 0;
result = sessionVariable;
}
};
// Call the mocked method once before replay; otherwise the tests fail with:
// "Missing 1 invocation to: org.apache.doris.qe.ConnectContext#get()"
ConnectContext.get().getSessionVariable().isEnableUnicodeNameSupport();
}
@Test

View File

@ -0,0 +1,4 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select --
中文表名_1 CREATE TABLE `中文表名_1` (\n `字符串_字段名_test` varchar(150) NULL,\n `时间_字段名_test` datetime NULL\n) ENGINE=OLAP\nUNIQUE KEY(`字符串_字段名_test`)\nCOMMENT 'OLAP'\nDISTRIBUTED BY HASH(`字符串_字段名_test`) BUCKETS 3\nPROPERTIES (\n"replication_allocation" = "tag.location.default: 1",\n"in_memory" = "false",\n"storage_format" = "V2",\n"light_schema_change" = "true",\n"disable_auto_compaction" = "false"\n);

View File

@ -0,0 +1,53 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Regression suite: with enable_unicode_name_support switched on, create a database,
// a table and a LIKE-copy of that table whose names (and column names) contain
// non-ASCII characters, then check the SHOW CREATE TABLE output of the copy.
suite("test_unicode_name") {
try {
// Turn on Unicode name support for this session before issuing any DDL.
sql """
set enable_unicode_name_support = true
"""
// Database with a non-ASCII name.
sql """
CREATE DATABASE IF NOT EXISTS `中文库名`
"""
// Table with a non-ASCII name and non-ASCII column names.
// NOTE(review): the table name is not database-qualified, so it appears to be created
// in the session's current database rather than in `中文库名` — confirm this is intended.
sql """
CREATE TABLE IF NOT EXISTS `中文表名` (
`字符串_字段名_test` varchar(150) NULL,
`时间_字段名_test` datetime NULL
) ENGINE=OLAP
UNIQUE KEY(`字符串_字段名_test`)
DISTRIBUTED BY HASH(`字符串_字段名_test`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
// Copy the table definition via CREATE TABLE ... LIKE.
sql """
CREATE TABLE IF NOT EXISTS `中文表名_1` LIKE `中文表名`
"""
// Presumably compared against the recorded "select" result in the suite's .out file — verify.
qt_select """SHOW CREATE TABLE `中文表名_1`"""
} finally {
// Cleanup runs whether or not the statements above succeeded.
sql """ DROP DATABASE IF EXISTS `中文库名` """
sql """ DROP TABLE IF EXISTS `中文表名` """
sql """ DROP TABLE IF EXISTS `中文表名_1` """
}
}