branch-2.1: [opt](identifier) make the Unicode format a superset of the Latin format #48078 (#53264)

cherry-picked from #48078
This commit is contained in:
morrySnow
2025-07-16 19:58:58 +08:00
committed by GitHub
parent 83cdd44d6b
commit cc387f362f
2 changed files with 97 additions and 18 deletions

View File

@ -35,16 +35,16 @@ public class FeNameFormat {
// Regular expressions used to validate identifier names. Patterns without the UNICODE_ prefix
// accept ASCII only; the UNICODE_ variants additionally accept \p{L} (any Unicode letter).
// NOTE(review): several constants below appear twice in this view (an earlier form followed by
// a revised form) — this looks like the before/after lines of a change; confirm which is live.

// One leading '_' or ASCII letter, then up to 63 letters, digits, '-' or '_' (max length 64).
private static final String UNDERSCORE_COMMON_NAME_REGEX = "^[_a-zA-Z][a-zA-Z0-9-_]{0,63}$";
// One leading ASCII letter, then any number of letters, digits, '-' or '_'; no length bound in the pattern.
private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]*$";
// One leading ASCII letter, then any number of characters from the class.
// NOTE(review): inside the class, ".-_" is parsed as a RANGE from '.' (0x2E) to '_' (0x5F), not as
// the three literals '.', '-', '_'. It therefore also admits '/', ':', ';', '<', '=', '>', '?', '@',
// '[', '\', ']' and '^', and does NOT admit a literal '-'. Confirm whether that is intended.
private static final String USER_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9.-_]*$";
// Earlier form: restricted first character, then up to 255 further characters (max length 256).
private static final String COLUMN_NAME_REGEX = "^[_a-zA-Z@0-9\\s/][.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{0,255}$";
// Revised form: 1 to 256 characters, all drawn from one class (no special first-character rule).
// "+-/" is a range covering '+', ',', '-', '.', '/'.
private static final String COLUMN_NAME_REGEX = "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{1,256}$";
// One leading ASCII letter, then up to 255 letters, digits, '-' or '_' (max length 256).
private static final String REPOSITORY_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]{0,255}$";
// Earlier form: 1 to 128 characters from '-', '_', ASCII letters, digits, ':' or any Unicode letter.
private static final String UNICODE_LABEL_REGEX = "^[-_A-Za-z0-9:\\p{L}]{1,128}$";
// Revised form: same character class, upper length bound read from Config.label_regex_length.
// The string is built once in this static initializer, so a later change to that config value
// is not reflected in this constant.
private static final String UNICODE_LABEL_REGEX = "^[-_A-Za-z0-9:\\p{L}]{1," + Config.label_regex_length + "}$";
// Unicode counterparts of the ASCII patterns above: the same shapes with \p{L} added to each class.
private static final String UNICODE_COMMON_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
private static final String UNICODE_UNDERSCORE_COMMON_NAME_REGEX = "^[_a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
private static final String UNICODE_TABLE_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]*$";
// NOTE(review): contains the same ".-_" range as USER_NAME_REGEX (see the note there).
private static final String UNICODE_USER_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9.-_\\p{L}]*$";
// Two initializers follow this declaration in this view (earlier form, then revised form).
private static final String UNICODE_COLUMN_NAME_REGEX
// Earlier form: restricted first character; its classes lack the '"', whitespace, ',' and ':'
// that the ASCII column pattern allows.
= "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/?@#$%^&*\\p{L}]{0,255}$";
// Revised form: exactly the revised ASCII column class plus \p{L}, 1 to 256 characters.
= "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:\\p{L}]{1,256}$";
// One leading ASCII or Unicode letter, then up to 255 letters, digits, '-' or '_'.
private static final String UNICODE_REPOSITORY_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,255}$";
// Public string constant; where it is checked is not visible in this excerpt.
public static final String FORBIDDEN_PARTITION_NAME = "placeholder_";

View File

@ -17,26 +17,24 @@
package org.apache.doris.common;
import org.junit.Test;
import org.apache.doris.qe.VariableMgr;
import com.google.common.collect.Lists;
import org.apache.ivy.util.StringUtils;
import org.junit.jupiter.api.Test;
import java.util.List;
public class FeNameFormatTest {
@Test
public void testCheckColumnName() {
void testLabelName() {
// check label use correct regex, begin with '-' is different from others
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkLabel("-lable"));
}
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("_id"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("__id"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("___id"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("___id_"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("@timestamp"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("@timestamp#"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("timestamp*"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("timestamp.1"));
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("timestamp.#"));
ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkColumnName("?id_"));
ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkColumnName("#id_"));
@Test
void testTableName() {
// length 64
String tblName = "test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq";
ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkTableName(tblName));
@ -45,19 +43,100 @@ public class FeNameFormatTest {
ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkTableName(largeTblName));
// check table name use correct regex, not begin with '-'
ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkTableName("-" + tblName));
}
/**
 * Exercises {@code FeNameFormat.checkColumnName} with the default session variable
 * {@code enableUnicodeNameSupport} switched off and then on.
 *
 * <p>Three groups of names are checked on every pass:
 * <ul>
 *   <li>{@code alwaysValid} must be accepted whatever the switch is set to;</li>
 *   <li>{@code alwaysInvalid} must raise {@code AnalysisException} whatever the switch is set to;</li>
 *   <li>{@code unicodeValid} must be accepted only while the switch is on.</li>
 * </ul>
 *
 * <p>The test mutates the shared default session variable, so the original value is restored
 * in a {@code finally} block.
 */
@Test
void testCheckColumnName() {
    List<String> alwaysValid = Lists.newArrayList(
            "_id",
            "_ id",
            " _id",
            "__id",
            "___id",
            "___id_",
            "@timestamp",
            "@timestamp#",
            "timestamp*",
            "timestamp.1",
            "timestamp.#",
            "?id_",
            "#id_",
            "$id_",
            "a-zA-Z0-9.+-/?@#$%^&*\" ,:",
            // upper length boundary: 256 characters is the longest accepted name,
            // the counterpart of the 257-character rejection below
            StringUtils.repeat("a", 256)
    );

    List<String> alwaysInvalid = Lists.newArrayList(
            // inner column prefix
            "mv_",
            "mva_",
            "__doris_shadow_",
            // invalid
            "",
            "\\",
            "column\\",
            // one character past the upper length boundary
            StringUtils.repeat("a", 257)
    );

    // Names made only of non-Latin letters, one sample per script.
    List<String> unicodeValid = Lists.newArrayList(
            "中文",
            "語言",
            "язык",
            "언어",
            "لغة",
            "ภาษา",
            "שפה",
            "γλώσσα",
            "ენა",
            "げんご"
    );

    // Remember the current setting so the shared default session variable can be put back.
    boolean defaultUnicode = VariableMgr.getDefaultSessionVariable().enableUnicodeNameSupport;
    try {
        for (boolean unicode : new boolean[] {false, true}) {
            VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(unicode);
            for (String s : alwaysValid) {
                ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName(s));
            }
            for (String s : alwaysInvalid) {
                ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkColumnName(s));
            }
            for (String s : unicodeValid) {
                if (unicode) {
                    ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName(s));
                } else {
                    ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkColumnName(s));
                }
            }
        }
    } finally {
        VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(defaultUnicode);
    }
}
/**
 * Checks {@code FeNameFormat.checkUserName}: a dot inside the name is accepted,
 * a dot as the first character is rejected with {@code AnalysisException}.
 */
@Test
void testUserName() {
    final String dottedName = "a.b";
    final String leadingDotName = ".a.b";

    // a user name may contain '.' after its first character
    ExceptionChecker.expectThrowsNoException(
            () -> FeNameFormat.checkUserName(dottedName));

    // check user name use correct regex, not begin with '.'
    ExceptionChecker.expectThrows(
            AnalysisException.class,
            () -> FeNameFormat.checkUserName(leadingDotName));
}
/**
 * Checks {@code FeNameFormat.checkCommonName}: a 65-character name and a name starting
 * with '_' are rejected with {@code AnalysisException}, while names containing or ending
 * with '-' are accepted.
 */
@Test
void testCommonName() {
    // 64 characters long; appending one more character produces a 65-character name
    String commonName = "test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq";

    // check common name use correct regex, length 65
    ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkCommonName("fakeType", commonName + "t"));
    // a leading underscore is not allowed for a common name
    ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkCommonName("fakeType", "_commonName"));
    // a hyphen is allowed in the middle and at the end
    ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkCommonName("fakeType", "common-Name"));
    ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkCommonName("fakeType", "commonName-"));
}
/**
 * Checks that {@code FeNameFormat.checkOutfileSuccessFileName} accepts a success-file
 * name that starts with an underscore.
 */
@Test
void testOutfileName() {
    final String type = "fakeType";
    final String successFileName = "_success";

    // check success file name prefix
    ExceptionChecker.expectThrowsNoException(
            () -> FeNameFormat.checkOutfileSuccessFileName(type, successFileName));
}
}