[api-change] add soft limit of String type length (#8567)

1. add a config string_type_soft_limit that sets a soft limit on the max length of the String type
2. disable using String type in Key column, partition column and
   distribution column
3. remove the String type alias BLOB, reserving it for future use
This commit is contained in:
Zhengguo Yang
2022-03-25 09:28:41 +08:00
committed by GitHub
parent 5511d435de
commit cfb57be731
22 changed files with 129 additions and 66 deletions

View File

@ -4578,8 +4578,6 @@ type ::=
{: RESULT = ScalarType.createStringType(); :}
| KW_TEXT
{: RESULT = ScalarType.createStringType(); :}
| KW_BLOB
{: RESULT = ScalarType.createStringType(); :}
| KW_VARCHAR LPAREN INTEGER_LITERAL:len RPAREN
{: ScalarType type = ScalarType.createVarcharType(len.intValue());
type.setAssignedStrLenInColDefinition();

View File

@ -215,7 +215,10 @@ public class ColumnDef {
throw new AnalysisException("Array type column default value only support null");
}
}
if (isKey() && type.getPrimitiveType() == PrimitiveType.STRING) {
throw new AnalysisException("String Type should not be used in key column[" + getName()
+ "].");
}
if (type.getPrimitiveType() == PrimitiveType.MAP) {
if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) {
throw new AnalysisException("Map type column default value just support null");

View File

@ -295,7 +295,8 @@ public class CreateTableStmt extends DdlStmt {
}
if (hasAggregate) {
for (ColumnDef columnDef : columnDefs) {
if (columnDef.getAggregateType() == null) {
if (columnDef.getAggregateType() == null
&& !columnDef.getType().isScalarType(PrimitiveType.STRING)) {
keysColumnNames.add(columnDef.getName());
}
}
@ -314,6 +315,9 @@ public class CreateTableStmt extends DdlStmt {
if (columnDef.getType().isFloatingPointType()) {
break;
}
if (columnDef.getType().getPrimitiveType() == PrimitiveType.STRING) {
break;
}
if (columnDef.getType().getPrimitiveType() == PrimitiveType.VARCHAR) {
keysColumnNames.add(columnDef.getName());
break;
@ -323,8 +327,8 @@ public class CreateTableStmt extends DdlStmt {
// The OLAP table must have at least one short key, and float, double and string columns cannot be short keys.
// So a float, double or string column cannot be the first column of an OLAP table.
if (keysColumnNames.isEmpty()) {
throw new AnalysisException("The olap table first column could not be float or double,"
+ " use decimal instead.");
throw new AnalysisException("The olap table first column could not be float, double, string"
+ " use decimal or varchar instead.");
}
keysDesc = new KeysDesc(KeysType.DUP_KEYS, keysColumnNames);
}
@ -409,7 +413,7 @@ public class CreateTableStmt extends DdlStmt {
if (distributionDesc == null) {
throw new AnalysisException("Create olap table should contain distribution desc");
}
distributionDesc.analyze(columnSet);
distributionDesc.analyze(columnSet, columnDefs);
} else if (engineName.equalsIgnoreCase("elasticsearch")) {
EsUtil.analyzePartitionAndDistributionDesc(partitionDesc, distributionDesc);
} else {

View File

@ -40,7 +40,7 @@ public class DistributionDesc implements Writable {
}
/**
 * Base-class analysis hook for a distribution description.
 *
 * This implementation does no validation and always throws
 * NotImplementedException.
 *
 * @param colSet names of the table's columns
 * @param columnDefs column definitions of the table
 * @throws AnalysisException declared for implementations that perform validation
 */
public void analyze(Set<String> colSet) throws AnalysisException {
public void analyze(Set<String> colSet, List<ColumnDef> columnDefs) throws AnalysisException {
throw new NotImplementedException();
}

View File

@ -22,11 +22,13 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DistributionInfo;
import org.apache.doris.catalog.DistributionInfo.DistributionInfoType;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.io.Text;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.DataInput;
import java.io.DataOutput;
@ -58,17 +60,29 @@ public class HashDistributionDesc extends DistributionDesc {
}
/**
 * Validates this hash distribution description.
 *
 * Checks, in order: the bucket count is positive; at least one distribution
 * column is named; every named column exists in colSet; no distribution
 * column name is repeated (compared case-insensitively); and no distribution
 * column is declared with the String type in columnDefs.
 *
 * @param colSet names of the columns defined on the table
 * @param columnDefs column definitions used to look up each distribution column's type
 * @throws AnalysisException if any of the checks above fails
 */
@Override
public void analyze(Set<String> cols) throws AnalysisException {
public void analyze(Set<String> colSet, List<ColumnDef> columnDefs) throws AnalysisException {
if (numBucket <= 0) {
throw new AnalysisException("Number of hash distribution should be larger than zero.");
}
if (distributionColumnNames == null || distributionColumnNames.size() == 0) {
throw new AnalysisException("Number of hash column should be larger than zero.");
}
// Case-insensitive set used to detect distribution column names that repeat.
Set<String> distColSet = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER);
for (String columnName : distributionColumnNames) {
if (!cols.contains(columnName)) {
if (!colSet.contains(columnName)) {
throw new AnalysisException("Distribution column(" + columnName + ") doesn't exist.");
}
// add() returns false when an equal (ignoring case) name was already recorded.
if (!distColSet.add(columnName)) {
throw new AnalysisException("Duplicated distribution column " + columnName);
}
// Find the definition of this distribution column and reject the String type.
// NOTE(review): the match below uses case-sensitive equals(), while distColSet
// above compares names case-insensitively. Confirm that a distribution column
// written with different letter case cannot skip this String-type check.
for (ColumnDef columnDef : columnDefs) {
if (columnDef.getName().equals(columnName)) {
if (columnDef.getType().isScalarType(PrimitiveType.STRING)) {
throw new AnalysisException("String Type should not be used in distribution column["
+ columnDef.getName() + "].");
}
}
}
}
}

View File

@ -21,6 +21,7 @@ import org.apache.doris.catalog.AggregateType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.DdlException;
import org.apache.doris.qe.ConnectContext;
@ -78,6 +79,10 @@ public class PartitionDesc {
if (columnDef.getType().isFloatingPointType()) {
throw new AnalysisException("Floating point type column can not be partition column");
}
if (columnDef.getType().isScalarType(PrimitiveType.STRING)) {
throw new AnalysisException("String Type should not be used in partition column["
+ columnDef.getName() + "].");
}
if (!ConnectContext.get().getSessionVariable().isAllowPartitionColumnNullable()
&& columnDef.isAllowNull()) {
throw new AnalysisException("The partition column must be NOT NULL");

View File

@ -42,7 +42,7 @@ public class RandomDistributionDesc extends DistributionDesc {
}
@Override
public void analyze(Set<String> colSet) throws AnalysisException {
public void analyze(Set<String> colSet, List<ColumnDef> columnDefs) throws AnalysisException {
if (numBucket <= 0) {
throw new AnalysisException("Number of random distribution should be larger than zero.");
}

View File

@ -198,7 +198,6 @@ public class ScalarType extends Type {
return createVarcharType();
case "STRING":
case "TEXT":
case "BLOB":
return createStringType();
case "HLL":
return createHllType();

View File

@ -22,6 +22,7 @@ import org.apache.doris.analysis.CreateTableStmt;
import org.apache.doris.analysis.ExplainOptions;
import org.apache.doris.analysis.Expr;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.StmtExecutor;
import org.apache.doris.utframe.UtFrameUtils;
@ -31,7 +32,9 @@ import org.apache.commons.lang3.StringUtils;
import org.junit.After;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.io.File;
import java.util.List;
@ -41,6 +44,9 @@ public class PlannerTest {
private static String runningDir = "fe/mocked/DemoTest/" + UUID.randomUUID().toString() + "/";
private static ConnectContext ctx;
@Rule
public ExpectedException expectedEx = ExpectedException.none();
@After
public void tearDown() throws Exception {
FileUtils.deleteDirectory(new File(runningDir));
@ -434,4 +440,13 @@ public class PlannerTest {
compare.accept("select * from db1.tbl2 where k1 > 2.1", "select * from db1.tbl2 where k1 > 2");
}
/**
 * Verifies that a CREATE TABLE statement listing a string-typed column (k1)
 * in its AGGREGATE KEY clause fails analysis with an AnalysisException
 * naming that column.
 */
@Test
public void testStringType() throws Exception {
// k1 is declared as string and is also listed as a key and as the hash distribution column.
String createTbl1 = "create table db1.tbl1(k1 string, k2 varchar(32), k3 varchar(32), k4 int) "
+ "AGGREGATE KEY(k1, k2,k3,k4) distributed by hash(k1) buckets 3 properties('replication_num' = '1')";
// Register the expected exception type and message before running the statement that should throw.
expectedEx.expect(AnalysisException.class);
expectedEx.expectMessage("String Type should not be used in key column[k1].");
UtFrameUtils.parseAndAnalyzeStmt(createTbl1, ctx);
}
}