[feature-wip](array) remove array config and check array nested depth (#13428)
1. Remove the FE config `enable_array_type`. 2. Limit the nested depth of arrays on the FE side. 3. Fix a bug where, when loading an array from parquet, the decimal type is treated as bigint. 4. Fix loading arrays from csv (vec-engine): handle null and "null". 5. Change the csv array loading behavior: if the array string format in the csv is invalid, it will be converted to null. 6. Remove `check_array_format()`, because its logic is wrong and meaningless. 7. Add stream load csv test cases and more parquet broker load tests.
This commit is contained in:
@ -267,7 +267,7 @@ public class ColumnDef {
|
||||
if (type.getPrimitiveType() == PrimitiveType.ARRAY) {
|
||||
if (isKey()) {
|
||||
throw new AnalysisException("Array can only be used in the non-key column of"
|
||||
+ " the duplicate table at present.");
|
||||
+ " the duplicate table at present.");
|
||||
}
|
||||
if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) {
|
||||
throw new AnalysisException("Array type column default value only support null");
|
||||
|
||||
@ -328,6 +328,9 @@ public class CreateTableStmt extends DdlStmt {
|
||||
if (columnDef.getType().getPrimitiveType() == PrimitiveType.JSONB) {
|
||||
break;
|
||||
}
|
||||
if (columnDef.getType().isCollectionType()) {
|
||||
break;
|
||||
}
|
||||
if (columnDef.getType().getPrimitiveType() == PrimitiveType.VARCHAR) {
|
||||
keysColumnNames.add(columnDef.getName());
|
||||
break;
|
||||
@ -393,9 +396,6 @@ public class CreateTableStmt extends DdlStmt {
|
||||
columnDef.analyze(engineName.equals("olap"));
|
||||
|
||||
if (columnDef.getType().isArrayType()) {
|
||||
if (!Config.enable_array_type) {
|
||||
throw new AnalysisException("Please open enable_array_type config before use Array.");
|
||||
}
|
||||
if (columnDef.getAggregateType() != null && columnDef.getAggregateType() != AggregateType.NONE) {
|
||||
throw new AnalysisException("Array column can't support aggregation "
|
||||
+ columnDef.getAggregateType());
|
||||
|
||||
@ -33,6 +33,8 @@ import java.util.Objects;
|
||||
*/
|
||||
public class ArrayType extends Type {
|
||||
|
||||
public static final int MAX_NESTED_DEPTH = 9;
|
||||
|
||||
@SerializedName(value = "itemType")
|
||||
private Type itemType;
|
||||
|
||||
|
||||
@ -46,12 +46,8 @@ import java.util.List;
|
||||
* as abstract methods that subclasses must implement.
|
||||
*/
|
||||
public abstract class Type {
|
||||
// Maximum nesting depth of a type. This limit was determined experimentally by
|
||||
// org.apache.doris.rewrite.FoldConstantsRule.apply generating and scanning
|
||||
// deeply nested Parquet and Avro files. In those experiments, we exceeded
|
||||
// the stack space in the scanner (which uses recursion for dealing with
|
||||
// nested types) at a nesting depth between 200 and 300 (200 worked, 300 crashed).
|
||||
public static int MAX_NESTING_DEPTH = 2;
|
||||
// Currently only support Array type with max 9 depths.
|
||||
public static int MAX_NESTING_DEPTH = 9;
|
||||
|
||||
// Static constant types for scalar types that don't require additional information.
|
||||
public static final ScalarType INVALID = new ScalarType(PrimitiveType.INVALID_TYPE);
|
||||
@ -488,7 +484,7 @@ public abstract class Type {
|
||||
} else if (t1.isArrayType() && t2.isArrayType()) {
|
||||
return ArrayType.canCastTo((ArrayType) t1, (ArrayType) t2);
|
||||
}
|
||||
return t1.isNull() || t1.getPrimitiveType() == PrimitiveType.VARCHAR;
|
||||
return t1.isNull() || t1.getPrimitiveType().isCharFamily();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -612,7 +608,7 @@ public abstract class Type {
|
||||
* MAP<STRING,STRUCT<f1:INT>> --> 3
|
||||
*/
|
||||
private boolean exceedsMaxNestingDepth(int d) {
|
||||
if (d >= MAX_NESTING_DEPTH) {
|
||||
if (d > MAX_NESTING_DEPTH) {
|
||||
return true;
|
||||
}
|
||||
if (isStructType()) {
|
||||
@ -623,7 +619,9 @@ public abstract class Type {
|
||||
}
|
||||
}
|
||||
} else if (isArrayType()) {
|
||||
return false;
|
||||
ArrayType arrayType = (ArrayType) this;
|
||||
Type itemType = arrayType.getItemType();
|
||||
return itemType.exceedsMaxNestingDepth(d + 1);
|
||||
} else if (isMultiRowType()) {
|
||||
MultiRowType multiRowType = (MultiRowType) this;
|
||||
return multiRowType.getItemType().exceedsMaxNestingDepth(d + 1);
|
||||
|
||||
@ -25,7 +25,6 @@ import org.apache.doris.catalog.PrimitiveType;
|
||||
import org.apache.doris.catalog.ScalarType;
|
||||
import org.apache.doris.catalog.Type;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
import org.apache.doris.common.Config;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
|
||||
import mockit.Mock;
|
||||
@ -47,7 +46,6 @@ public class ColumnDefTest {
|
||||
stringCol = new TypeDef(ScalarType.createChar(10));
|
||||
floatCol = new TypeDef(ScalarType.createType(PrimitiveType.FLOAT));
|
||||
booleanCol = new TypeDef(ScalarType.createType(PrimitiveType.BOOLEAN));
|
||||
Config.enable_array_type = true;
|
||||
|
||||
ctx = new ConnectContext(null);
|
||||
new MockUp<ConnectContext>() {
|
||||
|
||||
@ -21,7 +21,6 @@ import org.apache.doris.catalog.ArrayType;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.PrimitiveType;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
import org.apache.doris.common.Config;
|
||||
import org.apache.doris.common.ExceptionChecker;
|
||||
import org.apache.doris.common.util.SqlParserUtils;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
@ -44,7 +43,6 @@ public class InsertArrayStmtTest {
|
||||
public static void setUp() throws Exception {
|
||||
UtFrameUtils.createDorisCluster(RUNNING_DIR);
|
||||
connectContext = UtFrameUtils.createDefaultCtx();
|
||||
Config.enable_array_type = true;
|
||||
createDatabase("create database test;");
|
||||
}
|
||||
|
||||
|
||||
@ -45,7 +45,6 @@ public class CreateTableTest {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
Config.disable_storage_medium_check = true;
|
||||
Config.enable_array_type = true;
|
||||
UtFrameUtils.createDorisCluster(runningDir);
|
||||
|
||||
// create connect context
|
||||
@ -569,5 +568,43 @@ public class CreateTableTest {
|
||||
createTable("create table test.table2(k1 INT, k2 Array<Array<int>>) duplicate key (k1) "
|
||||
+ "distributed by hash(k1) buckets 1 properties('replication_num' = '1');");
|
||||
});
|
||||
ExceptionChecker.expectThrowsNoException(() -> {
|
||||
createTable("CREATE TABLE test.table3 (\n"
|
||||
+ " `k1` INT(11) NULL COMMENT \"\",\n"
|
||||
+ " `k2` ARRAY<ARRAY<SMALLINT>> NULL COMMENT \"\",\n"
|
||||
+ " `k3` ARRAY<ARRAY<ARRAY<INT(11)>>> NULL COMMENT \"\",\n"
|
||||
+ " `k4` ARRAY<ARRAY<ARRAY<ARRAY<BIGINT>>>> NULL COMMENT \"\",\n"
|
||||
+ " `k5` ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<CHAR>>>>> NULL COMMENT \"\",\n"
|
||||
+ " `k6` ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<VARCHAR(20)>>>>>> NULL COMMENT \"\",\n"
|
||||
+ " `k7` ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<DATE>>>>>>> NULL COMMENT \"\",\n"
|
||||
+ " `k8` ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<DATETIME>>>>>>>> NULL COMMENT \"\",\n"
|
||||
+ " `k11` ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<DECIMAL(20, 6)>>>>>>>>> NULL COMMENT \"\"\n"
|
||||
+ ") ENGINE=OLAP\n"
|
||||
+ "DUPLICATE KEY(`k1`)\n"
|
||||
+ "DISTRIBUTED BY HASH(`k1`) BUCKETS 3\n"
|
||||
+ "PROPERTIES (\n"
|
||||
+ "\"replication_allocation\" = \"tag.location.default: 1\"\n"
|
||||
+ ");");
|
||||
});
|
||||
ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, "Type exceeds the maximum nesting depth of 9",
|
||||
() -> {
|
||||
createTable("CREATE TABLE test.table4 (\n"
|
||||
+ " `k1` INT(11) NULL COMMENT \"\",\n"
|
||||
+ " `k2` ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<ARRAY<DECIMAL(20, 6)>>>>>>>>>> NULL COMMENT \"\"\n"
|
||||
+ ") ENGINE=OLAP\n"
|
||||
+ "DUPLICATE KEY(`k1`)\n"
|
||||
+ "DISTRIBUTED BY HASH(`k1`) BUCKETS 3\n"
|
||||
+ "PROPERTIES (\n"
|
||||
+ "\"replication_allocation\" = \"tag.location.default: 1\"\n"
|
||||
+ ");");
|
||||
});
|
||||
|
||||
ExceptionChecker.expectThrowsNoException(() -> {
|
||||
createTable("create table test.table5(\n"
|
||||
+ "\tk1 int,\n"
|
||||
+ "\tv1 array<int>\n"
|
||||
+ ") distributed by hash(k1) buckets 1\n"
|
||||
+ "properties(\"replication_num\" = \"1\");");
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@ -21,7 +21,6 @@ import org.apache.doris.analysis.AlterViewStmt;
|
||||
import org.apache.doris.analysis.CreateDbStmt;
|
||||
import org.apache.doris.analysis.CreateTableStmt;
|
||||
import org.apache.doris.analysis.CreateViewStmt;
|
||||
import org.apache.doris.common.Config;
|
||||
import org.apache.doris.common.DdlException;
|
||||
import org.apache.doris.common.ExceptionChecker;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
@ -44,7 +43,6 @@ public class CreateViewTest {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
UtFrameUtils.createDorisCluster(runningDir);
|
||||
Config.enable_array_type = true;
|
||||
// create connect context
|
||||
connectContext = UtFrameUtils.createDefaultCtx();
|
||||
// create database
|
||||
|
||||
Reference in New Issue
Block a user