From 18f850c94f33aad8a4a632e9d323bcffda635a73 Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Thu, 11 Jan 2024 13:11:52 +0800 Subject: [PATCH] [enhance](auto-partition) forbid null column for auto partition (#29749) --- .../docs/advanced/partition/auto-partition.md | 9 +-- docs/en/docs/data-table/data-partition.md | 1 + .../docs/advanced/partition/auto-partition.md | 9 +-- docs/zh-CN/docs/advanced/variables.md | 2 +- docs/zh-CN/docs/data-table/data-partition.md | 1 + .../apache/doris/analysis/PartitionDesc.java | 7 ++- .../plans/commands/info/CreateTableInfo.java | 19 +++++- .../org/apache/doris/qe/SessionVariable.java | 6 +- .../analysis/RangePartitionPruneTest.java | 2 +- .../service/FrontendServiceImplTest.java | 4 +- .../test_auto_partition_behavior.groovy | 62 +++++++++++++++++++ .../test_auto_partition_load.groovy | 2 +- .../test_auto_range_partition.groovy | 2 +- 13 files changed, 109 insertions(+), 17 deletions(-) diff --git a/docs/en/docs/advanced/partition/auto-partition.md b/docs/en/docs/advanced/partition/auto-partition.md index 398135ecd9..25715b2c87 100644 --- a/docs/en/docs/advanced/partition/auto-partition.md +++ b/docs/en/docs/advanced/partition/auto-partition.md @@ -43,8 +43,8 @@ Suppose our table DDL is as follows: ```sql CREATE TABLE `DAILY_TRADE_VALUE` ( - `TRADE_DATE` datev2 NULL COMMENT '交易日期', - `TRADE_ID` varchar(40) NULL COMMENT '交易编号', + `TRADE_DATE` datev2 NOT NULL COMMENT '交易日期', + `TRADE_ID` varchar(40) NOT NULL COMMENT '交易编号', ...... ) UNIQUE KEY(`TRADE_DATE`, `TRADE_ID`) @@ -145,6 +145,7 @@ When building a table, use the following syntax to populate [CREATE-TABLE](../.. 2. In AUTO RANGE PARTITION, the partition function supports only `date_trunc` and the partition column supports only `DATEV2` or `DATETIMEV2` format; 3. In AUTO LIST PARTITION, function calls are not supported. Partitioned columns support `BOOLEAN`, `TINYINT`, `SMALLINT`, `INT`, `BIGINT`, `LARGEINT`, `DATE`, `DATETIME`, `CHAR`, `VARCHAR` data-types, and partitioned values are enum values. 4. In AUTO LIST PARTITION, a separate new PARTITION is created for each fetch of a partition column for which the corresponding partition does not currently exist. +5. The partition column for AUTO PARTITION must be a NOT NULL column. ## Sample Scenarios @@ -153,8 +154,8 @@ In the example in the Usage Scenarios section, the table DDL can be rewritten af ```sql CREATE TABLE `DAILY_TRADE_VALUE` ( - `TRADE_DATE` datev2 NULL, - `TRADE_ID` varchar(40) NULL, + `TRADE_DATE` datev2 NOT NULL, + `TRADE_ID` varchar(40) NOT NULL, ...... ) UNIQUE KEY(`TRADE_DATE`, `TRADE_ID`) diff --git a/docs/en/docs/data-table/data-partition.md b/docs/en/docs/data-table/data-partition.md index 3bcce73402..84f7ae01c0 100644 --- a/docs/en/docs/data-table/data-partition.md +++ b/docs/en/docs/data-table/data-partition.md @@ -146,6 +146,7 @@ It is also possible to use one layer of data partitioning, If you do not write t 1. Partition * You can specify one or more columns as the partitioning columns, but they have to be KEY columns. The usage of multi-column partitions is described further below. + * Range Partition supports the use of NULL partition columns when `allowPartitionColumnNullable` is `true`. List Partition never supports NULL partition columns. * Regardless of the type of the partitioning columns, double quotes are required for partition values. * There is no theoretical limit on the number of partitions. * If users create a table without specifying the partitions, the system will automatically generate a Partition with the same name as the table. This Partition contains all data in the table and is neither visible to users nor modifiable. diff --git a/docs/zh-CN/docs/advanced/partition/auto-partition.md b/docs/zh-CN/docs/advanced/partition/auto-partition.md index c998b38db4..476e0d2980 100644 --- a/docs/zh-CN/docs/advanced/partition/auto-partition.md +++ b/docs/zh-CN/docs/advanced/partition/auto-partition.md @@ -43,8 +43,8 @@ under the License. ```sql CREATE TABLE `DAILY_TRADE_VALUE` ( - `TRADE_DATE` datev2 NULL COMMENT '交易日期', - `TRADE_ID` varchar(40) NULL COMMENT '交易编号', + `TRADE_DATE` datev2 NOT NULL COMMENT '交易日期', + `TRADE_ID` varchar(40) NOT NULL COMMENT '交易编号', ...... ) UNIQUE KEY(`TRADE_DATE`, `TRADE_ID`) @@ -145,6 +145,7 @@ PROPERTIES ( 2. 在AUTO RANGE PARTITION中,分区函数仅支持`date_trunc`,分区列仅支持`DATEV2`或者`DATETIMEV2`格式; 3. 在AUTO LIST PARTITION中,不支持函数调用,分区列支持 `BOOLEAN`, `TINYINT`, `SMALLINT`, `INT`, `BIGINT`, `LARGEINT`, `DATE`, `DATETIME`, `CHAR`, `VARCHAR` 数据类型,分区值为枚举值。 4. 在AUTO LIST PARTITION中,分区列的每个当前不存在对应分区的取值,都会创建一个独立的新PARTITION。 +5. 自动分区的分区列必须为 NOT NULL 列。 ## 场景示例 @@ -153,8 +154,8 @@ PROPERTIES ( ```sql CREATE TABLE `DAILY_TRADE_VALUE` ( - `TRADE_DATE` datev2 NULL COMMENT '交易日期', - `TRADE_ID` varchar(40) NULL COMMENT '交易编号', + `TRADE_DATE` datev2 NOT NULL COMMENT '交易日期', + `TRADE_ID` varchar(40) NOT NULL COMMENT '交易编号', ...... ) UNIQUE KEY(`TRADE_DATE`, `TRADE_ID`) diff --git a/docs/zh-CN/docs/advanced/variables.md b/docs/zh-CN/docs/advanced/variables.md index 5d32681192..9c042d5049 100644 --- a/docs/zh-CN/docs/advanced/variables.md +++ b/docs/zh-CN/docs/advanced/variables.md @@ -583,7 +583,7 @@ try (Connection conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:9030/ CREATE USER user1 IDENTIFIED BY "12345" PASSWORD_HISTORY DEFAULT; ALTER USER user1 PASSWORD_HISTORY DEFAULT; ``` - + * `validate_password_policy` 密码强度校验策略。默认为 `NONE` 或 `0`,即不做校验。可以设置为 `STRONG` 或 `2`。当设置为 `STRONG` 或 `2` 时,通过 `ALTER USER` 或 `SET PASSWORD` 命令设置密码时,密码必须包含“大写字母”,“小写字母”,“数字”和“特殊字符”中的3项,并且长度必须大于等于8。特殊字符包括:`~!@#$%^&*()_+|<>,.?/:;'[]{}"`。 diff --git a/docs/zh-CN/docs/data-table/data-partition.md b/docs/zh-CN/docs/data-table/data-partition.md index 03a570821c..a392f0dd07 100644 --- a/docs/zh-CN/docs/data-table/data-partition.md +++ b/docs/zh-CN/docs/data-table/data-partition.md @@ -148,6 +148,7 @@ Doris 支持两层的数据划分。第一层是 Partition,支持 Range 和 Li 1. **Partition** - Partition 列可以指定一列或多列,分区列必须为 KEY 列。多列分区的使用方式在后面 **多列分区** 小结介绍。 + - 当 `allowPartitionColumnNullable` 为 `true` 时,Range Partition 支持使用 NULL 分区列。List Partition 始终不支持 NULL 分区列。 - 不论分区列是什么类型,在写分区值时,都需要加双引号。 - 分区数量理论上没有上限。 - 当不使用 Partition 建表时,系统会自动生成一个和表名同名的,全值范围的 Partition。该 Partition 对用户不可见,并且不可删改。 diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java index fc572cb443..9bc2920281 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java @@ -190,9 +190,14 @@ public class PartitionDesc { throw new AnalysisException("Complex type column can't be partition column: " + columnDef.getType().toString()); } + // prohibit to create auto partition with null column anyhow + if (this.isAutoCreatePartitions && columnDef.isAllowNull()) { + throw new AnalysisException("The auto partition column must be NOT NULL"); + } if (!ConnectContext.get().getSessionVariable().isAllowPartitionColumnNullable() && columnDef.isAllowNull()) { - throw new AnalysisException("The partition column must be NOT NULL"); + throw new AnalysisException( + "The partition column must be NOT NULL with allow_partition_column_nullable OFF"); } if (this instanceof ListPartitionDesc && columnDef.isAllowNull()) { throw new AnalysisException("The list partition column must be NOT NULL"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java index a03f5486a8..f8a1428731 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java @@ -138,6 +138,7 @@ public class CreateTableInfo { this.autoPartitionExprs = autoPartitionExprs; this.partitionType = partitionType; this.partitionColumns = partitionColumns; + appendColumnFromExprs(); this.partitions = partitions; this.distribution = distribution; this.rollups = Utils.copyRequiredList(rollups); @@ -173,6 +174,7 @@ public class CreateTableInfo { this.autoPartitionExprs = autoPartitionExprs; this.partitionType = partitionType; this.partitionColumns = partitionColumns; + appendColumnFromExprs(); this.partitions = partitions; this.distribution = distribution; this.rollups = Utils.copyRequiredList(rollups); @@ -650,8 +652,13 @@ public class CreateTableInfo { throw new AnalysisException("Complex type column can't be partition column: " + column.getType().toString()); } + // prohibit to create auto partition with null column anyhow + if (this.isAutoPartition && column.isNullable()) { + throw new AnalysisException("The auto partition column must be NOT NULL"); + } if (!ctx.getSessionVariable().isAllowPartitionColumnNullable() && column.isNullable()) { - throw new AnalysisException("The partition column must be NOT NULL"); + throw new AnalysisException( + "The partition column must be NOT NULL with allow_partition_column_nullable OFF"); } if (partitionType.equalsIgnoreCase(PartitionType.LIST.name()) && column.isNullable()) { throw new AnalysisException("The list partition column must be NOT NULL"); @@ -882,4 +889,14 @@ public class CreateTableInfo { } }).collect(Collectors.toList()); } + + private void appendColumnFromExprs() { + for (Expression autoExpr : autoPartitionExprs) { + for (Expression child : autoExpr.children()) { + if (child instanceof UnboundSlot) { + partitionColumns.add(((UnboundSlot) child).getName()); + } + } + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index f84414e27e..d2f05fc66a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -767,7 +767,11 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = SHOW_HIDDEN_COLUMNS, flag = VariableMgr.SESSION_ONLY) public boolean showHiddenColumns = false; - @VariableMgr.VarAttr(name = ALLOW_PARTITION_COLUMN_NULLABLE) + @VariableMgr.VarAttr(name = ALLOW_PARTITION_COLUMN_NULLABLE, description = { + "是否允许 NULLABLE 列作为 PARTITION 列。开启后,RANGE PARTITION 允许 NULLABLE PARTITION 列" + + "(LIST PARTITION当前不支持)。默认开。", + "Whether to allow NULLABLE columns as PARTITION columns. When ON, RANGE PARTITION allows " + + "NULLABLE PARTITION columns (LIST PARTITION is not supported currently). ON by default." }) public boolean allowPartitionColumnNullable = true; @VariableMgr.VarAttr(name = DELETE_WITHOUT_PARTITION, needForward = true) diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/RangePartitionPruneTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/RangePartitionPruneTest.java index 961286347c..bdeda24890 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/RangePartitionPruneTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/RangePartitionPruneTest.java @@ -105,7 +105,7 @@ public class RangePartitionPruneTest extends PartitionPruneTestBase { + "PROPERTIES ('replication_num' = '1');"; String autoCreatePartitionTable = new String("CREATE TABLE test.test_to_date_trunc(\n" - + " event_day DATETIME\n" + + " event_day DATETIME NOT NULL\n" + ")\n" + "DUPLICATE KEY(event_day)\n" + "AUTO PARTITION BY range date_trunc(event_day, \"day\") (\n" diff --git a/fe/fe-core/src/test/java/org/apache/doris/service/FrontendServiceImplTest.java b/fe/fe-core/src/test/java/org/apache/doris/service/FrontendServiceImplTest.java index a477db599a..90b26321c0 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/service/FrontendServiceImplTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/service/FrontendServiceImplTest.java @@ -82,7 +82,7 @@ public class FrontendServiceImplTest { @Test public void testCreatePartitionRange() throws Exception { String createOlapTblStmt = new String("CREATE TABLE test.partition_range(\n" - + " event_day DATETIME,\n" + + " event_day DATETIME NOT NULL,\n" + " site_id INT DEFAULT '10',\n" + " city_code VARCHAR(100)\n" + ")\n" @@ -123,7 +123,7 @@ public class FrontendServiceImplTest { String createOlapTblStmt = new String("CREATE TABLE test.partition_list(\n" + " event_day DATETIME,\n" + " site_id INT DEFAULT '10',\n" - + " city_code VARCHAR(100) not null\n" + + " city_code VARCHAR(100) NOT NULL\n" + ")\n" + "DUPLICATE KEY(event_day, site_id, city_code)\n" + "AUTO PARTITION BY list (city_code) (\n" diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy index f42fbcee4a..1a583560a7 100644 --- a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy +++ b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy @@ -192,4 +192,66 @@ suite("test_auto_partition_behavior") { } sql """ insert overwrite table rewrite partition(p1) values ("Xxx") """ qt_sql_overwrite2 """ select * from rewrite """ // Xxx + + // prohibit NULLABLE auto partition column + // legacy + sql " set experimental_enable_nereids_planner=false " + test { + sql "drop table if exists test_null1" + sql """ + create table test_null1( + k0 datetime(6) null + ) + auto partition by range date_trunc(k0, 'hour') + ( + ) + DISTRIBUTED BY HASH(`k0`) BUCKETS 2 + properties("replication_num" = "1"); + """ + exception "The auto partition column must be NOT NULL" + } + test { + sql "drop table if exists test_null2" + sql """ + create table test_null2( + k0 int null + ) + auto partition by list (k0) + ( + ) + DISTRIBUTED BY HASH(`k0`) BUCKETS 2 + properties("replication_num" = "1"); + """ + exception "The auto partition column must be NOT NULL" + } + // nereids + sql " set experimental_enable_nereids_planner=true " + test { + sql "drop table if exists test_null1" + sql """ + create table test_null1( + k0 datetime(6) null + ) + auto partition by range date_trunc(k0, 'hour') + ( + ) + DISTRIBUTED BY HASH(`k0`) BUCKETS 2 + properties("replication_num" = "1"); + """ + exception "The auto partition column must be NOT NULL" + } + test { + sql "drop table if exists test_null2" + sql """ + create table test_null2( + k0 int null + ) + auto partition by list (k0) + ( + ) + DISTRIBUTED BY HASH(`k0`) BUCKETS 2 + properties("replication_num" = "1"); + """ + exception "The auto partition column must be NOT NULL" + } } diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_load.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_load.groovy index 351d7bb320..81b440e0f8 100644 --- a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_load.groovy +++ b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_load.groovy @@ -21,7 +21,7 @@ suite("test_auto_partition_load") { sql """ CREATE TABLE `${tblName1}` ( `k1` INT, - `k2` DATETIME, + `k2` DATETIME NOT NULL, `k3` DATETIMEV2(6) ) ENGINE=OLAP DUPLICATE KEY(`k1`) diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_range_partition.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_range_partition.groovy index 1c70a84228..33574990bc 100644 --- a/regression-test/suites/partition_p0/auto_partition/test_auto_range_partition.groovy +++ b/regression-test/suites/partition_p0/auto_partition/test_auto_range_partition.groovy @@ -92,7 +92,7 @@ suite("test_auto_range_partition") { sql """ CREATE TABLE `${tblName3}` ( `k1` INT, - `k2` DATETIMEV2(3), + `k2` DATETIMEV2(3) NOT NULL, `k3` DATETIMEV2(6) ) ENGINE=OLAP DUPLICATE KEY(`k1`)