[fix] (inverted index) Disallow variant columns from using inverted index format v1 (#43599) (#45179)

pick from master #43599
This commit is contained in:
Sun Chenyang
2024-12-11 10:05:54 +08:00
committed by GitHub
parent d0737c45f8
commit ca85238ccf
17 changed files with 188 additions and 18 deletions

View File

@ -2586,12 +2586,15 @@ public class SchemaChangeHandler extends AlterHandler {
+ " ) already exist.");
}
}
boolean disableInvertedIndexV1ForVariant = olapTable.getInvertedIndexStorageFormat()
== TInvertedIndexStorageFormat.V1 && ConnectContext.get().getSessionVariable()
.getDisableInvertedIndexV1ForVaraint();
for (String col : indexDef.getColumns()) {
Column column = olapTable.getColumn(col);
if (column != null) {
indexDef.checkColumn(column, olapTable.getKeysType(),
olapTable.getTableProperty().getEnableUniqueKeyMergeOnWrite());
olapTable.getTableProperty().getEnableUniqueKeyMergeOnWrite(),
disableInvertedIndexV1ForVariant);
indexDef.getColumnUniqueIds().add(column.getUniqueId());
} else {
throw new DdlException("index column does not exist in table. invalid column: " + col);

View File

@ -42,6 +42,7 @@ import org.apache.doris.common.util.PropertyAnalyzer;
import org.apache.doris.datasource.es.EsUtil;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.thrift.TInvertedIndexStorageFormat;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
@ -561,7 +562,9 @@ public class CreateTableStmt extends DdlStmt {
if (CollectionUtils.isNotEmpty(indexDefs)) {
Set<String> distinct = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
Set<Pair<IndexType, List<String>>> distinctCol = new HashSet<>();
boolean disableInvertedIndexV1ForVariant = PropertyAnalyzer.analyzeInvertedIndexStorageFormat(
new HashMap<>(properties)) == TInvertedIndexStorageFormat.V1
&& ConnectContext.get().getSessionVariable().getDisableInvertedIndexV1ForVaraint();
for (IndexDef indexDef : indexDefs) {
indexDef.analyze();
if (!engineName.equalsIgnoreCase(DEFAULT_ENGINE_NAME)) {
@ -571,7 +574,8 @@ public class CreateTableStmt extends DdlStmt {
boolean found = false;
for (Column column : columns) {
if (column.getName().equalsIgnoreCase(indexColName)) {
indexDef.checkColumn(column, getKeysDesc().getKeysType(), enableUniqueKeyMergeOnWrite);
indexDef.checkColumn(column, getKeysDesc().getKeysType(),
enableUniqueKeyMergeOnWrite, disableInvertedIndexV1ForVariant);
found = true;
break;
}

View File

@ -213,8 +213,8 @@ public class IndexDef {
return (this.indexType == IndexType.INVERTED);
}
public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKeyMergeOnWrite)
throws AnalysisException {
public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKeyMergeOnWrite,
boolean disableInvertedIndexV1ForVariant) throws AnalysisException {
if (indexType == IndexType.BITMAP || indexType == IndexType.INVERTED || indexType == IndexType.BLOOMFILTER
|| indexType == IndexType.NGRAM_BF) {
String indexColName = column.getName();
@ -229,6 +229,14 @@ public class IndexDef {
throw new AnalysisException(colType + " is not supported in " + indexType.toString() + " index. "
+ "invalid index: " + indexName);
}
// In inverted index format v1, each subcolumn of a variant has its own index file, leading to high IOPS.
// when the subcolumn type changes, it may result in missing files, causing link file failure.
if (colType.isVariantType() && disableInvertedIndexV1ForVariant) {
throw new AnalysisException(colType + " is not supported in inverted index format V1,"
+ "Please set properties(\"inverted_index_storage_format\"= \"v2\"),"
+ "or upgrade to a newer version");
}
if (!column.isKey()) {
if (keysType == KeysType.AGG_KEYS) {
throw new AnalysisException("index should only be used in columns of DUP_KEYS/UNIQUE_KEYS table"

View File

@ -51,6 +51,7 @@ import org.apache.doris.nereids.parser.PartitionTableInfo;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.thrift.TInvertedIndexStorageFormat;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
@ -534,6 +535,14 @@ public class CreateTableInfo {
if (!indexes.isEmpty()) {
Set<String> distinct = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
Set<Pair<IndexDef.IndexType, List<String>>> distinctCol = new HashSet<>();
boolean disableInvertedIndexV1ForVariant = false;
try {
disableInvertedIndexV1ForVariant = PropertyAnalyzer.analyzeInvertedIndexStorageFormat(
new HashMap<>(properties)) == TInvertedIndexStorageFormat.V1
&& ConnectContext.get().getSessionVariable().getDisableInvertedIndexV1ForVaraint();
} catch (Exception e) {
throw new AnalysisException(e.getMessage(), e.getCause());
}
for (IndexDefinition indexDef : indexes) {
indexDef.validate();
@ -545,7 +554,8 @@ public class CreateTableInfo {
boolean found = false;
for (ColumnDefinition column : columns) {
if (column.getName().equalsIgnoreCase(indexColName)) {
indexDef.checkColumn(column, keysType, isEnableMergeOnWrite);
indexDef.checkColumn(column, keysType, isEnableMergeOnWrite,
disableInvertedIndexV1ForVariant);
found = true;
break;
}

View File

@ -94,7 +94,7 @@ public class IndexDefinition {
* checkColumn
*/
public void checkColumn(ColumnDefinition column, KeysType keysType,
boolean enableUniqueKeyMergeOnWrite) throws AnalysisException {
boolean enableUniqueKeyMergeOnWrite, boolean disableInvertedIndexV1ForVariant) throws AnalysisException {
if (indexType == IndexType.BITMAP || indexType == IndexType.INVERTED
|| indexType == IndexType.BLOOMFILTER || indexType == IndexType.NGRAM_BF) {
String indexColName = column.getName();
@ -110,6 +110,14 @@ public class IndexDefinition {
throw new AnalysisException(colType + " is not supported in " + indexType.toString()
+ " index. " + "invalid index: " + name);
}
// In inverted index format v1, each subcolumn of a variant has its own index file, leading to high IOPS.
// when the subcolumn type changes, it may result in missing files, causing link file failure.
if (colType.isVariantType() && disableInvertedIndexV1ForVariant) {
throw new AnalysisException(colType + " is not supported in inverted index format V1,"
+ "Please set properties(\"inverted_index_storage_format\"= \"v2\"),"
+ "or upgrade to a newer version");
}
if (!column.isKey()) {
if (keysType == KeysType.AGG_KEYS) {
throw new AnalysisException("index should only be used in columns of DUP_KEYS/UNIQUE_KEYS table"

View File

@ -634,6 +634,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String MAX_FETCH_REMOTE_TABLET_COUNT = "max_fetch_remote_schema_tablet_count";
public static final String DISABLE_INVERTED_INDEX_V1_FOR_VARIANT = "disable_inverted_index_v1_for_variant";
// CLOUD_VARIABLES_BEGIN
public static final String CLOUD_CLUSTER = "cloud_cluster";
public static final String DISABLE_EMPTY_PARTITION_PRUNE = "disable_empty_partition_prune";
@ -1166,6 +1168,9 @@ public class SessionVariable implements Serializable, Writable {
@VariableMgr.VarAttr(name = WAIT_FULL_BLOCK_SCHEDULE_TIMES)
public int waitFullBlockScheduleTimes = 2;
@VariableMgr.VarAttr(name = DISABLE_INVERTED_INDEX_V1_FOR_VARIANT)
private boolean disableInvertedIndexV1ForVaraint = true;
public int getBeNumberForTest() {
return beNumberForTest;
}
@ -4430,4 +4435,12 @@ public class SessionVariable implements Serializable, Writable {
public boolean isEnableCooldownReplicaAffinity() {
return enableCooldownReplicaAffinity;
}
public void setDisableInvertedIndexV1ForVaraint(boolean disableInvertedIndexV1ForVaraint) {
this.disableInvertedIndexV1ForVaraint = disableInvertedIndexV1ForVaraint;
}
public boolean getDisableInvertedIndexV1ForVaraint() {
return disableInvertedIndexV1ForVaraint;
}
}

View File

@ -17,6 +17,9 @@
package org.apache.doris.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import com.google.common.collect.Lists;
@ -60,6 +63,16 @@ public class IndexDefTest {
} catch (AnalysisException e) {
Assert.assertTrue(e instanceof AnalysisException);
}
try {
def = new IndexDef("variant_index", false, Lists.newArrayList("col1"),
IndexDef.IndexType.INVERTED, null, "comment");
boolean isIndexFormatV1 = true;
def.checkColumn(new Column("col1", PrimitiveType.VARIANT), KeysType.UNIQUE_KEYS, true, isIndexFormatV1);
Assert.fail("No exception throws.");
} catch (AnalysisException e) {
Assert.assertTrue(e instanceof AnalysisException);
Assert.assertTrue(e.getMessage().contains("not supported in inverted index format V1"));
}
}
@Test

View File

@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.trees.plans.commands;
import org.apache.doris.catalog.AggregateType;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.plans.commands.info.ColumnDefinition;
import org.apache.doris.nereids.trees.plans.commands.info.IndexDefinition;
import org.apache.doris.nereids.types.VariantType;
import com.google.common.collect.Lists;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
public class IndexDefinitionTest {
@Test
void testVariantIndexFormatV1() throws AnalysisException {
IndexDefinition def = new IndexDefinition("variant_index", Lists.newArrayList("col1"), "INVERTED",
null, "comment");
try {
boolean isIndexFormatV1 = true;
def.checkColumn(new ColumnDefinition("col1", VariantType.INSTANCE, false, AggregateType.NONE, true,
null, "comment"), KeysType.UNIQUE_KEYS, true, isIndexFormatV1);
Assertions.fail("No exception throws.");
} catch (AnalysisException e) {
Assertions.assertTrue(e instanceof AnalysisException);
Assertions.assertTrue(e.getMessage().contains("not supported in inverted index format V1"));
}
}
}

View File

@ -129,6 +129,7 @@ suite("test_single_compaction_with_variant_inverted", "p2") {
sql """ DROP TABLE IF EXISTS ${tableName}; """
sql """ set disable_inverted_index_v1_for_variant = false """
sql """
CREATE TABLE ${tableName} (
`id` int(11) NULL,
@ -147,6 +148,7 @@ suite("test_single_compaction_with_variant_inverted", "p2") {
"compaction_policy" = "time_series"
);
"""
sql """ set disable_inverted_index_v1_for_variant = true """
def tablets = sql_return_maparray """ show tablets from ${tableName}; """

View File

@ -64,6 +64,7 @@ suite("test_show_nested_index_file_http_action_with_variant", "nonConcurrent,p0"
def tableName = "test_show_nested_index_file_http_action_with_variant_" + format
sql "DROP TABLE IF EXISTS ${tableName}"
sql """ set disable_inverted_index_v1_for_variant = false """
sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
k bigint,
@ -74,6 +75,7 @@ suite("test_show_nested_index_file_http_action_with_variant", "nonConcurrent,p0"
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "${format}");
"""
sql """ set disable_inverted_index_v1_for_variant = true """
load_json_data.call(tableName, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""")
load_json_data.call(tableName, """${getS3Url() + '/regression/gharchive.m/2015-01-01-1.json'}""")

View File

@ -60,6 +60,7 @@ suite("test_variant_index_format_v1", "p2") {
def table_name = "github_events"
sql """DROP TABLE IF EXISTS ${table_name}"""
sql """ set disable_inverted_index_v1_for_variant = false """
sql """
CREATE TABLE IF NOT EXISTS ${table_name} (
k bigint,
@ -70,7 +71,7 @@ suite("test_variant_index_format_v1", "p2") {
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "V1");
"""
sql """ set disable_inverted_index_v1_for_variant = true """
set_be_config.call("memory_limitation_per_thread_for_schema_change_bytes", "6294967296")
load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""")
load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-1.json'}""")

View File

@ -51,6 +51,7 @@ suite("test_single_replica_load", "p2") {
def tableName = "test_single_replica_load"
sql "DROP TABLE IF EXISTS ${tableName}"
sql """ set disable_inverted_index_v1_for_variant = false """
sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
k bigint,
@ -61,6 +62,7 @@ suite("test_single_replica_load", "p2") {
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "2", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "V1");
"""
sql """ set disable_inverted_index_v1_for_variant = true """
load_json_data.call(tableName, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""")
load_json_data.call(tableName, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""")
load_json_data.call(tableName, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""")

View File

@ -35,8 +35,7 @@ suite("variant_mv") {
repo VARIANT NULL,
payload VARIANT NULL,
public BOOLEAN NULL,
created_at DATETIME NULL,
INDEX idx_payload (`payload`) USING INVERTED PROPERTIES("parser" = "english") COMMENT 'inverted index for payload'
created_at DATETIME NULL
)
DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(id) BUCKETS 10
@ -55,8 +54,7 @@ suite("variant_mv") {
repo VARIANT NULL,
payload VARIANT NULL,
public BOOLEAN NULL,
created_at DATETIME NULL,
INDEX idx_payload (`payload`) USING INVERTED PROPERTIES("parser" = "english") COMMENT 'inverted index for payload'
created_at DATETIME NULL
)
DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(id) BUCKETS 10

View File

@ -371,7 +371,7 @@ suite("regression_test_variant", "p0"){
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V1",
"inverted_index_storage_format" = "V2",
"enable_unique_key_merge_on_write" = "true",
"light_schema_change" = "true",
"store_row_column" = "true",

View File

@ -61,8 +61,7 @@ suite("regression_test_variant_mtmv"){
`repo` VARIANT NULL,
`payload` VARIANT NULL,
`public` BOOLEAN NULL,
`created_at` DATETIME NULL,
INDEX idx_payload (`payload`) USING INVERTED PROPERTIES("parser" = "english", "lower_case" = "true") COMMENT 'inverted index for payload'
`created_at` DATETIME NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(`id`) BUCKETS 1

View File

@ -121,7 +121,8 @@ suite("regression_test_variant_rowstore", "variant_type"){
PROPERTIES (
"enable_unique_key_merge_on_write" = "true",
"store_row_column"="true",
"replication_num" = "1"
"replication_num" = "1",
"inverted_index_storage_format"= "v2"
);
"""
sql """insert into table_rs_invalid_json values (1, '1|[""]')"""

View File

@ -26,7 +26,7 @@ suite("regression_test_variant_var_index", "p0"){
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true");
properties("replication_num" = "1", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "V2");
"""
sql """insert into var_index values(1, '{"a" : 123, "b" : "xxxyyy", "c" : 111999111}')"""
@ -45,4 +45,64 @@ suite("regression_test_variant_var_index", "p0"){
sql """insert into var_index values(8, '{"timestamp": [123]}')"""
sql """insert into var_index values(9, '{"timestamp": 17.0}'),(10, '{"timestamp": "17.0"}')"""
qt_sql "select * from var_index order by k limit 10"
sql """ set disable_inverted_index_v1_for_variant = true """
sql "DROP TABLE IF EXISTS var_index"
try {
sql """
CREATE TABLE IF NOT EXISTS var_index (
k bigint,
v variant,
INDEX idx_var(v) USING INVERTED PROPERTIES("parser" = "english") COMMENT ''
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "V1");
"""
} catch (Exception e) {
log.info(e.getMessage())
assertTrue(e.getMessage().contains("not supported in inverted index format V1"))
}
sql """
CREATE TABLE IF NOT EXISTS var_index (
k bigint,
v variant
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "V1");
"""
try {
sql """ALTER TABLE var_index ADD INDEX idx_var(v) USING INVERTED"""
} catch (Exception e) {
log.info(e.getMessage())
assertTrue(e.getMessage().contains("not supported in inverted index format V1"))
}
sql """ set disable_inverted_index_v1_for_variant = false """
sql """
CREATE TABLE IF NOT EXISTS var_index (
k bigint,
v variant,
INDEX idx_var(v) USING INVERTED PROPERTIES("parser" = "english") COMMENT ''
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "V1");
"""
sql "DROP TABLE IF EXISTS var_index"
sql """
CREATE TABLE IF NOT EXISTS var_index (
k bigint,
v variant
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "V1");
"""
sql """ALTER TABLE var_index ADD INDEX idx_var(v) USING INVERTED"""
sql """ set disable_inverted_index_v1_for_variant = true """
}