From 97234a64853a89c34db219999ace002b262af15c Mon Sep 17 00:00:00 2001 From: airborne12 Date: Wed, 3 Jan 2024 09:54:31 +0800 Subject: [PATCH] [Enhancement](inverted index) strictly checkout inverted index properties (#29421) --- .../doris/analysis/InvertedIndexUtil.java | 99 ++++++-- .../inverted_index_p0/test_properties.groovy | 233 ++++++++++++++++++ 2 files changed, 311 insertions(+), 21 deletions(-) create mode 100644 regression-test/suites/inverted_index_p0/test_properties.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java index 2cd6269233..44e8bd1534 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java @@ -20,8 +20,11 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.common.AnalysisException; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; public class InvertedIndexUtil { @@ -43,9 +46,11 @@ public class InvertedIndexUtil { public static String INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE = "char_replace"; - public static String INVERTED_INDEX_PARSER_IGNORE_ABOVE = "ignore_above"; + public static String INVERTED_INDEX_SUPPORT_PHRASE_KEY = "support_phrase"; - public static String INVERTED_INDEX_PARSER_LOWERCASE = "lower_case"; + public static String INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY = "ignore_above"; + + public static String INVERTED_INDEX_PARSER_LOWERCASE_KEY = "lower_case"; public static String getInvertedIndexParser(Map properties) { String parser = properties == null ? null : properties.get(INVERTED_INDEX_PARSER_KEY); @@ -100,26 +105,9 @@ public class InvertedIndexUtil { if (properties != null) { parser = properties.get(INVERTED_INDEX_PARSER_KEY); if (parser == null && !properties.isEmpty()) { - throw new AnalysisException("invalid index properties, please check the properties"); + throw new AnalysisException("Invalid index properties, parser must not be none"); } - String ignoreAbove = properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE); - if (ignoreAbove != null) { - try { - int ignoreAboveValue = Integer.parseInt(ignoreAbove); - if (ignoreAboveValue <= 0) { - throw new AnalysisException("invalid index properties, ignore_above must be positive"); - } - } catch (NumberFormatException e) { - throw new AnalysisException("invalid index properties, ignore_above must be integer"); - } - } - String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE); - if (lowerCase != null) { - if (!"true".equals(lowerCase) && !"false".equals(lowerCase)) { - throw new AnalysisException("invalid index properties, lowercase must be true or false"); - } - } - + checkInvertedIndexProperties(properties); } // default is "none" if not set @@ -141,4 +129,73 @@ public class InvertedIndexUtil { + " is not supported for column: " + indexColName + " of type " + colType); } } + + public static void checkInvertedIndexProperties(Map properties) throws AnalysisException { + Set allowedKeys = new HashSet<>(Arrays.asList( + INVERTED_INDEX_PARSER_KEY, + INVERTED_INDEX_PARSER_MODE_KEY, + INVERTED_INDEX_SUPPORT_PHRASE_KEY, + INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE, + INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN, + INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT, + INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY, + INVERTED_INDEX_PARSER_LOWERCASE_KEY + )); + + for (String key : properties.keySet()) { + if (!allowedKeys.contains(key)) { + throw new AnalysisException("Invalid inverted index property key: " + key); + } + } + + String parser = properties.get(INVERTED_INDEX_PARSER_KEY); + String parserMode = properties.get(INVERTED_INDEX_PARSER_MODE_KEY); + String supportPhrase = properties.get(INVERTED_INDEX_SUPPORT_PHRASE_KEY); + String charFilterType = properties.get(INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE); + String charFilterPattern = properties.get(INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN); + String ignoreAbove = properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY); + String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE_KEY); + + if (parser != null && !parser.matches("none|english|unicode|chinese|standard")) { + throw new AnalysisException("Invalid inverted index 'parser' value: " + parser + + ", parser must be none, english, unicode or chinese"); + } + + if (!"chinese".equals(parser) && parserMode != null) { + throw new AnalysisException("parser_mode is only available for chinese parser"); + } + + if ("chinese".equals(parser) && (parserMode != null && !parserMode.matches("fine_grained|coarse_grained"))) { + throw new AnalysisException("Invalid inverted index 'parser_mode' value: " + parserMode + + ", parser_mode must be fine_grained or coarse_grained"); + } + + if (supportPhrase != null && !supportPhrase.matches("true|false")) { + throw new AnalysisException("Invalid inverted index 'support_phrase' value: " + supportPhrase + + ", support_phrase must be true or false"); + } + + if (INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE.equals(charFilterType) && (charFilterPattern == null + || charFilterPattern.isEmpty())) { + throw new AnalysisException("Missing 'char_filter_pattern' for 'char_replace' filter type"); + } + + if (ignoreAbove != null) { + try { + int ignoreAboveValue = Integer.parseInt(ignoreAbove); + if (ignoreAboveValue <= 0) { + throw new AnalysisException("Invalid inverted index 'ignore_above' value: " + ignoreAboveValue + + ", ignore_above must be positive"); + } + } catch (NumberFormatException e) { + throw new AnalysisException( + "Invalid inverted index 'ignore_above' value, ignore_above must be integer"); + } + } + + if (lowerCase != null && !lowerCase.matches("true|false")) { + throw new AnalysisException( + "Invalid inverted index 'lower_case' value: " + lowerCase + ", lower_case must be true or false"); + } + } } diff --git a/regression-test/suites/inverted_index_p0/test_properties.groovy b/regression-test/suites/inverted_index_p0/test_properties.groovy new file mode 100644 index 0000000000..84b0aef8f3 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_properties.groovy @@ -0,0 +1,233 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_properties", "p0"){ + // prepare test table + def indexTblName = "test_properties" + + sql "DROP TABLE IF EXISTS ${indexTblName}" + def success = false; + + def create_table_with_inverted_index_properties = { create_sql, error_msg-> + try { + sql create_sql + success = true; + } catch(Throwable ex) { + logger.info("create_table_with_inverted_index_properties result: " + ex) + if (ex != null) { + def msg = ex.toString() + assertTrue(msg != null && msg.contains(error_msg), "Expect exception msg contains '${error_msg}', but meet '${msg}'") + } + success = false; + } finally { + sql "DROP TABLE IF EXISTS ${indexTblName}" + } + } + + def empty_parser = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id`int(11)NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(empty_parser, "Invalid inverted index 'parser' value") + assertEquals(success, false) + + def wrong_parser = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id`int(11)NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="german") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(wrong_parser, "Invalid inverted index 'parser' value") + assertEquals(success, false) + + def wrong_parser_mode = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id` int(11) NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", "parser_mode"="fine_grained") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(wrong_parser_mode, "parser_mode is only available for chinese parser") + assertEquals(success, false) + + def valid_parser_and_mode = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id` int(11) NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="chinese", "parser_mode"="fine_grained") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(valid_parser_and_mode, "") + assertEquals(success, true) + + def missing_char_filter_pattern = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id` int(11) NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", "char_filter_type"="char_replace") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(missing_char_filter_pattern, "Missing 'char_filter_pattern' for 'char_replace' filter type") + assertEquals(success, false) + + def invalid_property_key = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id` int(11) NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("invalid_key"="value") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(invalid_property_key, "Invalid index properties, parser must not be none") + assertEquals(success, false) + + def invalid_property_key2 = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id` int(11) NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", "invalid_key"="value") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(invalid_property_key2, "Invalid inverted index property key:") + assertEquals(success, false) + + def invalid_ignore_above = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id` int(11) NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", "ignore_above"="-1") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(invalid_ignore_above, "Invalid inverted index 'ignore_above' value") + assertEquals(success, false) + + def non_numeric_ignore_above = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id` int(11) NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", "ignore_above"="non_numeric") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(non_numeric_ignore_above, "ignore_above must be integer") + assertEquals(success, false) + + def invalid_lower_case = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id` int(11) NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", "lower_case"="invalid") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(invalid_lower_case, "Invalid inverted index 'lower_case' value") + assertEquals(success, false) + + def invalid_support_phrase = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id` int(11) NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", "support_phrase"="invalid") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(invalid_support_phrase, "Invalid inverted index 'support_phrase' value") + assertEquals(success, false) + + def invalid_support_phrase2 = """ + CREATE TABLE IF NOT EXISTS ${indexTblName}( + `id` int(11) NULL, + `c` text NULL, + INDEX c_idx(`c`) USING INVERTED PROPERTIES("parser"="english", "support_phase"="true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + create_table_with_inverted_index_properties(invalid_support_phrase2, "Invalid inverted index property key: support_phase") + assertEquals(success, false) +}