[Enhancement](inverted index) strictly check inverted index properties (#29421)

This commit is contained in:
airborne12
2024-01-03 09:54:31 +08:00
committed by GitHub
parent 298d0c6904
commit 97234a6485
2 changed files with 311 additions and 21 deletions

View File

@ -20,8 +20,11 @@ package org.apache.doris.analysis;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
public class InvertedIndexUtil {
@ -43,9 +46,11 @@ public class InvertedIndexUtil {
// Property-key and property-value names used when reading inverted index properties.
// NOTE(review): these fields are public and non-final, so they can be reassigned at
// runtime; they look like they are meant to be constants - confirm before relying on that.

// Char filter type value; checkInvertedIndexProperties requires a non-empty
// char filter pattern when the char filter type property equals this value.
public static String INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE = "char_replace";
// Property key "ignore_above". Holds the same string as INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY.
public static String INVERTED_INDEX_PARSER_IGNORE_ABOVE = "ignore_above";
// Property key "support_phrase"; its value is validated as "true" or "false".
public static String INVERTED_INDEX_SUPPORT_PHRASE_KEY = "support_phrase";
// Property key "lower_case". Holds the same string as INVERTED_INDEX_PARSER_LOWERCASE_KEY.
public static String INVERTED_INDEX_PARSER_LOWERCASE = "lower_case";
// Property key "ignore_above"; its value is validated as a positive integer.
public static String INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY = "ignore_above";
// Property key "lower_case"; its value is validated as "true" or "false".
public static String INVERTED_INDEX_PARSER_LOWERCASE_KEY = "lower_case";
public static String getInvertedIndexParser(Map<String, String> properties) {
String parser = properties == null ? null : properties.get(INVERTED_INDEX_PARSER_KEY);
@ -100,26 +105,9 @@ public class InvertedIndexUtil {
if (properties != null) {
parser = properties.get(INVERTED_INDEX_PARSER_KEY);
if (parser == null && !properties.isEmpty()) {
throw new AnalysisException("invalid index properties, please check the properties");
throw new AnalysisException("Invalid index properties, parser must not be none");
}
String ignoreAbove = properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE);
if (ignoreAbove != null) {
try {
int ignoreAboveValue = Integer.parseInt(ignoreAbove);
if (ignoreAboveValue <= 0) {
throw new AnalysisException("invalid index properties, ignore_above must be positive");
}
} catch (NumberFormatException e) {
throw new AnalysisException("invalid index properties, ignore_above must be integer");
}
}
String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE);
if (lowerCase != null) {
if (!"true".equals(lowerCase) && !"false".equals(lowerCase)) {
throw new AnalysisException("invalid index properties, lowercase must be true or false");
}
}
checkInvertedIndexProperties(properties);
}
// default is "none" if not set
@ -141,4 +129,73 @@ public class InvertedIndexUtil {
+ " is not supported for column: " + indexColName + " of type " + colType);
}
}
/**
 * Strictly validates the properties supplied for an inverted index.
 *
 * <p>The checks are applied in this order and the first failure is reported:
 * <ol>
 *   <li>every key must be one of the known inverted index property keys;</li>
 *   <li>{@code parser}, when present, must be none, english, unicode, chinese or standard;</li>
 *   <li>{@code parser_mode} may only be given together with the chinese parser, and must then be
 *       fine_grained or coarse_grained;</li>
 *   <li>{@code support_phrase}, when present, must be true or false;</li>
 *   <li>the char_replace char filter type requires a non-empty char filter pattern;</li>
 *   <li>{@code ignore_above}, when present, must parse as a positive integer;</li>
 *   <li>{@code lower_case}, when present, must be true or false.</li>
 * </ol>
 *
 * @param properties index properties to validate; {@code null} means no properties were
 *                   given and is accepted without error
 * @throws AnalysisException if an unknown key or an invalid value is found
 */
public static void checkInvertedIndexProperties(Map<String, String> properties) throws AnalysisException {
    // Nothing to validate; avoids a NullPointerException on properties.keySet() below.
    if (properties == null) {
        return;
    }

    Set<String> allowedKeys = new HashSet<>(Arrays.asList(
            INVERTED_INDEX_PARSER_KEY,
            INVERTED_INDEX_PARSER_MODE_KEY,
            INVERTED_INDEX_SUPPORT_PHRASE_KEY,
            INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE,
            INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN,
            INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT,
            INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY,
            INVERTED_INDEX_PARSER_LOWERCASE_KEY
    ));

    // Reject unknown keys first so typos are reported instead of being silently ignored.
    for (String key : properties.keySet()) {
        if (!allowedKeys.contains(key)) {
            throw new AnalysisException("Invalid inverted index property key: " + key);
        }
    }

    String parser = properties.get(INVERTED_INDEX_PARSER_KEY);
    String parserMode = properties.get(INVERTED_INDEX_PARSER_MODE_KEY);
    String supportPhrase = properties.get(INVERTED_INDEX_SUPPORT_PHRASE_KEY);
    String charFilterType = properties.get(INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE);
    String charFilterPattern = properties.get(INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN);
    String ignoreAbove = properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY);
    String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE_KEY);

    // The message lists every value the pattern accepts, including "standard".
    if (parser != null && !parser.matches("none|english|unicode|chinese|standard")) {
        throw new AnalysisException("Invalid inverted index 'parser' value: " + parser
                + ", parser must be none, english, unicode, chinese or standard");
    }

    // parser_mode is meaningful only for the chinese parser (a missing parser also rejects it).
    if (!"chinese".equals(parser) && parserMode != null) {
        throw new AnalysisException("parser_mode is only available for chinese parser");
    }

    if ("chinese".equals(parser) && (parserMode != null && !parserMode.matches("fine_grained|coarse_grained"))) {
        throw new AnalysisException("Invalid inverted index 'parser_mode' value: " + parserMode
                + ", parser_mode must be fine_grained or coarse_grained");
    }

    if (supportPhrase != null && !supportPhrase.matches("true|false")) {
        throw new AnalysisException("Invalid inverted index 'support_phrase' value: " + supportPhrase
                + ", support_phrase must be true or false");
    }

    // The char_replace filter cannot work without a pattern to replace.
    if (INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE.equals(charFilterType) && (charFilterPattern == null
            || charFilterPattern.isEmpty())) {
        throw new AnalysisException("Missing 'char_filter_pattern' for 'char_replace' filter type");
    }

    if (ignoreAbove != null) {
        try {
            int ignoreAboveValue = Integer.parseInt(ignoreAbove);
            if (ignoreAboveValue <= 0) {
                throw new AnalysisException("Invalid inverted index 'ignore_above' value: " + ignoreAboveValue
                        + ", ignore_above must be positive");
            }
        } catch (NumberFormatException e) {
            // Only the parse failure is translated here; the AnalysisException thrown above
            // is not a NumberFormatException and propagates unchanged.
            throw new AnalysisException(
                    "Invalid inverted index 'ignore_above' value, ignore_above must be integer");
        }
    }

    if (lowerCase != null && !lowerCase.matches("true|false")) {
        throw new AnalysisException(
                "Invalid inverted index 'lower_case' value: " + lowerCase + ", lower_case must be true or false");
    }
}
}