[opt](inverted index) the "unicode" tokenizer can be configured to disable stop words. (#34467)
This commit is contained in:
@ -52,6 +52,8 @@ public class InvertedIndexUtil {
|
||||
|
||||
public static String INVERTED_INDEX_PARSER_LOWERCASE_KEY = "lower_case";
|
||||
|
||||
public static String INVERTED_INDEX_PARSER_STOPWORDS_KEY = "stopwords";
|
||||
|
||||
public static String getInvertedIndexParser(Map<String, String> properties) {
|
||||
String parser = properties == null ? null : properties.get(INVERTED_INDEX_PARSER_KEY);
|
||||
// default is "none" if not set
|
||||
@ -136,7 +138,8 @@ public class InvertedIndexUtil {
|
||||
INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN,
|
||||
INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT,
|
||||
INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY,
|
||||
INVERTED_INDEX_PARSER_LOWERCASE_KEY
|
||||
INVERTED_INDEX_PARSER_LOWERCASE_KEY,
|
||||
INVERTED_INDEX_PARSER_STOPWORDS_KEY
|
||||
));
|
||||
|
||||
for (String key : properties.keySet()) {
|
||||
@ -152,6 +155,7 @@ public class InvertedIndexUtil {
|
||||
String charFilterPattern = properties.get(INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN);
|
||||
String ignoreAbove = properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY);
|
||||
String lowerCase = properties.get(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
|
||||
String stopWords = properties.get(INVERTED_INDEX_PARSER_STOPWORDS_KEY);
|
||||
|
||||
if (parser != null && !parser.matches("none|english|unicode|chinese|standard")) {
|
||||
throw new AnalysisException("Invalid inverted index 'parser' value: " + parser
|
||||
@ -194,5 +198,10 @@ public class InvertedIndexUtil {
|
||||
throw new AnalysisException(
|
||||
"Invalid inverted index 'lower_case' value: " + lowerCase + ", lower_case must be true or false");
|
||||
}
|
||||
|
||||
if (stopWords != null && !stopWords.matches("none")) {
|
||||
throw new AnalysisException("Invalid inverted index 'stopWords' value: " + stopWords
|
||||
+ ", stopWords must be none");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user