[feature](inverted index) add ignore_above property to prevent long s… (#28585)

When a string is too long, CLucene throws an error.
Such a string is also too long to analyze, so by default we ignore a string during the indexing process when it is longer than 256 bytes.
We add a property, `ignore_above`, so that users can customize this threshold.
This commit is contained in:
qiye
2023-12-19 18:54:36 +08:00
committed by GitHub
parent 73a3d84c5e
commit 9c9249e911
14 changed files with 72 additions and 19 deletions

View File

@ -43,6 +43,8 @@ public class InvertedIndexUtil {
// String constant "char_replace"; judging by its name it identifies the
// char_replace char filter -- NOTE(review): confirm against the callers.
public static String INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE = "char_replace";
// Key of the "ignore_above" index property. When present, the property value
// is validated to be a positive integer. Per the commit description, strings
// longer than this value are ignored during indexing (default: 256 bytes).
public static String INVERTED_INDEX_PARSER_IGNORE_ABOVE = "ignore_above";
public static String getInvertedIndexParser(Map<String, String> properties) {
String parser = properties == null ? null : properties.get(INVERTED_INDEX_PARSER_KEY);
// default is "none" if not set
@ -98,6 +100,17 @@ public class InvertedIndexUtil {
if (parser == null && !properties.isEmpty()) {
throw new AnalysisException("invalid index properties, please check the properties");
}
String ignoreAbove = properties.get(INVERTED_INDEX_PARSER_IGNORE_ABOVE);
if (ignoreAbove != null) {
try {
int ignoreAboveValue = Integer.parseInt(ignoreAbove);
if (ignoreAboveValue <= 0) {
throw new AnalysisException("invalid index properties, ignore_above must be positive");
}
} catch (NumberFormatException e) {
throw new AnalysisException("invalid index properties, ignore_above must be integer");
}
}
}
// default is "none" if not set