[fix](chinese) fix the issue where the BE crashes due to a missing Chinese dict (#30712)

This commit is contained in:
zzzxl
2024-02-05 14:07:34 +08:00
committed by yiguolei
parent a5d9004974
commit 255ca143f8
4 changed files with 33 additions and 19 deletions

View File

@ -272,6 +272,7 @@ namespace ErrorCode {
E(INVERTED_INDEX_BUILD_WAITTING, -6008, false); \
E(INVERTED_INDEX_NOT_IMPLEMENTED, -6009, false); \
E(INVERTED_INDEX_COMPACTION_ERROR, -6010, false); \
E(INVERTED_INDEX_ANALYZER_ERROR, -6011, false); \
E(KEY_NOT_FOUND, -7000, false); \
E(KEY_ALREADY_EXISTS, -7001, false); \
E(ENTRY_NOT_FOUND, -7002, false);

View File

@ -244,23 +244,28 @@ public:
}
Status create_analyzer(std::unique_ptr<lucene::analysis::Analyzer>& analyzer) {
switch (_parser_type) {
case InvertedIndexParserType::PARSER_STANDARD:
case InvertedIndexParserType::PARSER_UNICODE:
analyzer = std::make_unique<lucene::analysis::standard95::StandardAnalyzer>();
break;
case InvertedIndexParserType::PARSER_ENGLISH:
analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
break;
case InvertedIndexParserType::PARSER_CHINESE:
analyzer = create_chinese_analyzer();
break;
default:
analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
break;
try {
switch (_parser_type) {
case InvertedIndexParserType::PARSER_STANDARD:
case InvertedIndexParserType::PARSER_UNICODE:
analyzer = std::make_unique<lucene::analysis::standard95::StandardAnalyzer>();
break;
case InvertedIndexParserType::PARSER_ENGLISH:
analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
break;
case InvertedIndexParserType::PARSER_CHINESE:
analyzer = create_chinese_analyzer();
break;
default:
analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
break;
}
setup_analyzer_lowercase(analyzer);
return Status::OK();
} catch (CLuceneError& e) {
return Status::Error<doris::ErrorCode::INVERTED_INDEX_ANALYZER_ERROR>(
"inverted index create analyzer failed: {}", e.what());
}
setup_analyzer_lowercase(analyzer);
return Status::OK();
}
void setup_analyzer_lowercase(std::unique_ptr<lucene::analysis::Analyzer>& analyzer) {

View File

@ -142,8 +142,16 @@ Status FunctionTokenize::execute_impl(FunctionContext* /*context*/, Block& block
inverted_index_ctx.parser_mode = get_parser_mode_string_from_properties(properties);
inverted_index_ctx.char_filter_map =
get_parser_char_filter_map_from_properties(properties);
auto analyzer =
doris::segment_v2::InvertedIndexReader::create_analyzer(&inverted_index_ctx);
std::unique_ptr<lucene::analysis::Analyzer> analyzer;
try {
analyzer = doris::segment_v2::InvertedIndexReader::create_analyzer(
&inverted_index_ctx);
} catch (CLuceneError& e) {
return Status::Error<doris::ErrorCode::INVERTED_INDEX_ANALYZER_ERROR>(
"inverted index create analyzer failed: {}", e.what());
}
inverted_index_ctx.analyzer = analyzer.get();
_do_tokenize(*col_left, inverted_index_ctx, *dest_nested_column, dest_offsets,
dest_nested_null_map);