[fix](chinese) fix the issue where the be crashes due to the missing chinese dict (#30712)
Submodule be/src/clucene updated: f4829cc50f...63ae98a8bc
@@ -272,6 +272,7 @@ namespace ErrorCode {
     E(INVERTED_INDEX_BUILD_WAITTING, -6008, false);      \
     E(INVERTED_INDEX_NOT_IMPLEMENTED, -6009, false);     \
     E(INVERTED_INDEX_COMPACTION_ERROR, -6010, false);    \
+    E(INVERTED_INDEX_ANALYZER_ERROR, -6011, false);      \
     E(KEY_NOT_FOUND, -7000, false);                       \
     E(KEY_ALREADY_EXISTS, -7001, false);                  \
     E(ENTRY_NOT_FOUND, -7002, false);
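Each E(name, code, ...) entry in the table above registers an error constant that callers return through Status::Error<...>; this commit adds INVERTED_INDEX_ANALYZER_ERROR (-6011) for analyzer-creation failures. A minimal, self-contained sketch of that idea (the stand-in DemoStatus type below is illustrative only, not the Doris Status implementation):

    #include <iostream>
    #include <string>
    #include <utility>

    // Illustrative stand-ins only: a numeric error constant plus a tiny Status-like
    // type carrying a code and a message. -6011 is the value added by this commit.
    constexpr int INVERTED_INDEX_ANALYZER_ERROR = -6011;

    struct DemoStatus {
        int code;
        std::string msg;
        static DemoStatus OK() { return {0, ""}; }
        static DemoStatus Error(int c, std::string m) { return {c, std::move(m)}; }
        bool ok() const { return code == 0; }
    };

    int main() {
        DemoStatus st = DemoStatus::Error(INVERTED_INDEX_ANALYZER_ERROR,
                                          "inverted index create analyzer failed: dict missing");
        std::cout << st.code << ": " << st.msg << "\n"; // -6011: inverted index create analyzer failed: ...
    }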
@@ -244,23 +244,28 @@ public:
     }
 
     Status create_analyzer(std::unique_ptr<lucene::analysis::Analyzer>& analyzer) {
-        switch (_parser_type) {
-        case InvertedIndexParserType::PARSER_STANDARD:
-        case InvertedIndexParserType::PARSER_UNICODE:
-            analyzer = std::make_unique<lucene::analysis::standard95::StandardAnalyzer>();
-            break;
-        case InvertedIndexParserType::PARSER_ENGLISH:
-            analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
-            break;
-        case InvertedIndexParserType::PARSER_CHINESE:
-            analyzer = create_chinese_analyzer();
-            break;
-        default:
-            analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
-            break;
+        try {
+            switch (_parser_type) {
+            case InvertedIndexParserType::PARSER_STANDARD:
+            case InvertedIndexParserType::PARSER_UNICODE:
+                analyzer = std::make_unique<lucene::analysis::standard95::StandardAnalyzer>();
+                break;
+            case InvertedIndexParserType::PARSER_ENGLISH:
+                analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
+                break;
+            case InvertedIndexParserType::PARSER_CHINESE:
+                analyzer = create_chinese_analyzer();
+                break;
+            default:
+                analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
+                break;
+            }
+            setup_analyzer_lowercase(analyzer);
+            return Status::OK();
+        } catch (CLuceneError& e) {
+            return Status::Error<doris::ErrorCode::INVERTED_INDEX_ANALYZER_ERROR>(
+                    "inverted index create analyzer failed: {}", e.what());
         }
-        setup_analyzer_lowercase(analyzer);
-        return Status::OK();
     }
 
     void setup_analyzer_lowercase(std::unique_ptr<lucene::analysis::Analyzer>& analyzer) {
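The hunk above is the core of the fix: create_chinese_analyzer() can throw a CLuceneError when the Chinese dictionary files are not present, and before this commit the exception escaped create_analyzer() and brought down the BE process; now it is converted into a Status carrying the new error code. A minimal, self-contained sketch of that failure mode and of the guarded version (stand-in types only, not Doris or CLucene code):

    #include <iostream>
    #include <memory>
    #include <stdexcept>
    #include <string>

    struct Analyzer {};

    // Stand-in for create_chinese_analyzer(): throws when the dictionary cannot be
    // loaded, which is what happens on a BE where the Chinese dict files are missing.
    std::unique_ptr<Analyzer> make_chinese_analyzer(bool dict_present) {
        if (!dict_present) throw std::runtime_error("chinese dict not found");
        return std::make_unique<Analyzer>();
    }

    // Unguarded: the exception propagates to the caller; if nothing on the call path
    // catches it, std::terminate() aborts the process (the reported crash).
    std::unique_ptr<Analyzer> create_analyzer_unguarded() { return make_chinese_analyzer(false); }

    // Guarded, mirroring the diff: convert the exception into an error return value.
    int create_analyzer_guarded(std::unique_ptr<Analyzer>& out) {
        try {
            out = make_chinese_analyzer(false);
            return 0;                      // Status::OK() in the real code
        } catch (const std::exception& e) {
            std::cerr << "inverted index create analyzer failed: " << e.what() << "\n";
            return -6011;                  // INVERTED_INDEX_ANALYZER_ERROR in the real code
        }
    }

    int main() {
        std::unique_ptr<Analyzer> a;
        std::cout << create_analyzer_guarded(a) << "\n"; // prints -6011, process keeps running
        // create_analyzer_unguarded();  // would throw; left uncaught it would terminate the process
    }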
@@ -142,8 +142,16 @@ Status FunctionTokenize::execute_impl(FunctionContext* /*context*/, Block& block
     inverted_index_ctx.parser_mode = get_parser_mode_string_from_properties(properties);
     inverted_index_ctx.char_filter_map =
             get_parser_char_filter_map_from_properties(properties);
-    auto analyzer =
-            doris::segment_v2::InvertedIndexReader::create_analyzer(&inverted_index_ctx);
+
+    std::unique_ptr<lucene::analysis::Analyzer> analyzer;
+    try {
+        analyzer = doris::segment_v2::InvertedIndexReader::create_analyzer(
+                &inverted_index_ctx);
+    } catch (CLuceneError& e) {
+        return Status::Error<doris::ErrorCode::INVERTED_INDEX_ANALYZER_ERROR>(
+                "inverted index create analyzer failed: {}", e.what());
+    }
+
     inverted_index_ctx.analyzer = analyzer.get();
     _do_tokenize(*col_left, inverted_index_ctx, *dest_nested_column, dest_offsets,
                  dest_nested_null_map);
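Unlike the writer-side create_analyzer above, InvertedIndexReader::create_analyzer here returns the analyzer directly and may still throw, so the tokenize() path guards the call itself: declare the unique_ptr first, assign it inside the try, and turn a CLuceneError into a Status. A small sketch of that caller-side pattern (stand-in types, not the Doris API):

    #include <iostream>
    #include <memory>
    #include <stdexcept>

    struct Analyzer {};

    // Stand-in for a factory that returns its result and reports failure by throwing.
    std::unique_ptr<Analyzer> create_analyzer_or_throw(bool dict_present) {
        if (!dict_present) throw std::runtime_error("chinese dict not found");
        return std::make_unique<Analyzer>();
    }

    int tokenize_like_caller(bool dict_present) {
        // Declared outside the try so the pointer stays in scope after the guarded call,
        // mirroring the "std::unique_ptr<...> analyzer; try { analyzer = ... }" shape above.
        std::unique_ptr<Analyzer> analyzer;
        try {
            analyzer = create_analyzer_or_throw(dict_present);
        } catch (const std::exception& e) {
            std::cerr << "inverted index create analyzer failed: " << e.what() << "\n";
            return -6011; // INVERTED_INDEX_ANALYZER_ERROR in the real code
        }
        // ... use analyzer.get() for tokenization here ...
        return 0; // Status::OK() in the real code
    }

    int main() {
        std::cout << tokenize_like_caller(false) << "\n"; // -6011: error surfaced, no crash
        std::cout << tokenize_like_caller(true) << "\n";  // 0: analyzer created normally
    }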