From 3131c4a0da6076df5e00b7b8ac074baca13c96cf Mon Sep 17 00:00:00 2001
From: Tyshawn
Date: Mon, 22 Apr 2024 14:58:52 +0000
Subject: [PATCH] [FTS][BUG.FIX] fix core at oceanbase::sql::ObDASDomainUtils::segment_and_calc_word_count

---
 src/share/text_analysis/ob_token_stream.cpp |  2 +-
 unittest/storage/test_fts_plugin.cpp        | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/share/text_analysis/ob_token_stream.cpp b/src/share/text_analysis/ob_token_stream.cpp
index 48cb71980..40a0d5c5b 100644
--- a/src/share/text_analysis/ob_token_stream.cpp
+++ b/src/share/text_analysis/ob_token_stream.cpp
@@ -275,7 +275,7 @@ int ObBasicEnglishNormalizer::get_next(ObDatum &next_token, int64_t &token_freq)
       }
     }
 
-    for (uint32_t i = raw_token_len - 1; i >= first_alnum_pos; --i) {
+    for (int32_t i = raw_token_len - 1; 0 <= i && i < raw_token_len && i >= first_alnum_pos; --i) {
       const char *character = token + i;
       if (ob_isalnum(cs_, *character)) {
         last_alnum_pos = i;
diff --git a/unittest/storage/test_fts_plugin.cpp b/unittest/storage/test_fts_plugin.cpp
index 569888e1a..56476b3f7 100644
--- a/unittest/storage/test_fts_plugin.cpp
+++ b/unittest/storage/test_fts_plugin.cpp
@@ -22,6 +22,7 @@
 #include "storage/fts/ob_fts_plugin_helper.h"
 #include "storage/fts/ob_fts_plugin_mgr.h"
 #include "storage/fts/ob_whitespace_ft_parser.h"
+#include "storage/fts/ob_fts_stop_word.h"
 #include "sql/das/ob_das_utils.h"
 
 namespace oceanbase
@@ -186,6 +187,23 @@ TEST_F(TestDefaultFTParser, test_space_ft_parser_segment)
   LOG_INFO("after space segment", KCSTRING(fulltext), K(ft_len), K(ft_parser_param_));
 }
 
+TEST_F(TestDefaultFTParser, test_space_ft_parser_segment_bug_56324268)
+{
+  common::ObArray words;
+  ObNoStopWordAddWord add_word(ObCollationType::CS_TYPE_LATIN1_SWEDISH_CI, allocator_, words);
+  const char *fulltext = "\201 想 将 数据 添加 到 数据库\f\026 ";
+  const int64_t ft_len = strlen(fulltext);
+
+  ft_parser_param_.fulltext_ = fulltext;
+  ft_parser_param_.ft_length_ = ft_len;
+  ft_parser_param_.add_word_ = &add_word;
+  ft_parser_param_.cs_ = common::ObCharset::get_charset(ObCollationType::CS_TYPE_LATIN1_SWEDISH_CI);
+
+  LOG_INFO("before space segment", KCSTRING(fulltext), K(ft_len), K(ft_parser_param_));
+  ASSERT_EQ(OB_SUCCESS, ObSpaceFTParser::segment(&ft_parser_param_, fulltext, ft_len));
+  LOG_INFO("after space segment", KCSTRING(fulltext), K(words), K(ft_len), K(ft_parser_param_));
+}
+
 TEST_F(TestDefaultFTParser, test_default_ft_parser_desc)
 {
   ASSERT_EQ(OB_INVALID_ARGUMENT, desc_.segment(&ft_parser_param_));