# Patch summary (free text ahead of the first "diff --git" header):
# - ob_whitespace_ft_parser.h/.cpp: adds FT_MIN_WORD_LEN = 3 and FT_MAX_WORD_LEN = 84. A word whose word_len
#   falls outside [3, 84] takes a LOG_DEBUG-only branch that leaves ret unchanged, so the allocation branch is not reached.
# - tools/docker/standalone/Dockerfile: changes the host of the OceanBase.repo URL and declares STOPSIGNAL SIGTERM.
# - tools/docker/standalone/step_2_boot/_boot: traps SIGTERM to run `obd cluster stop $OB_CLUSTER_NAME` followed by
#   `exit 0`, and replaces `exec tail -f /dev/null` with a `sleep 1` loop.
# - unittest/storage/test_fts_plugin.cpp: shrinks the expected word lists and adds test_min_and_max_word_len, which
#   feeds single words of length 2, 3, 4, 78, 84 and 85. The 78-character case (three 26-letter alphabets) was
#   previously labelled 76; the comment and the local variable name now state the real length.
# NOTE(review): `static_cast(` and `common::ObSEArray words` look like they lost their template arguments during
#   extraction - confirm against the original patch before applying.
diff --git a/src/storage/fts/ob_whitespace_ft_parser.cpp b/src/storage/fts/ob_whitespace_ft_parser.cpp index a2776290d5..b12c17bc94 100644 --- a/src/storage/fts/ob_whitespace_ft_parser.cpp +++ b/src/storage/fts/ob_whitespace_ft_parser.cpp @@ -76,6 +76,8 @@ namespace storage || OB_UNLIKELY(0 >= word_len)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), KPC(param), KP(allocator), KP(word), K(word_len)); + } else if (word_len < FT_MIN_WORD_LEN || word_len > FT_MAX_WORD_LEN) { + LOG_DEBUG("skip too small or large word", K(ret), K(word_len)); } else if (OB_ISNULL(buf = static_cast(allocator->alloc(word_len)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to allocate word memory", K(ret), K(word_len)); diff --git a/src/storage/fts/ob_whitespace_ft_parser.h b/src/storage/fts/ob_whitespace_ft_parser.h index 5dff4464da..11e0d2f597 100644 --- a/src/storage/fts/ob_whitespace_ft_parser.h +++ b/src/storage/fts/ob_whitespace_ft_parser.h @@ -25,6 +25,9 @@ namespace storage class ObSpaceFTParser final { +public: + static const int64_t FT_MIN_WORD_LEN = 3; + static const int64_t FT_MAX_WORD_LEN = 84; public: static int segment( lib::ObFTParserParam *param, diff --git a/tools/docker/standalone/Dockerfile b/tools/docker/standalone/Dockerfile index 3cb3fbca9e..3ba1193eeb 100644 --- a/tools/docker/standalone/Dockerfile +++ b/tools/docker/standalone/Dockerfile @@ -5,7 +5,7 @@ ARG VERSION ARG STEP RUN yum install -y yum-utils && \ - yum-config-manager --add-repo https://mirrors.aliyun.com/oceanbase/OceanBase.repo && \ + yum-config-manager --add-repo https://mirrors.oceanbase.com/oceanbase/OceanBase.repo && \ sed -i 's/$releasever/7/' /etc/yum.repos.d/OceanBase.repo && \ yum install -y ob-deploy obclient ob-sysbench libaio bc libselinux-utils zip && \ rm -rf /usr/obd/mirror/remote/* && \ @@ -36,6 +36,7 @@ COPY init_store_for_fast_start.py /root/boot/ ENV PATH /root/boot:$PATH ENV LD_LIBRARY_PATH /home/admin/oceanbase/lib:/root/ob/lib:$LD_LIBRARY_PATH +STOPSIGNAL 
SIGTERM WORKDIR /root CMD _boot diff --git a/tools/docker/standalone/step_2_boot/_boot b/tools/docker/standalone/step_2_boot/_boot index b83b4e4035..8e7081bc43 100755 --- a/tools/docker/standalone/step_2_boot/_boot +++ b/tools/docker/standalone/step_2_boot/_boot @@ -122,6 +122,13 @@ function deploy_failed { fi } +function cleanup() { + obd cluster stop $OB_CLUSTER_NAME + exit 0 +} + +trap 'cleanup' SIGTERM + # We should decide whether the observer's data exists and # whether the obd has the information of the cluster @@ -261,5 +268,7 @@ else echo "Please check the log file ${OB_HOME_PATH}/log/observer.log" fi fi -exec tail -f /dev/null +while :; do + sleep 1 +done diff --git a/unittest/storage/test_fts_plugin.cpp b/unittest/storage/test_fts_plugin.cpp index 56476b3f7f..ff1562dd9e 100644 --- a/unittest/storage/test_fts_plugin.cpp +++ b/unittest/storage/test_fts_plugin.cpp @@ -79,8 +79,8 @@ class ObTestAddWord final : public lib::ObFTParserParam::ObIAddWord { public: static const char *TEST_FULLTEXT; - static const int64_t TEST_WORD_COUNT = 9; - static const int64_t TEST_WORD_COUNT_WITHOUT_STOPWORD = 6; + static const int64_t TEST_WORD_COUNT = 5; + static const int64_t TEST_WORD_COUNT_WITHOUT_STOPWORD = 4; public: ObTestAddWord(); virtual ~ObTestAddWord() = default; @@ -99,8 +99,8 @@ private: const char *ObTestAddWord::TEST_FULLTEXT = "OceanBase fulltext search is No.1 in the world."; ObTestAddWord::ObTestAddWord() - : words_{"oceanbase", "fulltext", "search", "is", "no", "1", "in", "the", "world"}, - words_without_stopword_{"oceanbase", "fulltext", "search", "no", "1", "world"}, + : words_{"oceanbase", "fulltext", "search", "the", "world"}, + words_without_stopword_{"oceanbase", "fulltext", "search", "world"}, ith_word_(0) { } @@ -451,6 +451,42 @@ TEST_F(ObTestFTParseHelper, test_parse_fulltext) } } +TEST_F(ObTestFTParseHelper, test_min_and_max_word_len) +{ + common::ObSEArray words; + int64_t doc_length = 0; + + // word len = 2; + const char *word_len_2 = "ab"; + 
ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, word_len_2, std::strlen(word_len_2), doc_length, words)); + ASSERT_EQ(0, words.count()); + + // word len = 3; + const char *word_len_3 = "abc"; + ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, word_len_3, std::strlen(word_len_3), doc_length, words)); + ASSERT_EQ(1, words.count()); + + // word len = 4; + const char *word_len_4 = "abcd"; + ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, word_len_4, std::strlen(word_len_4), doc_length, words)); + ASSERT_EQ(1, words.count()); + + // word len = 78; + const char *word_len_78 = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; + ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, word_len_78, std::strlen(word_len_78), doc_length, words)); + ASSERT_EQ(1, words.count()); + + // word len = 84; + const char *word_len_84 = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz123456"; + ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, word_len_84, std::strlen(word_len_84), doc_length, words)); + ASSERT_EQ(1, words.count()); + + // word len = 85; + const char *word_len_85 = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz1234567"; + ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, word_len_85, std::strlen(word_len_85), doc_length, words)); + ASSERT_EQ(0, words.count()); +} + class ObTestNgramFTParseHelper : public ::testing::Test { public: