[FEAT MERGE] Full-text Search Index + [CP]Adaptive DAS Group Rescan + Json Multi-Value Index
Co-authored-by: saltonz <saltonzh@gmail.com> Co-authored-by: pe-99y <315053752@qq.com> Co-authored-by: JinmaoLi <ljm.csmaster@gmail.com>
This commit is contained in:
@ -76,6 +76,8 @@ ob_unittest(test_json_path)
|
||||
ob_unittest(test_json_schema)
|
||||
ob_unittest(test_json_tree)
|
||||
|
||||
ob_unittest(test_text_analyzer text_analysis/test_text_analyzer.cpp)
|
||||
|
||||
if(OB_BUILD_CLOSE_MODULES)
|
||||
ob_unittest(test_xml_bin)
|
||||
ob_unittest(test_xml_parser)
|
||||
|
File diff suppressed because one or more lines are too long
208
unittest/share/text_analysis/test_text_analyzer.cpp
Normal file
208
unittest/share/text_analysis/test_text_analyzer.cpp
Normal file
@ -0,0 +1,208 @@
|
||||
/**
|
||||
* Copyright (c) 2023 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#define USING_LOG_PREFIX SHARE
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#define protected public
|
||||
#define private public
|
||||
|
||||
#include "share/datum/ob_datum_funcs.h"
|
||||
#include "share/rc/ob_tenant_base.h"
|
||||
#include "share/text_analysis/ob_text_analyzer.h"
|
||||
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace share
|
||||
{
|
||||
|
||||
class TestTextAnalyzer : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
TestTextAnalyzer() : allocator_(), analysis_ctx_(), token_cmp_func_() {}
|
||||
virtual ~TestTextAnalyzer() {}
|
||||
virtual void SetUp();
|
||||
virtual void TearDowm() {}
|
||||
private:
|
||||
void analyze_test(
|
||||
ObITextAnalyzer &analyzer,
|
||||
const char *raw_doc,
|
||||
const int64_t raw_doc_len,
|
||||
const char **target_tokens,
|
||||
const int64_t *target_token_len,
|
||||
const int64_t *target_token_freq,
|
||||
const int64_t target_token_cnt);
|
||||
void find_token_in_target_array(
|
||||
const ObDatum &query_token,
|
||||
const char **target_tokens,
|
||||
const int64_t *target_token_len,
|
||||
const int64_t target_token_cnt,
|
||||
int64_t &idx);
|
||||
private:
|
||||
ObArenaAllocator allocator_;
|
||||
ObTextAnalysisCtx analysis_ctx_;
|
||||
common::ObDatumCmpFuncType token_cmp_func_;
|
||||
};
|
||||
|
||||
void TestTextAnalyzer::SetUp()
|
||||
{
|
||||
share::ObTenantEnv::get_tenant_local()->id_ = 500;
|
||||
analysis_ctx_.cs_ = ObCharset::get_charset(CS_TYPE_UTF8MB4_GENERAL_CI);
|
||||
sql::ObExprBasicFuncs *basic_funcs = ObDatumFuncs::get_basic_func(ObVarcharType, CS_TYPE_UTF8MB4_GENERAL_CI);
|
||||
token_cmp_func_ = basic_funcs->null_first_cmp_;
|
||||
}
|
||||
|
||||
/**
 * Tokenizes one document and validates the resulting token stream.
 *
 * @param analyzer           analyzer under test (must already be initialized by the caller)
 * @param raw_doc            document bytes
 * @param raw_doc_len        number of bytes of raw_doc handed to the analyzer
 * @param target_tokens      expected tokens
 * @param target_token_len   byte length of each expected token
 * @param target_token_freq  expected frequency of each expected token
 * @param target_token_cnt   number of entries in the three target arrays
 *
 * The stream must yield exactly target_token_cnt tokens, each of which has to be found in
 * target_tokens with the expected frequency, and must finish with OB_ITER_END.
 * Fatal assertions only return from this helper; callers that need to stop on failure should
 * check for fatal failures themselves.
 */
void TestTextAnalyzer::analyze_test(
    ObITextAnalyzer &analyzer,
    const char *raw_doc,
    const int64_t raw_doc_len,
    const char **target_tokens,
    const int64_t *target_token_len,
    const int64_t *target_token_freq,
    const int64_t target_token_cnt)
{
  ObDatum doc_datum;
  doc_datum.set_string(raw_doc, raw_doc_len);
  LOG_DEBUG("start test one tokenization", K(analyzer), K(doc_datum), K(doc_datum.get_string()));

  // Start from nullptr so the null check below is meaningful even if analyze() leaves the
  // out-parameter untouched.
  ObITokenStream *token_stream = nullptr;
  ASSERT_EQ(OB_SUCCESS, analyzer.analyze(doc_datum, token_stream));
  ASSERT_NE(nullptr, token_stream);

  int ret = OB_SUCCESS;
  int64_t token_cnt = 0;
  while (OB_SUCC(ret)) {
    ObDatum token;
    int64_t token_freq = 0;
    if (OB_FAIL(token_stream->get_next(token, token_freq))) {
      // OB_ITER_END is the normal end of the stream; anything else is worth a warning.
      if (OB_ITER_END != ret) {
        LOG_WARN("Failed to get next token from token stream", K(ret), KPC(token_stream));
      }
    } else {
      ASSERT_TRUE(token_cnt < target_token_cnt);
      LOG_INFO("print token", K(token), K(token.get_string()), K(token_freq));
      int64_t idx = -1;
      find_token_in_target_array(token, target_tokens, target_token_len, target_token_cnt, idx);
      ASSERT_TRUE(idx >= 0 && idx < target_token_cnt) << "idx:" << idx;
      ASSERT_EQ(token_freq, target_token_freq[idx])
          << "token_freq:" << token_freq << ", target_token_freq:" << target_token_freq[idx];
      ++token_cnt;
    }
  }
  ASSERT_EQ(OB_ITER_END, ret);
  ASSERT_EQ(token_cnt, target_token_cnt);
}
|
||||
|
||||
void TestTextAnalyzer::find_token_in_target_array(
|
||||
const ObDatum &query_token,
|
||||
const char **target_tokens,
|
||||
const int64_t *target_token_len,
|
||||
const int64_t target_token_cnt,
|
||||
int64_t &idx)
|
||||
{
|
||||
idx = -1;
|
||||
for (int64_t i = 0; i < target_token_cnt; ++i) {
|
||||
ObDatum target_token_datum;
|
||||
target_token_datum.set_string(target_tokens[i], target_token_len[i]);
|
||||
int cmp_ret = 0;
|
||||
ASSERT_EQ(OB_SUCCESS, token_cmp_func_(target_token_datum, query_token, cmp_ret));
|
||||
if (0 == cmp_ret) {
|
||||
idx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (idx == -1) {
|
||||
LOG_INFO("query token not found", K(query_token), K(query_token.get_string()));
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises ObEnglishTextAnalyzer first without grouping (every occurrence is emitted with
// frequency 1) and then with grouping (equal tokens are merged and their frequency summed).
// analyze_test() uses fatal assertions, which only return from the helper itself, so each call
// is wrapped in ASSERT_NO_FATAL_FAILURE to stop the test at the first failing document.
TEST_F(TestTextAnalyzer, test_basic_english_analyzer)
{
  ObEnglishTextAnalyzer analyzer;
  analysis_ctx_.need_grouping_ = false;

  ASSERT_EQ(OB_SUCCESS, analyzer.init(analysis_ctx_, allocator_));

  // NOTE: the whole fixed-size buffer (including its zero padding) is handed to the analyzer.
  const int64_t doc_len_1 = 64;
  const char doc_1[doc_len_1] = {"Try to tokenize basic english doc."};
  const int64_t token_cnt_1 = 6;
  const char *tokens1[token_cnt_1] = {"try", "to", "tokenize", "basic", "english", "doc"};
  const int64_t tokens_len_1[token_cnt_1] = {3, 2, 8, 5, 7, 3};
  const int64_t tokens_freq_1[token_cnt_1] = {1, 1, 1, 1, 1, 1};
  ASSERT_NO_FATAL_FAILURE(
      analyze_test(analyzer, doc_1, doc_len_1, tokens1, tokens_len_1, tokens_freq_1, token_cnt_1));

  // not deduplicated
  const int64_t doc_len_2 = 64;
  const char doc_2[doc_len_2] = {"oceanbase@oceanbase.com, \t https://www.oceanbase.com/"};
  const int64_t token_cnt_2 = 7;
  const char *tokens_2[token_cnt_2] = {"oceanbase", "oceanbase", "com", "https", "www", "oceanbase", "com"};
  const int64_t tokens_2_len[token_cnt_2] = {9, 9, 3, 5, 3, 9, 3};
  const int64_t tokens_freq_2[token_cnt_2] = {1, 1, 1, 1, 1, 1, 1};
  ASSERT_NO_FATAL_FAILURE(
      analyze_test(analyzer, doc_2, doc_len_2, tokens_2, tokens_2_len, tokens_freq_2, token_cnt_2));

  // won't trim extremely short phrase for now
  const int64_t doc_len_3 = 64;
  const char doc_3[doc_len_3] = {"if (a==b and c > !d) then x=1;"};
  const int64_t token_cnt_3 = 9;
  const char *tokens_3[token_cnt_3] = {"if", "a", "b", "and", "c", "d", "then", "x", "1"};
  const int64_t tokens_len_3[token_cnt_3] = {2, 1, 1, 3, 1, 1, 4, 1, 1};
  const int64_t tokens_freq_3[token_cnt_3] = {1, 1, 1, 1, 1, 1, 1, 1, 1};
  ASSERT_NO_FATAL_FAILURE(
      analyze_test(analyzer, doc_3, doc_len_3, tokens_3, tokens_len_3, tokens_freq_3, token_cnt_3));

  // test paragraphs
  const int64_t doc_len_4 = 128;
  const char doc_4[doc_len_4] = {"PARAGRAPH1\nPARAGRAPH2\nPARAGRAPH3"};
  const int64_t token_cnt_4 = 3;
  const char *tokens_4[token_cnt_4] = {"paragraph1","paragraph2","paragraph3"};
  const int64_t tokens_len_4[token_cnt_4] = {10,10,10};
  const int64_t tokens_freq_4[token_cnt_4] = {1, 1, 1};
  ASSERT_NO_FATAL_FAILURE(
      analyze_test(analyzer, doc_4, doc_len_4, tokens_4, tokens_len_4, tokens_freq_4, token_cnt_4));

  // test non-english text
  const int64_t doc_len_5 = 128;
  const char doc_5[doc_len_5] = {"乘骐骥以驰骋兮,来吾道夫先路"};
  const int64_t token_cnt_5 = 1;
  const char *tokens_5[token_cnt_5] = {"乘骐骥以驰骋兮,来吾道夫先路"};
  const int64_t tokens_len_5[token_cnt_5] = {42};
  const int64_t tokens_freq_5[token_cnt_5] = {1};
  ASSERT_NO_FATAL_FAILURE(
      analyze_test(analyzer, doc_5, doc_len_5, tokens_5, tokens_len_5, tokens_freq_5, token_cnt_5));

  analyzer.reset();

  // grouping test
  analysis_ctx_.need_grouping_ = true;
  ASSERT_EQ(OB_SUCCESS, analyzer.init(analysis_ctx_, allocator_));
  ASSERT_NO_FATAL_FAILURE(
      analyze_test(analyzer, doc_1, doc_len_1, tokens1, tokens_len_1, tokens_freq_1, token_cnt_1));
  ASSERT_NO_FATAL_FAILURE(
      analyze_test(analyzer, doc_3, doc_len_3, tokens_3, tokens_len_3, tokens_freq_3, token_cnt_3));
  ASSERT_NO_FATAL_FAILURE(
      analyze_test(analyzer, doc_4, doc_len_4, tokens_4, tokens_len_4, tokens_freq_4, token_cnt_4));

  // Same text as doc_2, but with grouping the repeated tokens collapse into one entry each.
  const int64_t doc_len_6 = 64;
  const char doc_6[doc_len_6] = {"oceanbase@oceanbase.com, \t https://www.oceanbase.com/"};
  const int64_t token_cnt_6 = 4;
  const char *tokens_6[token_cnt_6] = {"oceanbase", "com", "https", "www"};
  const int64_t tokens_len_6[token_cnt_6] = {9, 3, 5, 3};
  const int64_t tokens_freq_6[token_cnt_6] = {3, 2, 1, 1};
  ASSERT_NO_FATAL_FAILURE(
      analyze_test(analyzer, doc_6, doc_len_6, tokens_6, tokens_len_6, tokens_freq_6, token_cnt_6));
}
|
||||
|
||||
}; // namespace share
|
||||
}; // namespace oceanbase
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_text_analyzer.log*");
|
||||
OB_LOGGER.set_file_name("test_text_analyzer.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
|
||||
// oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
@ -1215,7 +1215,7 @@ TEST_F(ObQueryRangeTest, serialize_geo_queryrange)
|
||||
pre_mbr.x_max_ = 60;
|
||||
pre_mbr.y_min_ = 60;
|
||||
pre_mbr.y_max_ = 90;
|
||||
pre_mbr.mbr_type_ = ObGeoRelationType::T_INTERSECTS;
|
||||
pre_mbr.mbr_type_ = ObDomainOpType::T_GEO_INTERSECTS;
|
||||
OK(mbr_array.push_back(pre_mbr));
|
||||
ObGeoColumnInfo info1;
|
||||
info1.srid_ = 0;
|
||||
@ -1277,7 +1277,7 @@ TEST_F(ObQueryRangeTest, serialize_geo_keypart)
|
||||
{
|
||||
// build geo keypart
|
||||
ObKeyPart pre_key_part(allocator_);
|
||||
OK(pre_key_part.create_geo_key());
|
||||
OK(pre_key_part.create_domain_key());
|
||||
ObObj wkb;
|
||||
// ST_GeomFromText('POINT(5 5)')
|
||||
char hexstring[25] ={'\x01', '\x01', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
|
||||
@ -1285,8 +1285,8 @@ TEST_F(ObQueryRangeTest, serialize_geo_keypart)
|
||||
'\x00', '\x00', '\x00', '\x14', '\x40', '\x00', '\x00', '\x00',
|
||||
'\x00'};
|
||||
wkb.set_string(ObGeometryType ,hexstring, 25);
|
||||
OK(ob_write_obj(allocator_, wkb, pre_key_part.geo_keypart_->wkb_));
|
||||
pre_key_part.geo_keypart_->geo_type_ = ObGeoRelationType::T_DWITHIN;
|
||||
OK(ob_write_obj(allocator_, wkb, pre_key_part.domain_keypart_->const_param_));
|
||||
pre_key_part.domain_keypart_->domain_op_ = ObDomainOpType::T_GEO_DWITHIN;
|
||||
char buf[512 * 1024] = {'\0'};
|
||||
int64_t pos = 0;
|
||||
int64_t data_len = 0;
|
||||
@ -1296,8 +1296,8 @@ TEST_F(ObQueryRangeTest, serialize_geo_keypart)
|
||||
pos = 0;
|
||||
ObKeyPart dec_key_part(allocator_);
|
||||
OK(dec_key_part.deserialize(buf, data_len, pos));
|
||||
EXPECT_EQ(dec_key_part.geo_keypart_->wkb_, pre_key_part.geo_keypart_->wkb_);
|
||||
EXPECT_EQ(dec_key_part.geo_keypart_->geo_type_, pre_key_part.geo_keypart_->geo_type_);
|
||||
EXPECT_EQ(dec_key_part.domain_keypart_->const_param_, pre_key_part.domain_keypart_->const_param_);
|
||||
EXPECT_EQ(dec_key_part.domain_keypart_->domain_op_, pre_key_part.domain_keypart_->domain_op_);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
|
@ -6,6 +6,10 @@ add_library(mock_access_service SHARED
|
||||
mock_access_service.cpp)
|
||||
target_link_libraries(mock_access_service PUBLIC oceanbase)
|
||||
|
||||
# Shared library holding the mock full-text parser plugin used by the FTS unit tests.
add_library(mock_ft_parser SHARED mock_ft_parser.cpp)
target_link_libraries(mock_ft_parser
  PUBLIC
    oceanbase)
|
||||
|
||||
function(storage_unittest case)
|
||||
ob_unittest(${ARGV})
|
||||
target_link_libraries(${case} PRIVATE mockcontainer)
|
||||
@ -16,6 +20,11 @@ function(storage_dml_unittest case)
|
||||
target_link_libraries(${case} PRIVATE mockcontainer mock_ls_tablet_service mock_access_service)
|
||||
endfunction()
|
||||
|
||||
# Declares a storage unit test that additionally links against the mock full-text parser
# plugin library. All arguments are forwarded unchanged to ob_unittest().
function(storage_fts_unittest case)
  ob_unittest(${ARGV})
  target_link_libraries(${case}
    PRIVATE
      mockcontainer
      mock_ft_parser)
endfunction()
|
||||
|
||||
add_subdirectory(mockcontainer)
|
||||
add_subdirectory(transaction)
|
||||
add_subdirectory(tx)
|
||||
@ -69,6 +78,7 @@ storage_unittest(test_checkpoint_diagnose checkpoint/test_checkpoint_diagnose.cp
|
||||
#storage_unittest(test_create_tablet_memtable test_create_tablet_memtable.cpp)
|
||||
storage_unittest(test_tenant_meta_obj_pool test_tenant_meta_obj_pool.cpp)
|
||||
storage_unittest(test_tablet_pointer_map test_tablet_pointer_map.cpp)
|
||||
storage_fts_unittest(test_fts_plugin test_fts_plugin.cpp)
|
||||
storage_unittest(test_storage_logger_manager slog/test_storage_logger_manager.cpp)
|
||||
storage_unittest(test_storage_log_read_write slog/test_storage_log_read_write.cpp)
|
||||
storage_unittest(test_storage_log_replay slog/test_storage_log_replay.cpp)
|
||||
|
24
unittest/storage/mock_ft_parser.cpp
Normal file
24
unittest/storage/mock_ft_parser.cpp
Normal file
@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Copyright (c) 2024 OceanBase
|
||||
* OceanBase is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "mock_ft_parser.h"
|
||||
|
||||
// Plugin descriptor for the mock full-text parser. The unit tests open this plugin and check
// each of the fields below against the same values.
OB_DECLARE_PLUGIN(mock_ft_parser)
{
  oceanbase::lib::ObPluginType::OB_FT_PARSER_PLUGIN,         // plugin type
  "mock_ft_parser",                                          // plugin name
  OB_PLUGIN_AUTHOR_OCEANBASE,                                // author
  "This is mock fulltext parser plugin.",                    // description
  0x00001,                                                   // plugin version
  oceanbase::lib::ObPluginLicenseType::OB_MULAN_V2_LICENSE,  // license
  &oceanbase::storage::mock_ft_parser,                       // parser descriptor instance
};
|
56
unittest/storage/mock_ft_parser.h
Normal file
56
unittest/storage/mock_ft_parser.h
Normal file
@ -0,0 +1,56 @@
|
||||
/**
|
||||
* Copyright (c) 2024 OceanBase
|
||||
* OceanBase is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#ifndef MOCK_FT_PARSER_H_
|
||||
#define MOCK_FT_PARSER_H_
|
||||
|
||||
#include "lib/ob_plugin.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace storage
|
||||
{
|
||||
|
||||
class ObMockFTParserDesc final : public lib::ObIFTParserDesc
|
||||
{
|
||||
public:
|
||||
ObMockFTParserDesc() = default;
|
||||
virtual ~ObMockFTParserDesc() = default;
|
||||
virtual int init(lib::ObPluginParam *param) override;
|
||||
virtual int deinit(lib::ObPluginParam *param) override;
|
||||
virtual int segment(lib::ObFTParserParam *param) const override;
|
||||
};
|
||||
|
||||
// Mock initialization: ignores `param` and always reports success.
// Declared `inline` because this definition lives in a header; without it, including the header
// from more than one translation unit produces duplicate symbol definitions.
inline int ObMockFTParserDesc::init(lib::ObPluginParam *param)
{
  UNUSED(param);
  return OB_SUCCESS;
}
|
||||
|
||||
// Mock teardown: ignores `param` and always reports success.
// Declared `inline` because this definition lives in a header; without it, including the header
// from more than one translation unit produces duplicate symbol definitions.
inline int ObMockFTParserDesc::deinit(lib::ObPluginParam *param)
{
  UNUSED(param);
  return OB_SUCCESS;
}
|
||||
|
||||
// Mock segmentation: produces no words, ignores `param` and always reports success.
// Declared `inline` because this definition lives in a header; without it, including the header
// from more than one translation unit produces duplicate symbol definitions.
inline int ObMockFTParserDesc::segment(lib::ObFTParserParam *param) const
{
  UNUSED(param);
  return OB_SUCCESS;
}
|
||||
|
||||
static ObMockFTParserDesc mock_ft_parser;
|
||||
|
||||
} // end storage
|
||||
} // end oceanbase
|
||||
|
||||
#endif // MOCK_FT_PARSER_H_
|
552
unittest/storage/test_fts_plugin.cpp
Normal file
552
unittest/storage/test_fts_plugin.cpp
Normal file
@ -0,0 +1,552 @@
|
||||
/**
|
||||
* Copyright (c) 2023 OceanBase
|
||||
* OceanBase is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#define USING_LOG_PREFIX STORAGE
|
||||
|
||||
#define protected public
|
||||
#define private public
|
||||
|
||||
#include "lib/ob_plugin.h"
|
||||
#include "share/rc/ob_tenant_base.h"
|
||||
#include "storage/fts/ob_fts_plugin_helper.h"
|
||||
#include "storage/fts/ob_fts_plugin_mgr.h"
|
||||
#include "storage/fts/ob_whitespace_ft_parser.h"
|
||||
#include "sql/das/ob_das_utils.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
|
||||
static storage::ObTenantFTPluginMgr ft_plugin_mgr(OB_SYS_TENANT_ID);
|
||||
|
||||
namespace storage
|
||||
{
|
||||
|
||||
// Test-local definition of the accessor: hands out the file-level `ft_plugin_mgr` instance
// declared above in this test file.
ObTenantFTPluginMgr &ObTenantFTPluginMgr::get_ft_plugin_mgr()
{
  ObTenantFTPluginMgr &test_mgr = ft_plugin_mgr;
  return test_mgr;
}
|
||||
|
||||
typedef common::hash::ObHashMap<ObFTWord, int64_t> ObFTWordMap;
|
||||
|
||||
/**
 * Segments `fulltext` with `helper` and accumulates per-word occurrence counts.
 *
 * @param allocator    not used by this function
 * @param helper       parse helper performing the segmentation; must not be null
 * @param type         collation of the text; must be a valid collation type
 * @param fulltext     text to segment
 * @param words_count  [in/out] already-created hash map; each segmented word's count is
 *                     incremented (or set to 1 when the word is not present yet)
 * @return OB_SUCCESS, OB_INVALID_ARGUMENT for bad arguments, or the error reported by the
 *         helper / hash map.
 */
int segment_and_calc_word_count(
    common::ObIAllocator &allocator,
    storage::ObFTParseHelper *helper,
    const common::ObCollationType &type,
    const ObString &fulltext,
    ObFTWordMap &words_count)
{
  int ret = OB_SUCCESS;
  int64_t doc_length = 0;
  common::ObSEArray<ObFTWord, 256> words;
  const bool is_bad_collation = ObCollationType::CS_TYPE_INVALID == type
                                || ObCollationType::CS_TYPE_EXTENDED_MARK < type;
  if (OB_ISNULL(helper) || OB_UNLIKELY(is_bad_collation) || OB_UNLIKELY(!words_count.created())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid arguments", K(ret), KPC(helper), K(type), K(words_count.created()));
  } else if (OB_FAIL(helper->segment(type, fulltext.ptr(), fulltext.length(), doc_length, words))) {
    LOG_WARN("fail to segment", K(ret), KPC(helper), K(type), K(fulltext));
  } else {
    const int64_t total_words = words.count();
    for (int64_t i = 0; OB_SUCC(ret) && i < total_words; ++i) {
      const ObFTWord &ft_word = words.at(i);
      int64_t prev_count = 0;
      if (OB_FAIL(words_count.get_refactored(ft_word, prev_count)) && OB_HASH_NOT_EXIST != ret) {
        LOG_WARN("fail to get ft word", K(ret), K(ft_word));
      } else {
        // A missing entry means this is the first occurrence of the word.
        const int64_t new_count = (OB_HASH_NOT_EXIST == ret) ? 1 : prev_count + 1;
        if (OB_FAIL(words_count.set_refactored(ft_word, new_count, 1/*overwrite*/))) {
          LOG_WARN("fail to set ft word and count", K(ret), K(ft_word));
        }
      }
    }
  }
  return ret;
}
|
||||
|
||||
class ObTestAddWord final : public lib::ObFTParserParam::ObIAddWord
|
||||
{
|
||||
public:
|
||||
static const char *TEST_FULLTEXT;
|
||||
static const int64_t TEST_WORD_COUNT = 9;
|
||||
static const int64_t TEST_WORD_COUNT_WITHOUT_STOPWORD = 6;
|
||||
public:
|
||||
ObTestAddWord();
|
||||
virtual ~ObTestAddWord() = default;
|
||||
virtual int operator()(
|
||||
lib::ObFTParserParam *param,
|
||||
const char *word,
|
||||
const int64_t word_len) override;
|
||||
virtual int64_t get_add_word_count() const override { return ith_word_; }
|
||||
VIRTUAL_TO_STRING_KV(K_(ith_word));
|
||||
private:
|
||||
const char *words_[TEST_WORD_COUNT];
|
||||
const char *words_without_stopword_[TEST_WORD_COUNT_WITHOUT_STOPWORD];
|
||||
int64_t ith_word_;
|
||||
};
|
||||
|
||||
const char *ObTestAddWord::TEST_FULLTEXT = "OceanBase fulltext search is No.1 in the world.";
|
||||
|
||||
ObTestAddWord::ObTestAddWord()
|
||||
: words_{"oceanbase", "fulltext", "search", "is", "no", "1", "in", "the", "world"},
|
||||
words_without_stopword_{"oceanbase", "fulltext", "search", "no", "1", "world"},
|
||||
ith_word_(0)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Validates one word reported by a parser against the next expected word.
 *
 * @param param     parser parameter; must not be null
 * @param word      reported word; must not be null
 * @param word_len  length of the reported word; must be positive
 * @return OB_SUCCESS when the word matches (and the internal index advances),
 *         OB_INVALID_ARGUMENT for bad arguments,
 *         OB_ERR_UNEXPECTED when more words are reported than expected or the word differs.
 */
int ObTestAddWord::operator()(
    lib::ObFTParserParam *param,
    const char *word,
    const int64_t word_len)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(param) || OB_ISNULL(word) || OB_UNLIKELY(0 >= word_len)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid arguments", K(ret), KP(word), KP(param), K(word_len));
  } else if (OB_UNLIKELY(ith_word_ < 0 || ith_word_ >= TEST_WORD_COUNT)) {
    // Guard the expected-word table: a parser reporting extra words must not index past it.
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("more words are added than expected", K(ret), K(ith_word_), K(word_len));
  } else if (OB_UNLIKELY(0 != strncmp(words_[ith_word_], word, word_len))) {
    // NOTE(review): only the first word_len bytes are compared, so a reported word that is a
    // strict prefix of the expected word is accepted too - confirm whether that is intended.
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("the ith word isn't default word", K(ret), K(ith_word_), KCSTRING(words_[ith_word_]),
        KCSTRING(word), K(word_len));
  } else {
    ++ith_word_;
  }
  return ret;
}
|
||||
|
||||
class TestDefaultFTParser : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
TestDefaultFTParser();
|
||||
virtual ~TestDefaultFTParser() = default;
|
||||
|
||||
virtual void SetUp() override;
|
||||
virtual void TearDown() override;
|
||||
|
||||
private:
|
||||
lib::ObPluginParam plugin_param_;
|
||||
lib::ObFTParserParam ft_parser_param_;
|
||||
ObTestAddWord add_word_;
|
||||
ObWhiteSpaceFTParserDesc desc_;
|
||||
common::ObArenaAllocator allocator_;
|
||||
};
|
||||
|
||||
// Default-constructs every member and points the plugin parameter at the descriptor under test.
TestDefaultFTParser::TestDefaultFTParser()
  : plugin_param_(), ft_parser_param_(), add_word_(), desc_(), allocator_()
{
  plugin_param_.desc_ = &desc_;
}
|
||||
|
||||
void TestDefaultFTParser::SetUp()
|
||||
{
|
||||
ASSERT_EQ(OB_SUCCESS, desc_.init(&plugin_param_));
|
||||
|
||||
ft_parser_param_.allocator_ = &allocator_;
|
||||
ft_parser_param_.add_word_ = &add_word_;
|
||||
ft_parser_param_.cs_ = common::ObCharset::get_charset(ObCollationType::CS_TYPE_UTF8MB4_BIN);
|
||||
ft_parser_param_.parser_version_ = 0x00001;
|
||||
ASSERT_TRUE(nullptr != ft_parser_param_.cs_);
|
||||
}
|
||||
|
||||
void TestDefaultFTParser::TearDown()
|
||||
{
|
||||
ft_parser_param_.reset();
|
||||
|
||||
ASSERT_EQ(OB_SUCCESS, desc_.deinit(&plugin_param_));
|
||||
}
|
||||
|
||||
// Checks ObSpaceFTParser::segment(): invalid arguments are rejected, and the reference text is
// segmented successfully (each produced word is verified by the ObTestAddWord callback).
TEST_F(TestDefaultFTParser, test_space_ft_parser_segment)
{
  const char *fulltext = ObTestAddWord::TEST_FULLTEXT;
  const int64_t ft_len = strlen(fulltext);

  // Null parameter, null text, zero length and negative length must all be refused.
  ASSERT_EQ(OB_INVALID_ARGUMENT, ObSpaceFTParser::segment(nullptr, nullptr, 0));
  ASSERT_EQ(OB_INVALID_ARGUMENT, ObSpaceFTParser::segment(&ft_parser_param_, nullptr, 0));
  ASSERT_EQ(OB_INVALID_ARGUMENT, ObSpaceFTParser::segment(&ft_parser_param_, fulltext, 0));
  ASSERT_EQ(OB_INVALID_ARGUMENT, ObSpaceFTParser::segment(&ft_parser_param_, fulltext, -1));

  ft_parser_param_.fulltext_ = fulltext;
  ft_parser_param_.ft_length_ = ft_len;

  LOG_INFO("before space segment", KCSTRING(fulltext), K(ft_len), K(ft_parser_param_));
  ASSERT_EQ(OB_SUCCESS, ObSpaceFTParser::segment(&ft_parser_param_, fulltext, ft_len));
  LOG_INFO("after space segment", KCSTRING(fulltext), K(ft_len), K(ft_parser_param_));
}
|
||||
|
||||
// Walks the descriptor through its states: segment without text, segment with text,
// segment after deinit, and segment with a null parameter after re-init.
TEST_F(TestDefaultFTParser, test_default_ft_parser_desc)
{
  // No fulltext has been set on the parameter yet.
  ASSERT_EQ(OB_INVALID_ARGUMENT, desc_.segment(&ft_parser_param_));

  ft_parser_param_.fulltext_ = ObTestAddWord::TEST_FULLTEXT;
  ft_parser_param_.ft_length_ = strlen(ft_parser_param_.fulltext_);
  ASSERT_EQ(OB_SUCCESS, desc_.segment(&ft_parser_param_));

  // After deinit the descriptor must refuse to segment.
  ASSERT_EQ(OB_SUCCESS, desc_.deinit(&plugin_param_));
  ASSERT_EQ(OB_NOT_INIT, desc_.segment(&ft_parser_param_));

  // Re-initialize (TearDown deinitializes again) and check the null-parameter path.
  ASSERT_EQ(OB_SUCCESS, desc_.init(&plugin_param_));
  ASSERT_EQ(OB_INVALID_ARGUMENT, desc_.segment(nullptr));
}
|
||||
|
||||
class ObTestFTPluginHelper : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
static const char *TEST_FULLTEXT;
|
||||
static const char *file_name;
|
||||
public:
|
||||
ObTestFTPluginHelper();
|
||||
virtual ~ObTestFTPluginHelper() = default;
|
||||
|
||||
virtual void SetUp() override;
|
||||
virtual void TearDown() override;
|
||||
|
||||
private:
|
||||
share::ObPluginSoHandler handler_;
|
||||
const char *plugin_name_;
|
||||
const ObCharsetInfo *cs_;
|
||||
common::ObArenaAllocator allocator_;
|
||||
};
|
||||
|
||||
const char *ObTestFTPluginHelper::TEST_FULLTEXT = "Test fulltext plugin.";
|
||||
const char *ObTestFTPluginHelper::file_name = "libmock_ft_parser.so";
|
||||
|
||||
// Sets the plugin name to "mock_ft_parser"; the charset is resolved later in SetUp().
ObTestFTPluginHelper::ObTestFTPluginHelper()
  : handler_(), plugin_name_("mock_ft_parser"), cs_(nullptr), allocator_()
{}
|
||||
|
||||
void ObTestFTPluginHelper::SetUp()
|
||||
{
|
||||
ASSERT_EQ(OB_SUCCESS, handler_.open(plugin_name_, file_name));
|
||||
|
||||
cs_ = common::ObCharset::get_charset(ObCollationType::CS_TYPE_UTF8MB4_BIN);
|
||||
ASSERT_TRUE(nullptr != cs_);
|
||||
}
|
||||
|
||||
void ObTestFTPluginHelper::TearDown()
|
||||
{
|
||||
cs_ = nullptr;
|
||||
ASSERT_EQ(OB_SUCCESS, handler_.close());
|
||||
}
|
||||
|
||||
// Loads the mock plugin from its shared library (done in SetUp) and verifies the interface
// version, descriptor size and every descriptor field, then runs a segmentation through it.
TEST_F(ObTestFTPluginHelper, test_fts_plugin)
{
  int64_t version = -1;
  ASSERT_EQ(OB_SUCCESS, handler_.get_plugin_version(version));
  ASSERT_EQ(OB_PLUGIN_INTERFACE_VERSION, version);

  int64_t size = -1;
  ASSERT_EQ(OB_SUCCESS, handler_.get_plugin_size(size));
  // Cast so both operands are signed; comparing size_t with int64_t mixes signedness.
  ASSERT_EQ(static_cast<int64_t>(sizeof(lib::ObPlugin)), size);

  lib::ObPlugin *plugin = nullptr;
  ASSERT_EQ(OB_SUCCESS, handler_.get_plugin(plugin));
  ASSERT_TRUE(nullptr != plugin);
  ASSERT_TRUE(plugin->is_valid());
  ASSERT_EQ(lib::ObPluginType::OB_FT_PARSER_PLUGIN, plugin->type_);
  LOG_INFO("jinzhu debug", KCSTRING(plugin->name_), KCSTRING(plugin->author_), KCSTRING(plugin->spec_));
  // Full string equality: the former strncmp checks only compared a prefix, so a longer
  // name/author/spec starting with the expected text would have passed.
  ASSERT_STREQ("mock_ft_parser", plugin->name_);
  ASSERT_STREQ(OB_PLUGIN_AUTHOR_OCEANBASE, plugin->author_);
  ASSERT_STREQ("This is mock fulltext parser plugin.", plugin->spec_);
  ASSERT_EQ(0x00001, plugin->version_);
  ASSERT_EQ(lib::ObPluginLicenseType::OB_MULAN_V2_LICENSE, plugin->license_);
  ASSERT_TRUE(nullptr != plugin->desc_);

  lib::ObIFTParserDesc *desc = nullptr;
  ASSERT_EQ(OB_SUCCESS, ObFTParseHelper::get_fulltext_parser_desc(handler_, desc));
  ASSERT_TRUE(nullptr != desc);

  ObTestAddWord test_add_word;
  ASSERT_EQ(OB_SUCCESS, ObFTParseHelper::segment(1/*plugin_version*/, desc, cs_, TEST_FULLTEXT,
        strlen(TEST_FULLTEXT), allocator_, test_add_word));
}
|
||||
|
||||
// Same checks as test_fts_plugin, but the plugin is resolved from the main program (null file
// name) instead of an explicitly named shared library.
TEST_F(ObTestFTPluginHelper, test_main_program_for_plugin)
{
  // SetUp opened the shared library; close it and reopen against the main program.
  ASSERT_EQ(OB_SUCCESS, handler_.close());
  ASSERT_EQ(OB_SUCCESS, handler_.open(plugin_name_, nullptr/*use main program*/));

  int64_t version = -1;
  ASSERT_EQ(OB_SUCCESS, handler_.get_plugin_version(version));
  ASSERT_EQ(OB_PLUGIN_INTERFACE_VERSION, version);

  int64_t size = -1;
  ASSERT_EQ(OB_SUCCESS, handler_.get_plugin_size(size));
  // Cast so both operands are signed; comparing size_t with int64_t mixes signedness.
  ASSERT_EQ(static_cast<int64_t>(sizeof(lib::ObPlugin)), size);

  lib::ObPlugin *plugin = nullptr;
  ASSERT_EQ(OB_SUCCESS, handler_.get_plugin(plugin));
  ASSERT_TRUE(nullptr != plugin);
  ASSERT_TRUE(plugin->is_valid());
  ASSERT_EQ(lib::ObPluginType::OB_FT_PARSER_PLUGIN, plugin->type_);
  LOG_INFO("jinzhu debug", KCSTRING(plugin->name_), KCSTRING(plugin->author_), KCSTRING(plugin->spec_));
  // Full string equality: the former strncmp checks only compared a prefix, so a longer
  // name/author/spec starting with the expected text would have passed.
  ASSERT_STREQ("mock_ft_parser", plugin->name_);
  ASSERT_STREQ(OB_PLUGIN_AUTHOR_OCEANBASE, plugin->author_);
  ASSERT_STREQ("This is mock fulltext parser plugin.", plugin->spec_);
  ASSERT_EQ(0x00001, plugin->version_);
  ASSERT_EQ(lib::ObPluginLicenseType::OB_MULAN_V2_LICENSE, plugin->license_);
  ASSERT_TRUE(nullptr != plugin->desc_);

  lib::ObIFTParserDesc *desc = nullptr;
  ASSERT_EQ(OB_SUCCESS, ObFTParseHelper::get_fulltext_parser_desc(handler_, desc));
  ASSERT_TRUE(nullptr != desc);

  ObTestAddWord test_add_word;
  ASSERT_EQ(OB_SUCCESS, ObFTParseHelper::segment(1/*plugin_version*/, desc, cs_, TEST_FULLTEXT,
        strlen(TEST_FULLTEXT), allocator_, test_add_word));

  // Under the general_ci collation the two spellings compare as equal.
  ASSERT_EQ(0, ObCharset::strcmp(ObCollationType::CS_TYPE_UTF8MB4_GENERAL_CI, "OceanBase", "Oceanbase"));
}
|
||||
|
||||
// Error-path checks for the plugin handler: unknown symbols, null arguments, use after close,
// a missing library file and double open.
TEST_F(ObTestFTPluginHelper, test_no_exist_symbol)
{
  const char *missing_symbol = "test_no_exist_symbol";
  void *sym_ptr = nullptr;

  // Handler is open (SetUp): lookups fail because of the symbol name / null name.
  ASSERT_EQ(OB_SEARCH_NOT_FOUND, handler_.get_symbol_ptr(missing_symbol, sym_ptr));
  ASSERT_EQ(OB_INVALID_ARGUMENT, handler_.get_symbol_ptr(nullptr, sym_ptr));

  // Once closed, any lookup reports that nothing is opened.
  ASSERT_EQ(OB_SUCCESS, handler_.close());
  ASSERT_EQ(OB_FILE_NOT_OPENED, handler_.get_symbol_ptr(missing_symbol, sym_ptr));

  // Opening a non-existent file or passing no plugin name must fail.
  ASSERT_EQ(OB_ERR_SYS, handler_.open(plugin_name_, "./test_no_exist_file.so"));
  ASSERT_EQ(OB_INVALID_ARGUMENT, handler_.open(nullptr/*plugin name*/, nullptr/*file_name*/));

  // A second open on an already opened handler is rejected; TearDown closes the first one.
  ASSERT_EQ(OB_SUCCESS, handler_.open(plugin_name_, nullptr/*use main program*/));
  ASSERT_EQ(OB_INIT_TWICE, handler_.open(plugin_name_, nullptr/*use main program*/));
}
|
||||
|
||||
class ObTestFTParseHelper : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
static const char *name_;
|
||||
typedef common::hash::ObHashMap<ObFTWord, int64_t> ObFTWordMap;
|
||||
public:
|
||||
ObTestFTParseHelper();
|
||||
virtual ~ObTestFTParseHelper() = default;
|
||||
|
||||
static void SetUpTestCase();
|
||||
static void TearDownTestCase();
|
||||
virtual void SetUp() override;
|
||||
virtual void TearDown() override;
|
||||
|
||||
private:
|
||||
const common::ObString plugin_name_;
|
||||
const common::ObCollationType cs_type_;
|
||||
common::ObArenaAllocator allocator_;
|
||||
ObFTParseHelper parse_helper_;
|
||||
};
|
||||
|
||||
const char *ObTestFTParseHelper::name_ = "space.1";
|
||||
|
||||
// Wraps the static parser name in an ObString and fixes the collation to utf8mb4_bin.
ObTestFTParseHelper::ObTestFTParseHelper()
  : plugin_name_(STRLEN(name_), name_), cs_type_(ObCollationType::CS_TYPE_UTF8MB4_BIN), allocator_()
{}
|
||||
|
||||
void ObTestFTParseHelper::SetUp()
|
||||
{
|
||||
ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_));
|
||||
}
|
||||
|
||||
void ObTestFTParseHelper::TearDown()
|
||||
{
|
||||
parse_helper_.reset();
|
||||
}
|
||||
|
||||
void ObTestFTParseHelper::SetUpTestCase()
|
||||
{
|
||||
ASSERT_EQ(common::OB_SUCCESS, ObTenantFTPluginMgr::register_plugins());
|
||||
ASSERT_EQ(common::OB_SUCCESS, ft_plugin_mgr.init());
|
||||
}
|
||||
|
||||
void ObTestFTParseHelper::TearDownTestCase()
|
||||
{
|
||||
ft_plugin_mgr.destroy();
|
||||
ObTenantFTPluginMgr::unregister_plugins();
|
||||
}
|
||||
|
||||
// Walks ObFTParseHelper through its success and failure paths:
//   1. segment the sample text and compare against the expected word list;
//   2. count distinct words through segment_and_calc_word_count();
//   3. reject invalid text pointer / length arguments;
//   4. reject use before init, invalid init arguments and invalid collations;
//   5. reject double init, and confirm reset() + init() makes it usable again.
TEST_F(ObTestFTParseHelper, test_parse_fulltext)
{
  const char *fulltext = ObTestAddWord::TEST_FULLTEXT;
  const int64_t fulltext_len = std::strlen(fulltext);

  common::ObSEArray<ObFTWord, 16> words;
  int64_t doc_length = 0;
  ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, fulltext, fulltext_len, doc_length, words));

  // Each produced word must match the expected entry over the word's own length.
  ObTestAddWord test_add_word;
  for (int64_t idx = 0; idx < words.count(); ++idx) {
    const auto &token = words[idx].word_;
    ASSERT_TRUE(0 == strncmp(test_add_word.words_without_stopword_[idx], token.ptr(), token.length()));
  }

  // For this text the number of map entries is expected to equal the number of words.
  ObFTWordMap ft_word_map;
  ASSERT_EQ(OB_SUCCESS, ft_word_map.create(words.count(), "TestParse"));
  ASSERT_EQ(OB_SUCCESS, segment_and_calc_word_count(allocator_, &parse_helper_,
      cs_type_, ObTestAddWord::TEST_FULLTEXT, ft_word_map));
  ASSERT_EQ(words.count(), ft_word_map.size());

  // Invalid text arguments: null pointer, zero length, negative length.
  ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, nullptr, fulltext_len, doc_length, words));
  ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, fulltext, 0, doc_length, words));
  ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, fulltext, -1, doc_length, words));

  // After reset() the helper must refuse to segment.
  parse_helper_.reset();
  ASSERT_EQ(OB_NOT_INIT, parse_helper_.segment(cs_type_, fulltext, fulltext_len, doc_length, words));

  // init() argument validation: null allocator, empty plugin name.
  ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.init(nullptr, plugin_name_));
  ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.init(&allocator_, ObString()));

  ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_));

  // Collation types that segment() must reject.
  ASSERT_EQ(OB_INVALID_ARGUMENT,
            parse_helper_.segment(CS_TYPE_INVALID, fulltext, fulltext_len, doc_length, words));
  ASSERT_EQ(OB_INVALID_ARGUMENT,
            parse_helper_.segment(CS_TYPE_EXTENDED_MARK, fulltext, fulltext_len, doc_length, words));

  // A second init() without reset() in between is an error.
  ASSERT_EQ(OB_INIT_TWICE, parse_helper_.init(&allocator_, plugin_name_));

  parse_helper_.reset();
  ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_));

  // Repeated reset/init cycle still yields the same segmentation result.
  parse_helper_.reset();
  ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_));
  ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, fulltext, fulltext_len, doc_length, words));
  for (int64_t idx = 0; idx < words.count(); ++idx) {
    const auto &token = words[idx].word_;
    ASSERT_TRUE(0 == strncmp(test_add_word.words_without_stopword_[idx], token.ptr(), token.length()));
  }
}
|
||||
|
||||
class ObTestNgramFTParseHelper : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
static const char *name_;
|
||||
static const int64_t TEST_WORD_COUNT = 29;
|
||||
typedef common::hash::ObHashMap<ObFTWord, int64_t> ObFTWordMap;
|
||||
public:
|
||||
ObTestNgramFTParseHelper();
|
||||
virtual ~ObTestNgramFTParseHelper() = default;
|
||||
|
||||
static void SetUpTestCase();
|
||||
static void TearDownTestCase();
|
||||
virtual void SetUp() override;
|
||||
virtual void TearDown() override;
|
||||
|
||||
private:
|
||||
const common::ObString plugin_name_;
|
||||
const char *ngram_words_[TEST_WORD_COUNT];
|
||||
const common::ObCollationType cs_type_;
|
||||
common::ObArenaAllocator allocator_;
|
||||
ObFTParseHelper parse_helper_;
|
||||
};
|
||||
|
||||
const char *ObTestNgramFTParseHelper::name_ = "ngram.1";
|
||||
|
||||
ObTestNgramFTParseHelper::ObTestNgramFTParseHelper()
|
||||
: plugin_name_(STRLEN(name_), name_),
|
||||
ngram_words_{"Oc", "ce", "ea", "an", "nB", "Ba", "as", "se", "fu", "ul", "ll", "lt", "te", "ex", "xt", "se", "ea", "ar", "rc", "ch", "is", "No", "in", "th", "he", "wo", "or", "rl", "ld"},
|
||||
cs_type_(ObCollationType::CS_TYPE_UTF8MB4_BIN),
|
||||
allocator_()
|
||||
{
|
||||
}
|
||||
|
||||
void ObTestNgramFTParseHelper::SetUp()
|
||||
{
|
||||
ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_));
|
||||
}
|
||||
|
||||
void ObTestNgramFTParseHelper::TearDown()
|
||||
{
|
||||
parse_helper_.reset();
|
||||
}
|
||||
|
||||
void ObTestNgramFTParseHelper::SetUpTestCase()
|
||||
{
|
||||
ASSERT_EQ(common::OB_SUCCESS, ObTenantFTPluginMgr::register_plugins());
|
||||
ASSERT_EQ(common::OB_SUCCESS, ft_plugin_mgr.init());
|
||||
}
|
||||
|
||||
void ObTestNgramFTParseHelper::TearDownTestCase()
|
||||
{
|
||||
ft_plugin_mgr.destroy();
|
||||
ObTenantFTPluginMgr::unregister_plugins();
|
||||
}
|
||||
|
||||
// Walks ObFTParseHelper through its success and failure paths:
//   1. segment the sample text and compare against ngram_words_;
//   2. count distinct words through segment_and_calc_word_count();
//   3. reject invalid text pointer / length arguments;
//   4. reject use before init, invalid init arguments and invalid collations;
//   5. reject double init, and confirm reset() + init() makes it usable again.
//
// Before each comparison loop the number of produced words is checked against
// TEST_WORD_COUNT, so a segmenter that returns more tokens than expected fails
// the test cleanly instead of reading past the end of ngram_words_.
TEST_F(ObTestNgramFTParseHelper, test_parse_fulltext)
{
  const char *fulltext = ObTestAddWord::TEST_FULLTEXT;
  const int64_t fulltext_len = std::strlen(fulltext);
  // Local copy: gtest macros bind their arguments by reference, which would
  // odr-use the in-class static constant.
  const int64_t max_expected_words = TEST_WORD_COUNT;

  int64_t doc_length = 0;
  common::ObSEArray<ObFTWord, 16> words;
  ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, fulltext, fulltext_len, doc_length, words));

  // Guard the index into ngram_words_ before comparing token by token.
  ASSERT_LE(words.count(), max_expected_words);
  for (int64_t idx = 0; idx < words.count(); ++idx) {
    const auto &token = words[idx].word_;
    ASSERT_TRUE(0 == strncmp(ngram_words_[idx], token.ptr(), token.length()));
  }

  // The map is expected to hold two fewer entries than the token list; this
  // matches the two repeated tokens ("se", "ea") in ngram_words_.
  ObFTWordMap ft_word_map;
  ASSERT_EQ(OB_SUCCESS, ft_word_map.create(words.count(), "TestParse"));
  ASSERT_EQ(OB_SUCCESS, segment_and_calc_word_count(allocator_, &parse_helper_,
      cs_type_, ObTestAddWord::TEST_FULLTEXT, ft_word_map));
  ASSERT_EQ(words.count(), ft_word_map.size() + 2);

  // Invalid text arguments: null pointer, zero length, negative length.
  ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, nullptr, fulltext_len, doc_length, words));
  ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, fulltext, 0, doc_length, words));
  ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, fulltext, -1, doc_length, words));

  // After reset() the helper must refuse to segment.
  parse_helper_.reset();
  ASSERT_EQ(OB_NOT_INIT, parse_helper_.segment(cs_type_, fulltext, fulltext_len, doc_length, words));

  // init() argument validation: null allocator, empty plugin name.
  ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.init(nullptr, plugin_name_));
  ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.init(&allocator_, ObString()));

  // NOTE(review): this step initializes with "space.1" rather than the
  // fixture's own plugin name; kept as in the original, confirm it is intended.
  const char *plugin_name = "space.1";
  ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, common::ObString(STRLEN(plugin_name), plugin_name)));

  // Collation types that segment() must reject.
  ASSERT_EQ(OB_INVALID_ARGUMENT,
            parse_helper_.segment(CS_TYPE_INVALID, fulltext, fulltext_len, doc_length, words));
  ASSERT_EQ(OB_INVALID_ARGUMENT,
            parse_helper_.segment(CS_TYPE_EXTENDED_MARK, fulltext, fulltext_len, doc_length, words));

  // A second init() without reset() in between is an error.
  ASSERT_EQ(OB_INIT_TWICE, parse_helper_.init(&allocator_, plugin_name_));

  parse_helper_.reset();
  ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_));

  // Repeated reset/init cycle still yields the same segmentation result.
  parse_helper_.reset();
  ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_));
  ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, fulltext, fulltext_len, doc_length, words));

  // Same bounds guard as above before indexing ngram_words_ again.
  ASSERT_LE(words.count(), max_expected_words);
  for (int64_t idx = 0; idx < words.count(); ++idx) {
    const auto &token = words[idx].word_;
    ASSERT_TRUE(0 == strncmp(ngram_words_[idx], token.ptr(), token.length()));
  }
}
|
||||
|
||||
} // end namespace storage
|
||||
} // end namespace oceanbase
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -rf test_fts_plugin.log");
|
||||
OB_LOGGER.set_file_name("test_fts_plugin.log", true);
|
||||
OB_LOGGER.set_log_level("INFO");
|
||||
oceanbase::storage::ObTestFTPluginHelper::file_name = argv[0];
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
Reference in New Issue
Block a user