// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "olap/task/index_builder.h"

// NOTE(review): the header names of the next two #include directives are missing in this
// copy of the file (presumably the test-framework headers) - restore them.
#include
#include

#include "olap/olap_common.h"
#include "olap/rowset/beta_rowset.h"
#include "olap/rowset/rowset_factory.h"
#include "olap/rowset/rowset_writer_context.h"
#include "olap/storage_engine.h"
#include "olap/tablet_fwd.h"
#include "olap/tablet_schema.h"

namespace doris {
using namespace testing;

// Test fixture for IndexBuilder.
//
// SetUp() creates a scratch directory under the current working directory, installs a
// storage engine and temp-file directories on the process-wide ExecEnv, and builds a tablet
// with a two-column (k1, k2) DUP_KEYS schema. Tests fill `_alter_indexes` and hand
// `_tablet`, `_columns` and `_alter_indexes` to an IndexBuilder.
//
// NOTE(review): several template argument lists (after std::vector, std::unique_ptr,
// std::make_unique and std::make_shared) appear to have been lost in this copy of the file;
// the code tokens below are left exactly as found.
class IndexBuilderTest : public ::testing::Test {
protected:
    // Builds the on-disk and in-memory environment used by every test.
    void SetUp() override {
        // Resolve the scratch directory relative to the current working directory and
        // start from an empty directory.
        char buffer[MAX_PATH_LEN];
        ASSERT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
        _current_dir = std::string(buffer);
        _absolute_dir = _current_dir + "/" + std::string(dest_dir);
        ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
        ASSERT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());

        // Store paths handed to both the temp-file directories and the storage engine.
        std::vector paths;
        paths.emplace_back(config::storage_root_path, -1);

        // tmp dir
        // NOTE(review): tmp_dir has the same value as dest_dir, so this deletes and recreates
        // the directory that was just created above - confirm this is intended.
        EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
        EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
        paths.emplace_back(std::string(tmp_dir), 1024000000);
        auto tmp_file_dirs = std::make_unique(paths);
        EXPECT_TRUE(tmp_file_dirs->init().ok());
        ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));

        // Create the storage engine and data dir, and publish the engine on ExecEnv.
        doris::EngineOptions options;
        options.store_paths = paths;
        _engine = std::make_unique(options);
        _data_dir = std::make_unique(_absolute_dir);
        ASSERT_TRUE(_data_dir->update_capacity().ok());
        ExecEnv::GetInstance()->set_storage_engine(_engine.get());

        // Create tablet meta
        _tablet_meta = create_tablet_meta();
        // auto* tablet_schema = _tablet_meta->mutable_tablet_schema();

        // The schema used by the tests is kept separately in _tablet_schema.
        _tablet_schema = std::make_shared();
        create_tablet_schema(_tablet_schema, KeysType::DUP_KEYS);

        // Initialize tablet
        _tablet = std::make_shared(*_engine, _tablet_meta, _data_dir.get());
        ASSERT_TRUE(_tablet->init().ok());
    }

    // Removes the scratch directory, clears the engine pointer on ExecEnv and drops the tablet.
    // Because the first step is an ASSERT, a failed delete returns before the other two run.
    void TearDown() override {
        ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
        ExecEnv::GetInstance()->set_storage_engine(nullptr);
        _tablet.reset();
    }

    // Fills `tablet_schema` with the given keys type, the V2 inverted-index storage format
    // and two INT columns: k1 (unique id 1, key) and k2 (unique id 2, not a key).
    // `num_value_col` is accepted but not used by this function.
    void create_tablet_schema(TabletSchemaSPtr tablet_schema, KeysType keystype,
                              int num_value_col = 1) {
        // Set basic properties of TabletSchema directly
        tablet_schema->_keys_type = keystype;
        tablet_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V2;

        // Create the first column: k1, a key column
        TabletColumn column_1;
        column_1.set_type(FieldType::OLAP_FIELD_TYPE_INT);
        column_1.set_unique_id(1);
        column_1.set_name("k1");
        column_1.set_is_key(true);
        tablet_schema->append_column(column_1);

        // Create the second column: k2, which is not marked as a key
        TabletColumn column_2;
        column_2.set_type(FieldType::OLAP_FIELD_TYPE_INT);
        column_2.set_unique_id(2);
        column_2.set_name("k2");
        column_2.set_is_key(false);
        tablet_schema->append_column(column_2);
    }

    // Returns a TabletMeta initialised from a protobuf carrying fixed ids
    // (table 1, tablet 15673, schema hash 567997577, shard 0).
    TabletMetaSharedPtr create_tablet_meta() {
        TabletMetaPB tablet_meta_pb;
        tablet_meta_pb.set_table_id(1);
        tablet_meta_pb.set_tablet_id(15673);
        tablet_meta_pb.set_schema_hash(567997577);
        tablet_meta_pb.set_shard_id(0);
        tablet_meta_pb.set_creation_time(1575351212);

        TabletMetaSharedPtr tablet_meta(new TabletMeta());
        tablet_meta->init_from_pb(tablet_meta_pb);
        return tablet_meta;
    }

    // Helper to create rowset meta: initialises `rs_meta` from a fixed protobuf covering
    // versions [start, end] and attaches `tablet_schema` to it.
    // `rs_meta` must already point to an object; it is dereferenced, not allocated, here.
    void init_rs_meta(RowsetMetaSharedPtr& rs_meta, TabletSchemaSPtr tablet_schema, int64_t start,
                      int64_t end) {
        RowsetMetaPB rowset_meta_pb;
        rowset_meta_pb.set_rowset_id(540081);
        rowset_meta_pb.set_tablet_id(15673);
        rowset_meta_pb.set_tablet_schema_hash(567997577);
        rowset_meta_pb.set_rowset_type(RowsetTypePB::BETA_ROWSET);
        rowset_meta_pb.set_rowset_state(RowsetStatePB::VISIBLE);
        rowset_meta_pb.set_start_version(start);
        rowset_meta_pb.set_end_version(end);
        rowset_meta_pb.set_num_rows(3929);
        rowset_meta_pb.set_total_disk_size(84699);
        rowset_meta_pb.set_data_disk_size(84464);
        rowset_meta_pb.set_index_disk_size(235);
        rowset_meta_pb.set_num_segments(2);

        rs_meta->init_from_pb(rowset_meta_pb);
        rs_meta->set_tablet_schema(tablet_schema);
    }

    std::unique_ptr _engine;
    TabletSharedPtr _tablet;
    TabletMetaSharedPtr _tablet_meta;
    TabletSchemaSPtr _tablet_schema;
    // Passed unchanged to every IndexBuilder; no visible test adds to it.
    std::vector _columns;
    // Index definitions each test fills before constructing an IndexBuilder.
    std::vector _alter_indexes;
    std::unique_ptr _data_dir = nullptr;
    std::string _current_dir;
    // <cwd>/<dest_dir>; root of everything the tests write.
    std::string _absolute_dir;
    constexpr static uint32_t MAX_PATH_LEN = 1024;
    constexpr static std::string_view dest_dir = "./ut_dir/index_builder_test";
    // NOTE(review): same value as dest_dir - see the note in SetUp().
    constexpr static std::string_view tmp_dir = "./ut_dir/index_builder_test";

    // Returns "<tablet_path>/<rowset_id>_<segment_id>".
    std::string local_segment_path(const std::string& tablet_path, const std::string& rowset_id,
                                   int segment_id) {
        return tablet_path + "/" + rowset_id + "_" + std::to_string(segment_id);
    }
};

// Constructs an IndexBuilder for a single alter index (id 1 on column "col1") and checks
// that init() succeeds and records exactly one alter index id.
TEST_F(IndexBuilderTest, BasicBuildTest) {
    // 1. Prepare test data
    TOlapTableIndex index;
    index.index_id = 1;
    index.columns.emplace_back("col1");
    _alter_indexes.push_back(index);

    // 2. Create IndexBuilder
    IndexBuilder builder(_tablet, _columns, _alter_indexes, false);

    // 3. Verify initialization
    auto status = builder.init();
    EXPECT_TRUE(status.ok());
    EXPECT_EQ(builder._alter_index_ids.size(), 1);
}

// Writes a rowset whose schema has an inverted index on k1, runs IndexBuilder with the
// drop flag set, and checks which .idx/.dat files are left in the tablet directory.
TEST_F(IndexBuilderTest, DropIndexTest) {
    // 0.
// prepare tablet path
    // NOTE(review): some template argument lists (after std::unique_ptr / std::vector) appear
    // to have been lost in this copy of the file; the code tokens are left exactly as found.
    // In this test the tablet path and the rowset directory are the same directory.
    auto tablet_path = _absolute_dir + "/" + std::to_string(15676);
    _tablet->_tablet_path = tablet_path;
    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());

    // 1. Prepare data for writing
    RowsetSharedPtr rowset;
    const int num_rows = 1000;

    // 2. First add an initial index to the schema (for k1 column)
    TabletIndex initial_index;
    initial_index._index_id = 1;
    initial_index._index_name = "k1_index";
    initial_index._index_type = IndexType::INVERTED;
    initial_index._col_unique_ids.push_back(1); // unique_id for k1
    _tablet_schema->append_index(std::move(initial_index));

    // 3. Create a rowset writer context
    RowsetWriterContext writer_context;
    writer_context.rowset_id.init(15676);
    writer_context.tablet_id = 15676;
    writer_context.tablet_schema_hash = 567997577;
    writer_context.partition_id = 10;
    writer_context.rowset_type = BETA_ROWSET;
    writer_context.rowset_dir = tablet_path;
    writer_context.rowset_state = VISIBLE;
    writer_context.tablet_schema = _tablet_schema;
    writer_context.version.first = 10;
    writer_context.version.second = 10;

    ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok());

    // 4. Create a rowset writer
    std::unique_ptr rowset_writer;
    auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
    ASSERT_TRUE(status.ok()) << status.to_string();

    // 5. Write data to the rowset
    {
        vectorized::Block block = _tablet_schema->create_block();
        auto columns = block.mutate_columns();

        // Add data for k1 and k2 columns: k1 = 0, 10, 20, ...; k2 cycles through 0..99
        for (int i = 0; i < num_rows; ++i) {
            // k1 column (int)
            int32_t k1 = i * 10;
            columns[0]->insert_data((const char*)&k1, sizeof(k1));

            // k2 column (int)
            int32_t k2 = i % 100;
            columns[1]->insert_data((const char*)&k2, sizeof(k2));
        }

        // Add the block to the rowset
        Status s = rowset_writer->add_block(&block);
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Flush the writer
        s = rowset_writer->flush();
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Build the rowset
        ASSERT_TRUE(rowset_writer->build(rowset).ok());

        // Add the rowset to the tablet
        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
    }

    // 6. Verify index exists before dropping
    EXPECT_TRUE(_tablet_schema->has_inverted_index());
    EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1, ""));

    // 7. Prepare index for dropping
    TOlapTableIndex drop_index;
    drop_index.index_id = 1;
    drop_index.columns.emplace_back("k1");
    _alter_indexes.push_back(drop_index);

    // 8. Create IndexBuilder with drop operation (last constructor argument is true)
    IndexBuilder builder(_tablet, _columns, _alter_indexes, true);

    // 9. Initialize and verify
    status = builder.init();
    EXPECT_TRUE(status.ok()) << status.to_string();
    EXPECT_EQ(builder._alter_index_ids.size(), 1);

    // 10. Execute drop operation
    status = builder.do_build_inverted_index();
    EXPECT_TRUE(status.ok()) << status.to_string();

    // 11. Verify the index has been removed
    // check that the tablet path still exists
    bool exists = false;
    EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok());
    EXPECT_TRUE(exists);

    // List the tablet directory; files of the original and of the rewritten rowset are
    // told apart by file-name prefix because both live in the same directory here.
    std::vector files;
    bool dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files, &dir_exists).ok());
    EXPECT_TRUE(dir_exists);

    int new_idx_file_count = 0;
    int new_dat_file_count = 0;
    int old_idx_file_count = 0;
    int old_dat_file_count = 0;
    for (const auto& file : files) {
        std::string filename = file.file_name;
        // "15676_0.*": segment 0 of the rowset written above (rowset_id.init(15676)).
        if (filename.find("15676_0.idx") != std::string::npos) {
            old_idx_file_count++;
        }
        if (filename.find("15676_0.dat") != std::string::npos) {
            old_dat_file_count++;
        }
        // Presumably the rowset id given to the rowset produced by the drop - TODO confirm.
        if (filename.find("020000000000000100000000000000000000000000000000_0.idx") !=
            std::string::npos) {
            new_idx_file_count++;
        }
        if (filename.find("020000000000000100000000000000000000000000000000_0.dat") !=
            std::string::npos) {
            new_dat_file_count++;
        }
    }
    // The index should have been removed: the original rowset keeps its .idx and .dat
    // files, while the new rowset has a .dat file but no .idx file.
    EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file before drop";
    EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file before drop";
    EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file after drop";
    EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop";
    // Schema-level check, currently disabled:
    //auto tablet_schema = _tablet->tablet_schema();
    //EXPECT_FALSE(tablet_schema->has_inverted_index_with_index_id(1));
}

// Writes a rowset without any index, then builds inverted indexes on k1 and k2 and checks
// the files found in the original rowset directory and in the tablet directory.
TEST_F(IndexBuilderTest, BuildIndexAfterWritingDataTest) {
    // 0. prepare tablet path
    auto tablet_path = _absolute_dir + "/" + std::to_string(14673);
    _tablet->_tablet_path = tablet_path;
    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());

    // 1. Prepare data for writing
    RowsetSharedPtr rowset;
    const int num_rows = 1000;

    // 2.
// Create a rowset writer context
    // NOTE(review): some template argument lists (after std::unique_ptr / std::vector) appear
    // to have been lost in this copy of the file; the code tokens are left exactly as found.
    // The rowset is written to <_absolute_dir>/15673, which differs from the tablet path.
    RowsetWriterContext writer_context;
    writer_context.rowset_id.init(15673);
    writer_context.tablet_id = 15673;
    writer_context.tablet_schema_hash = 567997577;
    writer_context.partition_id = 10;
    writer_context.rowset_type = BETA_ROWSET;
    writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15673);
    writer_context.rowset_state = VISIBLE;
    writer_context.tablet_schema = _tablet_schema;
    writer_context.version.first = 10;
    writer_context.version.second = 10;

    ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok());

    // 3. Create a rowset writer
    std::unique_ptr rowset_writer;
    auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
    ASSERT_TRUE(status.ok()) << status.to_string();

    // 4. Write data to the rowset
    {
        vectorized::Block block = _tablet_schema->create_block();
        auto columns = block.mutate_columns();

        // Add data for k1 and k2 columns according to the schema
        for (int i = 0; i < num_rows; ++i) {
            // k1 column (int)
            int32_t k1 = i * 10;
            columns[0]->insert_data((const char*)&k1, sizeof(k1));

            // k2 column (int)
            int32_t k2 = i % 100;
            columns[1]->insert_data((const char*)&k2, sizeof(k2));
        }

        // Add the block to the rowset
        Status s = rowset_writer->add_block(&block);
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Flush the writer
        s = rowset_writer->flush();
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Build the rowset
        ASSERT_TRUE(rowset_writer->build(rowset).ok());

        // Add the rowset to the tablet
        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
    }

    // 5. Prepare index for building: one inverted index per column
    TOlapTableIndex index1;
    index1.index_id = 1;
    index1.columns.emplace_back("k1");
    index1.index_name = "k1_index";
    index1.index_type = TIndexType::INVERTED;
    _alter_indexes.push_back(index1);

    TOlapTableIndex index2;
    index2.index_id = 2;
    index2.columns.emplace_back("k2");
    index2.index_name = "k2_index";
    index2.index_type = TIndexType::INVERTED;
    _alter_indexes.push_back(index2);

    // 6. Create IndexBuilder
    IndexBuilder builder(_tablet, _columns, _alter_indexes, false);

    // 7. Initialize and verify
    status = builder.init();
    EXPECT_TRUE(status.ok()) << status.to_string();
    EXPECT_EQ(builder._alter_index_ids.size(), 2);

    // 8. Build index
    status = builder.do_build_inverted_index();
    EXPECT_TRUE(status.ok()) << status.to_string();

    // check old tablet path and new tablet path
    // "old" is the directory the rowset was written to; "new" is the tablet path set in step 0.
    auto old_tablet_path = _absolute_dir + "/" + std::to_string(15673);
    auto new_tablet_path = _absolute_dir + "/" + std::to_string(14673);
    bool old_exists = false;
    bool new_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok());
    EXPECT_TRUE(old_exists);
    EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok());
    EXPECT_TRUE(new_exists);

    // Check files in old and new directories
    std::vector old_files;
    bool old_dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()
                        ->list(old_tablet_path, true, &old_files, &old_dir_exists)
                        .ok());
    EXPECT_TRUE(old_dir_exists);
    int idx_file_count = 0;
    int dat_file_count = 0;
    for (const auto& file : old_files) {
        std::string filename = file.file_name;
        if (filename.find(".idx") != std::string::npos) {
            idx_file_count++;
        }
        if (filename.find(".dat") != std::string::npos) {
            dat_file_count++;
        }
    }
    // The original rowset was written without an index, so only its data file is expected.
    EXPECT_EQ(idx_file_count, 0) << "Old directory should contain exactly 0 .idx file";
    EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file";

    std::vector new_files;
    bool new_dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()
                        ->list(new_tablet_path, true, &new_files, &new_dir_exists)
                        .ok());
    EXPECT_TRUE(new_dir_exists);
    int new_idx_file_count = 0;
    int new_dat_file_count = 0;
    for (const auto& file : new_files) {
        std::string filename = file.file_name;
        if (filename.find(".idx") != std::string::npos) {
            new_idx_file_count++;
        }
        if (filename.find(".dat") != std::string::npos) {
            new_dat_file_count++;
        }
    }
    // Two indexes were requested but a single .idx file is expected; presumably the V2
    // storage format keeps all indexes of a segment in one file - TODO confirm.
    EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files";
    EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file";

    // 9. Verify the result (indexes should be built successfully) - currently disabled
    //auto tablet_schema = _tablet->tablet_schema();
    //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(1));
    //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(2));
}

// Writes a rowset whose schema already has an inverted index on k1, then builds an
// additional index on k2 and checks the files in the rowset and tablet directories.
TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) {
    // 0. prepare tablet path
    auto tablet_path = _absolute_dir + "/" + std::to_string(14675);
    _tablet->_tablet_path = tablet_path;
    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());

    // 1. Prepare data for writing
    RowsetSharedPtr rowset;
    const int num_rows = 1000;

    // 2. First add an initial index to the schema (for k1 column)
    TabletIndex initial_index;
    initial_index._index_id = 1;
    initial_index._index_name = "k1_index";
    initial_index._index_type = IndexType::INVERTED;
    initial_index._col_unique_ids.push_back(1); // unique_id for k1
    _tablet_schema->append_index(std::move(initial_index));

    // 3. Create rowset writer context
    RowsetWriterContext writer_context;
    writer_context.rowset_id.init(15675);
    writer_context.tablet_id = 15675;
    writer_context.tablet_schema_hash = 567997577;
    writer_context.partition_id = 10;
    writer_context.rowset_type = BETA_ROWSET;
    writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15675);
    writer_context.rowset_state = VISIBLE;
    writer_context.tablet_schema = _tablet_schema;
    writer_context.version.first = 10;
    writer_context.version.second = 10;

    ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok());

    // 4. Create rowset writer
    std::unique_ptr rowset_writer;
    auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
    ASSERT_TRUE(status.ok()) << status.to_string();

    // 5.
// Write data to rowset
    // NOTE(review): some template argument lists (after std::vector / std::make_shared) appear
    // to have been lost in this copy of the file; the code tokens are left exactly as found.
    {
        vectorized::Block block = _tablet_schema->create_block();
        auto columns = block.mutate_columns();

        // Add data for k1 and k2 columns: k1 = 0, 10, 20, ...; k2 cycles through 0..99
        for (int i = 0; i < num_rows; ++i) {
            // k1 column (int)
            int32_t k1 = i * 10;
            columns[0]->insert_data((const char*)&k1, sizeof(k1));

            // k2 column (int)
            int32_t k2 = i % 100;
            columns[1]->insert_data((const char*)&k2, sizeof(k2));
        }

        // Add block to rowset
        Status s = rowset_writer->add_block(&block);
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Flush writer
        s = rowset_writer->flush();
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Build rowset
        ASSERT_TRUE(rowset_writer->build(rowset).ok());

        // Add rowset to tablet
        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
    }

    // 6. Prepare new index information (only add for k2 column)
    TOlapTableIndex new_index;
    new_index.index_id = 2; // New index ID is 2
    new_index.columns.emplace_back("k2");
    new_index.index_name = "k2_index";
    new_index.index_type = TIndexType::INVERTED;
    _alter_indexes.push_back(new_index);

    // 7. Create IndexBuilder
    IndexBuilder builder(_tablet, _columns, _alter_indexes, false);

    // 8. Initialize and verify
    status = builder.init();
    EXPECT_TRUE(status.ok()) << status.to_string();
    EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only one new index needs to be built

    // 9. Build index
    status = builder.do_build_inverted_index();
    EXPECT_TRUE(status.ok()) << status.to_string();

    // check old tablet path and new tablet path
    // "old" is the directory the rowset was written to (15675); "new" is the tablet path
    // assigned to _tablet at the start of the test (14675).
    auto old_tablet_path = _absolute_dir + "/" + std::to_string(15675);
    auto new_tablet_path = _absolute_dir + "/" + std::to_string(14675);
    bool old_exists = false;
    bool new_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok());
    EXPECT_TRUE(old_exists);
    EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok());
    EXPECT_TRUE(new_exists);

    // Check files in old and new directories
    std::vector old_files;
    bool old_dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()
                        ->list(old_tablet_path, true, &old_files, &old_dir_exists)
                        .ok());
    EXPECT_TRUE(old_dir_exists);
    int idx_file_count = 0;
    int dat_file_count = 0;
    for (const auto& file : old_files) {
        std::string filename = file.file_name;
        if (filename.find(".idx") != std::string::npos) {
            idx_file_count++;
        }
        if (filename.find(".dat") != std::string::npos) {
            dat_file_count++;
        }
    }
    // The original rowset was written with the k1 index, so it has one .idx file.
    EXPECT_EQ(idx_file_count, 1) << "Old directory should contain exactly 1 .idx file";
    EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file";

    std::vector new_files;
    bool new_dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()
                        ->list(new_tablet_path, true, &new_files, &new_dir_exists)
                        .ok());
    EXPECT_TRUE(new_dir_exists);
    int new_idx_file_count = 0;
    int new_dat_file_count = 0;
    for (const auto& file : new_files) {
        std::string filename = file.file_name;
        if (filename.find(".idx") != std::string::npos) {
            new_idx_file_count++;
        }
        if (filename.find(".dat") != std::string::npos) {
            new_dat_file_count++;
        }
    }
    EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files";
    EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file";

    // 10. Verify results (both indexes should exist) - schema-level checks currently disabled
    // Verify initial index (k1) still exists
    //EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1));
    // Verify newly added index (k2) is successfully built
    //EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(2));
}

// Same scenario as AddIndexWhenOneExistsTest, but with a schema that uses the V1
// inverted-index storage format and a tablet re-created from that schema.
TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTestV1) {
    // 1. Create new schema using V1 format
    auto v1_schema = std::make_shared();
    create_tablet_schema(v1_schema, KeysType::DUP_KEYS);

    // 2. Modify to V1 format (create_tablet_schema sets V2)
    v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;

    // 3. First add an initial index to the schema (for k1 column)
    TabletIndex initial_index;
    initial_index._index_id = 1;
    initial_index._index_name = "k1_index";
    initial_index._index_type = IndexType::INVERTED;
    initial_index._col_unique_ids.push_back(1); // unique_id for k1
    v1_schema->append_index(std::move(initial_index));

    // 4. Update schema in tablet: round-trip the tablet meta through its protobuf form with
    // the schema section replaced by the V1 schema.
    TabletMetaPB tablet_meta_pb;
    _tablet_meta->to_meta_pb(&tablet_meta_pb);

    TabletSchemaPB v1_schema_pb;
    v1_schema->to_schema_pb(&v1_schema_pb);
    tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb);

    _tablet_meta->init_from_pb(tablet_meta_pb);

    // Reinitialize tablet to use new schema
    _tablet = std::make_shared(*_engine, _tablet_meta, _data_dir.get());
    ASSERT_TRUE(_tablet->init().ok());
    auto tablet_path = _absolute_dir + "/" + std::to_string(14674);
    _tablet->_tablet_path = tablet_path;
    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());

    // 5. Prepare data
    RowsetSharedPtr rowset;
    const int num_rows = 1000;

    // 6.
// Create rowset writer context
    // NOTE(review): some template argument lists (after std::unique_ptr / std::vector) appear
    // to have been lost in this copy of the file; the code tokens are left exactly as found.
    // The writer uses the V1 schema directly, and writes to <_absolute_dir>/15674.
    RowsetWriterContext writer_context;
    writer_context.rowset_id.init(15674);
    writer_context.tablet_id = 15674;
    writer_context.tablet_schema_hash = 567997577;
    writer_context.partition_id = 10;
    writer_context.rowset_type = BETA_ROWSET;
    writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15674);
    writer_context.rowset_state = VISIBLE;
    writer_context.tablet_schema = v1_schema;
    writer_context.version.first = 10;
    writer_context.version.second = 10;

    ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok());

    // 7. Create rowset writer
    std::unique_ptr rowset_writer;
    auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
    ASSERT_TRUE(status.ok()) << status.to_string();

    // 8. Write data to rowset
    {
        vectorized::Block block = v1_schema->create_block();
        auto columns = block.mutate_columns();

        // Add data for k1 and k2 columns: k1 = 0, 10, 20, ...; k2 cycles through 0..99
        for (int i = 0; i < num_rows; ++i) {
            // k1 column (int)
            int32_t k1 = i * 10;
            columns[0]->insert_data((const char*)&k1, sizeof(k1));

            // k2 column (int)
            int32_t k2 = i % 100;
            columns[1]->insert_data((const char*)&k2, sizeof(k2));
        }

        // Add block to rowset
        Status s = rowset_writer->add_block(&block);
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Flush writer
        s = rowset_writer->flush();
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Build rowset
        ASSERT_TRUE(rowset_writer->build(rowset).ok());

        // Add rowset to tablet
        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
    }

    // 9. Clear existing index list, prepare new index
    _alter_indexes.clear();

    // 10. Prepare new index information (only add for k2 column)
    TOlapTableIndex new_index;
    new_index.index_id = 2; // New index ID is 2
    new_index.columns.emplace_back("k2");
    new_index.index_name = "k2_index";
    new_index.index_type = TIndexType::INVERTED;
    _alter_indexes.push_back(new_index);

    // 11. Create IndexBuilder
    IndexBuilder builder(_tablet, _columns, _alter_indexes, false);

    // 12. Initialize and verify
    status = builder.init();
    EXPECT_TRUE(status.ok()) << status.to_string();
    EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only one new index needs to be built

    // 13. Build index
    status = builder.do_build_inverted_index();
    EXPECT_TRUE(status.ok()) << status.to_string();

    // check old tablet path and new tablet path
    // "old" is the directory the rowset was written to (15674); "new" is the tablet path
    // assigned to _tablet earlier in the test (14674).
    auto old_tablet_path = _absolute_dir + "/" + std::to_string(15674);
    auto new_tablet_path = _absolute_dir + "/" + std::to_string(14674);
    bool old_exists = false;
    bool new_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok());
    EXPECT_TRUE(old_exists);
    EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok());
    EXPECT_TRUE(new_exists);

    // Check files in old and new directories
    std::vector old_files;
    bool old_dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()
                        ->list(old_tablet_path, true, &old_files, &old_dir_exists)
                        .ok());
    EXPECT_TRUE(old_dir_exists);
    int idx_file_count = 0;
    int dat_file_count = 0;
    for (const auto& file : old_files) {
        std::string filename = file.file_name;
        if (filename.find(".idx") != std::string::npos) {
            idx_file_count++;
        }
        if (filename.find(".dat") != std::string::npos) {
            dat_file_count++;
        }
    }
    EXPECT_EQ(idx_file_count, 1) << "Old directory should contain exactly 1 .idx file";
    EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file";

    std::vector new_files;
    bool new_dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()
                        ->list(new_tablet_path, true, &new_files, &new_dir_exists)
                        .ok());
    EXPECT_TRUE(new_dir_exists);
    int new_idx_file_count = 0;
    int new_dat_file_count = 0;
    for (const auto& file : new_files) {
        std::string filename = file.file_name;
        if (filename.find(".idx") != std::string::npos) {
            new_idx_file_count++;
        }
        if (filename.find(".dat") != std::string::npos) {
            new_dat_file_count++;
        }
    }
    // Unlike the V2 variant of this test, two .idx files are expected here; presumably the
    // V1 format stores each index in its own file - TODO confirm.
    EXPECT_EQ(new_idx_file_count, 2) << "New directory should contain exactly 2 .idx files";
    EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file";

    // 14. Verify results (both indexes should exist) - schema-level checks currently disabled
    // Verify initial index (k1) still exists
    //EXPECT_TRUE(v1_schema->has_inverted_index_with_index_id(1));
    // Verify newly added index (k2) is successfully built
    //EXPECT_TRUE(_tablet->tablet_schema()->has_inverted_index_with_index_id(2));

    // 15. Confirm storage format is still V1
    //EXPECT_EQ(v1_schema->_inverted_index_storage_format, InvertedIndexStorageFormatPB::V1);
}

// Writes a rowset made of several segments, builds inverted indexes on k1 and k2, and
// checks that the tablet directory ends up with one .idx and one .dat file per segment.
TEST_F(IndexBuilderTest, MultiSegmentBuildIndexTest) {
    // 0. prepare tablet path
    auto tablet_path = _absolute_dir + "/" + std::to_string(14677);
    _tablet->_tablet_path = tablet_path;
    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());

    // 1. Prepare data for writing
    RowsetSharedPtr rowset;
    const int rows_per_segment = 500;
    const int num_segments = 3;

    // 2. Create a rowset writer context with segment size set to trigger multiple segments
    RowsetWriterContext writer_context;
    writer_context.rowset_id.init(15677);
    writer_context.tablet_id = 15677;
    writer_context.tablet_schema_hash = 567997577;
    writer_context.partition_id = 10;
    writer_context.rowset_type = BETA_ROWSET;
    writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15677);
    writer_context.rowset_state = VISIBLE;
    writer_context.tablet_schema = _tablet_schema;
    writer_context.version.first = 10;
    writer_context.version.second = 10;
    // Set small segment size to ensure we create multiple segments
    writer_context.max_rows_per_segment = rows_per_segment;

    ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok());

    // 3. Create a rowset writer
    std::unique_ptr rowset_writer;
    auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
    ASSERT_TRUE(status.ok()) << status.to_string();

    // 4.
// Write data to the rowset in multiple batches to ensure we get multiple segments
    // NOTE(review): some template argument lists (after std::vector) appear to have been lost
    // in this copy of the file; the code tokens are left exactly as found.
    // One block of rows_per_segment rows is added and flushed per iteration.
    for (int segment = 0; segment < num_segments; segment++) {
        vectorized::Block block = _tablet_schema->create_block();
        auto columns = block.mutate_columns();

        // Add data for k1 and k2 columns
        for (int i = 0; i < rows_per_segment; ++i) {
            // k1 column (int) - make values different across segments
            int32_t k1 = (segment * rows_per_segment + i) * 10;
            columns[0]->insert_data((const char*)&k1, sizeof(k1));

            // k2 column (int)
            int32_t k2 = (segment * rows_per_segment + i) % 100;
            columns[1]->insert_data((const char*)&k2, sizeof(k2));
        }

        // Add the block to the rowset
        Status s = rowset_writer->add_block(&block);
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Flush to ensure we create a new segment
        s = rowset_writer->flush();
        ASSERT_TRUE(s.ok()) << s.to_string();
    }

    // 5. Build the rowset
    ASSERT_TRUE(rowset_writer->build(rowset).ok());

    // Verify we have the expected number of segments
    ASSERT_EQ(rowset->num_segments(), num_segments)
            << "Rowset should have " << num_segments << " segments but has "
            << rowset->num_segments();

    // 6. Add the rowset to the tablet
    ASSERT_TRUE(_tablet->add_rowset(rowset).ok());

    // 7. Prepare indexes for building: one inverted index per column
    TOlapTableIndex index1;
    index1.index_id = 1;
    index1.columns.emplace_back("k1");
    index1.index_name = "k1_index";
    index1.index_type = TIndexType::INVERTED;
    _alter_indexes.push_back(index1);

    TOlapTableIndex index2;
    index2.index_id = 2;
    index2.columns.emplace_back("k2");
    index2.index_name = "k2_index";
    index2.index_type = TIndexType::INVERTED;
    _alter_indexes.push_back(index2);

    // 8. Create IndexBuilder
    IndexBuilder builder(_tablet, _columns, _alter_indexes, false);

    // 9. Initialize and verify
    status = builder.init();
    EXPECT_TRUE(status.ok()) << status.to_string();
    EXPECT_EQ(builder._alter_index_ids.size(), 2);

    // 10. Build indexes
    status = builder.do_build_inverted_index();
    EXPECT_TRUE(status.ok()) << status.to_string();

    // 11. Check paths and files
    // "old" is the directory the rowset was written to (15677); "new" is the tablet path
    // assigned to _tablet at the start of the test (14677).
    auto old_tablet_path = _absolute_dir + "/" + std::to_string(15677);
    auto new_tablet_path = _absolute_dir + "/" + std::to_string(14677);
    bool old_exists = false;
    bool new_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok());
    EXPECT_TRUE(old_exists);
    EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok());
    EXPECT_TRUE(new_exists);

    // 12. Check files in old and new directories
    std::vector old_files;
    bool old_dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()
                        ->list(old_tablet_path, true, &old_files, &old_dir_exists)
                        .ok());
    EXPECT_TRUE(old_dir_exists);
    int old_idx_file_count = 0;
    int old_dat_file_count = 0;
    for (const auto& file : old_files) {
        std::string filename = file.file_name;
        if (filename.find(".idx") != std::string::npos) {
            old_idx_file_count++;
        }
        if (filename.find(".dat") != std::string::npos) {
            old_dat_file_count++;
        }
    }
    // The original rowset was written without an index: data files only, one per segment.
    EXPECT_EQ(old_idx_file_count, 0) << "Old directory should contain exactly 0 .idx files";
    EXPECT_EQ(old_dat_file_count, num_segments)
            << "Old directory should contain exactly " << num_segments << " .dat files";

    std::vector new_files;
    bool new_dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()
                        ->list(new_tablet_path, true, &new_files, &new_dir_exists)
                        .ok());
    EXPECT_TRUE(new_dir_exists);
    int new_idx_file_count = 0;
    int new_dat_file_count = 0;
    for (const auto& file : new_files) {
        std::string filename = file.file_name;
        if (filename.find(".idx") != std::string::npos) {
            new_idx_file_count++;
        }
        if (filename.find(".dat") != std::string::npos) {
            new_dat_file_count++;
        }
    }
    // One index file and one data file are expected for every segment.
    EXPECT_EQ(new_idx_file_count, num_segments)
            << "New directory should contain exactly " << num_segments << " .idx files";
    EXPECT_EQ(new_dat_file_count, num_segments)
            << "New directory should contain exactly " << num_segments << " .dat files";
}

// Requests an inverted index on a column name ("k3") that is not in the schema and checks
// that init() and the build both succeed without producing any .idx file.
TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) {
    // 0.
prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(14678); _tablet->_tablet_path = tablet_path; ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); // 1. Prepare data for writing RowsetSharedPtr rowset; const int num_rows = 1000; // 2. Create a rowset writer context whose rowset_dir is a separate directory from the tablet path RowsetWriterContext writer_context; writer_context.rowset_id.init(15678); writer_context.tablet_id = 15678; writer_context.tablet_schema_hash = 567997577; writer_context.partition_id = 10; writer_context.rowset_type = BETA_ROWSET; writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15678); writer_context.rowset_state = VISIBLE; writer_context.tablet_schema = _tablet_schema; writer_context.version.first = 10; writer_context.version.second = 10; ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); // 3. Create a rowset writer std::unique_ptr rowset_writer; auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); ASSERT_TRUE(status.ok()) << status.to_string(); // 4. Write data to the rowset { vectorized::Block block = _tablet_schema->create_block(); auto columns = block.mutate_columns(); // Add num_rows rows of data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { // k1 column (int), a multiple of ten per row int32_t k1 = i * 10; columns[0]->insert_data((const char*)&k1, sizeof(k1)); // k2 column (int), the row number modulo one hundred int32_t k2 = i % 100; columns[1]->insert_data((const char*)&k2, sizeof(k2)); } // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); // Flush the writer s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s.to_string(); // Build the rowset ASSERT_TRUE(rowset_writer->build(rowset).ok()); // Add the rowset to the tablet ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); } // 5.
Prepare indexes for building - including one for a non-existent column _alter_indexes.clear(); // The only requested index targets column "k3" TOlapTableIndex index2; index2.index_id = 2; index2.columns.emplace_back("k3"); // This column doesn't exist in the schema index2.index_name = "k3_index"; index2.index_type = TIndexType::INVERTED; _alter_indexes.push_back(index2); // 6. Create IndexBuilder for a build (not drop) operation IndexBuilder builder(_tablet, _columns, _alter_indexes, false); // 7. Initialize and verify status = builder.init(); // The init should succeed, as we'll skip non-existent columns later EXPECT_TRUE(status.ok()) << status.to_string(); // 8. Build indexes - expected to succeed although the only requested column does not exist status = builder.do_build_inverted_index(); EXPECT_TRUE(status.ok()) << status.to_string(); // 9. Check that the rowset directory (old) and the tablet path (new) both exist auto old_tablet_path = _absolute_dir + "/" + std::to_string(15678); auto new_tablet_path = _absolute_dir + "/" + std::to_string(14678); bool old_exists = false; bool new_exists = false; EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); EXPECT_TRUE(old_exists); EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); EXPECT_TRUE(new_exists); // 10.
Check files in old and new directories std::vector old_files; bool old_dir_exists = false; EXPECT_TRUE(io::global_local_filesystem() ->list(old_tablet_path, true, &old_files, &old_dir_exists) .ok()); EXPECT_TRUE(old_dir_exists); int old_idx_file_count = 0; int old_dat_file_count = 0; for (const auto& file : old_files) { std::string filename = file.file_name; if (filename.find(".idx") != std::string::npos) { old_idx_file_count++; } if (filename.find(".dat") != std::string::npos) { old_dat_file_count++; } } EXPECT_EQ(old_idx_file_count, 0) << "Old directory should contain exactly 0 .idx files"; EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; std::vector new_files; bool new_dir_exists = false; EXPECT_TRUE(io::global_local_filesystem() ->list(new_tablet_path, true, &new_files, &new_dir_exists) .ok()); EXPECT_TRUE(new_dir_exists); int new_idx_file_count = 0; int new_dat_file_count = 0; for (const auto& file : new_files) { std::string filename = file.file_name; if (filename.find(".idx") != std::string::npos) { new_idx_file_count++; } if (filename.find(".dat") != std::string::npos) { new_dat_file_count++; } } // No index was requested for an existing column and k3 does not exist, so no .idx file is expected EXPECT_EQ(new_idx_file_count, 0) << "New directory should contain exactly 0 .idx file for the existing column"; EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; // 11. The log output is not examined to confirm k3 index was skipped // This would require examining the log output which isn't easily done in unit tests, // but the file count verification above should be sufficient to confirm behavior } TEST_F(IndexBuilderTest, RenameColumnIndexTest) { // 0.
prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(14679); _tablet->_tablet_path = tablet_path; ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); auto schema = std::make_shared(); schema->_keys_type = KeysType::UNIQUE_KEYS; schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V2; // Create the key column k1 TabletColumn column_1; column_1.set_type(FieldType::OLAP_FIELD_TYPE_INT); column_1.set_unique_id(1); column_1.set_name("k1"); column_1.set_is_key(true); schema->append_column(column_1); // Create the second column k2, which is not a key TabletColumn column_2; column_2.set_type(FieldType::OLAP_FIELD_TYPE_INT); // not sequential unique_id column_2.set_unique_id(3); column_2.set_name("k2"); column_2.set_is_key(false); schema->append_column(column_2); // 1. Prepare data for writing RowsetSharedPtr rowset; const int num_rows = 1000; // 2. First add an initial index to the schema (for k1 column) TabletIndex initial_index; initial_index._index_id = 1; initial_index._index_name = "k1_index"; initial_index._index_type = IndexType::INVERTED; initial_index._col_unique_ids.push_back(1); // unique_id for k1 schema->append_index(std::move(initial_index)); // 3. Create a rowset writer context that uses the local schema built above RowsetWriterContext writer_context; writer_context.rowset_id.init(15679); writer_context.tablet_id = 15679; writer_context.tablet_schema_hash = 567997577; writer_context.partition_id = 10; writer_context.rowset_type = BETA_ROWSET; writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15679); writer_context.rowset_state = VISIBLE; writer_context.tablet_schema = schema; writer_context.version.first = 10; writer_context.version.second = 10; ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); // 4.
Create a rowset writer std::unique_ptr rowset_writer; auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); ASSERT_TRUE(status.ok()) << status.to_string(); // 5. Write data to the rowset { vectorized::Block block = schema->create_block(); auto columns = block.mutate_columns(); // Add num_rows rows of data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { // k1 column (int) int32_t k1 = i * 10; columns[0]->insert_data((const char*)&k1, sizeof(k1)); // k2 column (int) int32_t k2 = i % 100; columns[1]->insert_data((const char*)&k2, sizeof(k2)); } // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); // Flush the writer s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s.to_string(); // Build the rowset ASSERT_TRUE(rowset_writer->build(rowset).ok()); // Add the rowset to the tablet ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); } // 6. Prepare a single index to build - it names column k3 but carries the unique id of k2 _alter_indexes.clear(); // Index for rename column "k2" to "k3" TOlapTableIndex index2; index2.index_id = 3; index2.columns.emplace_back("k3"); // No column with this name exists in the schema; the unique id set below belongs to k2 index2.index_name = "k3_index"; index2.index_type = TIndexType::INVERTED; index2.column_unique_ids.push_back(3); index2.__isset.column_unique_ids = true; _alter_indexes.push_back(index2); // 7. Create IndexBuilder for a build (not drop) operation IndexBuilder builder(_tablet, _columns, _alter_indexes, false); // 8. Initialize and verify status = builder.init(); EXPECT_TRUE(status.ok()) << status.to_string(); EXPECT_EQ(builder._alter_index_ids.size(), 1); // Exactly one alter index id is expected; k3_index is the only index passed in // 9. Build indexes - expected to succeed status = builder.do_build_inverted_index(); EXPECT_TRUE(status.ok()) << status.to_string(); // 10.
Check paths and files auto old_tablet_path = _absolute_dir + "/" + std::to_string(15679); auto new_tablet_path = _absolute_dir + "/" + std::to_string(14679); bool old_exists = false; bool new_exists = false; EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); EXPECT_TRUE(old_exists); EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); EXPECT_TRUE(new_exists); // 11. Count .idx and .dat files in old and new directories std::vector old_files; bool old_dir_exists = false; EXPECT_TRUE(io::global_local_filesystem() ->list(old_tablet_path, true, &old_files, &old_dir_exists) .ok()); EXPECT_TRUE(old_dir_exists); int old_idx_file_count = 0; int old_dat_file_count = 0; for (const auto& file : old_files) { std::string filename = file.file_name; if (filename.find(".idx") != std::string::npos) { old_idx_file_count++; } if (filename.find(".dat") != std::string::npos) { old_dat_file_count++; } } EXPECT_EQ(old_idx_file_count, 1) << "Old directory should contain exactly 1 .idx file for the original k1 index"; EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; std::vector new_files; bool new_dir_exists = false; EXPECT_TRUE(io::global_local_filesystem() ->list(new_tablet_path, true, &new_files, &new_dir_exists) .ok()); EXPECT_TRUE(new_dir_exists); int new_idx_file_count = 0; int new_dat_file_count = 0; for (const auto& file : new_files) { std::string filename = file.file_name; if (filename.find(".idx") != std::string::npos) { new_idx_file_count++; } if (filename.find(".dat") != std::string::npos) { new_dat_file_count++; } } // Exactly one .idx file is expected in the new directory. NOTE(review): presumably the V2 storage format keeps all indexes of a segment in a single file - confirm EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; // 12.
Verify the tablet schema - would need to examine tablet_schema here // NOTE(review): the resulting schema is not inspected by this test, so which indexes it holds afterwards is unverified // Note: In production code, additional verification of schema would be done here } TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) { // 0. prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(14679); _tablet->_tablet_path = tablet_path; ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); // 1. Prepare data for writing RowsetSharedPtr rowset; const int num_rows = 1000; // 2. First add an initial index to the schema (for k1 column) TabletIndex initial_index; initial_index._index_id = 1; initial_index._index_name = "k1_index"; initial_index._index_type = IndexType::INVERTED; initial_index._col_unique_ids.push_back(1); // unique_id for k1 _tablet_schema->append_index(std::move(initial_index)); // 3. Create a rowset writer context RowsetWriterContext writer_context; writer_context.rowset_id.init(15679); writer_context.tablet_id = 15679; writer_context.tablet_schema_hash = 567997577; writer_context.partition_id = 10; writer_context.rowset_type = BETA_ROWSET; writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15679); writer_context.rowset_state = VISIBLE; writer_context.tablet_schema = _tablet_schema; writer_context.version.first = 10; writer_context.version.second = 10; ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); // 4. Create a rowset writer std::unique_ptr rowset_writer; auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); ASSERT_TRUE(status.ok()) << status.to_string(); // 5.
Write data to the rowset { vectorized::Block block = _tablet_schema->create_block(); auto columns = block.mutate_columns(); // Add num_rows rows of data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { // k1 column (int) int32_t k1 = i * 10; columns[0]->insert_data((const char*)&k1, sizeof(k1)); // k2 column (int) int32_t k2 = i % 100; columns[1]->insert_data((const char*)&k2, sizeof(k2)); } // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); // Flush the writer s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s.to_string(); // Build the rowset ASSERT_TRUE(rowset_writer->build(rowset).ok()); // Add the rowset to the tablet ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); } // 6. Prepare a single index to build, on the non-existent column k3 _alter_indexes.clear(); // Index for non-existent column "k3" TOlapTableIndex index2; index2.index_id = 3; index2.columns.emplace_back("k3"); // This column doesn't exist in the schema index2.index_name = "k3_index"; index2.index_type = TIndexType::INVERTED; _alter_indexes.push_back(index2); // 7. Create IndexBuilder for a build (not drop) operation IndexBuilder builder(_tablet, _columns, _alter_indexes, false); // 8. Initialize and verify status = builder.init(); EXPECT_TRUE(status.ok()) << status.to_string(); EXPECT_EQ(builder._alter_index_ids.size(), 1); // Exactly one alter index id is expected; k3_index is the only index passed in // 9. Build indexes - should only build for existing columns status = builder.do_build_inverted_index(); EXPECT_TRUE(status.ok()) << status.to_string(); // 10. Check that the rowset directory (old) and the tablet path (new) both exist auto old_tablet_path = _absolute_dir + "/" + std::to_string(15679); auto new_tablet_path = _absolute_dir + "/" + std::to_string(14679); bool old_exists = false; bool new_exists = false; EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); EXPECT_TRUE(old_exists); EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); EXPECT_TRUE(new_exists); // 11.
Check files in old and new directories std::vector old_files; bool old_dir_exists = false; EXPECT_TRUE(io::global_local_filesystem() ->list(old_tablet_path, true, &old_files, &old_dir_exists) .ok()); EXPECT_TRUE(old_dir_exists); int old_idx_file_count = 0; int old_dat_file_count = 0; for (const auto& file : old_files) { std::string filename = file.file_name; if (filename.find(".idx") != std::string::npos) { old_idx_file_count++; } if (filename.find(".dat") != std::string::npos) { old_dat_file_count++; } } EXPECT_EQ(old_idx_file_count, 1) << "Old directory should contain exactly 1 .idx file for the original k1 index"; EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; std::vector new_files; bool new_dir_exists = false; EXPECT_TRUE(io::global_local_filesystem() ->list(new_tablet_path, true, &new_files, &new_dir_exists) .ok()); EXPECT_TRUE(new_dir_exists); int new_idx_file_count = 0; int new_dat_file_count = 0; for (const auto& file : new_files) { std::string filename = file.file_name; if (filename.find(".idx") != std::string::npos) { new_idx_file_count++; } if (filename.find(".dat") != std::string::npos) { new_dat_file_count++; } } // Exactly one .idx file is expected in the new directory. NOTE(review): presumably this is the existing k1 index, with nothing built for the non-existent k3 - confirm EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; // 12. Verify the tablet schema - would need to examine tablet_schema here // NOTE(review): the resulting schema is not inspected by this test, so which indexes it holds afterwards is unverified // Note: In production code, additional verification of schema would be done here } TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTestV1) { // 1. Create new schema using V1 format auto v1_schema = std::make_shared(); create_tablet_schema(v1_schema, KeysType::DUP_KEYS); // 2.
Modify to V1 format v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; // 3. First add an initial index to the schema (for k1 column) TabletIndex initial_index; initial_index._index_id = 1; initial_index._index_name = "k1_index"; initial_index._index_type = IndexType::INVERTED; initial_index._col_unique_ids.push_back(1); // unique_id for k1 v1_schema->append_index(std::move(initial_index)); // 4. Replace the schema inside the tablet meta by going through the protobuf forms of both TabletMetaPB tablet_meta_pb; _tablet_meta->to_meta_pb(&tablet_meta_pb); TabletSchemaPB v1_schema_pb; v1_schema->to_schema_pb(&v1_schema_pb); tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); _tablet_meta->init_from_pb(tablet_meta_pb); // 5. Recreate the fixture tablet from the updated meta so it uses the new schema _tablet = std::make_shared(*_engine, _tablet_meta, _data_dir.get()); ASSERT_TRUE(_tablet->init().ok()); auto tablet_path = _absolute_dir + "/" + std::to_string(14680); _tablet->_tablet_path = tablet_path; ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); // 6. Prepare data for writing RowsetSharedPtr rowset; const int num_rows = 1000; // 7. Create rowset writer context that uses the V1 schema RowsetWriterContext writer_context; writer_context.rowset_id.init(15680); writer_context.tablet_id = 15680; writer_context.tablet_schema_hash = 567997577; writer_context.partition_id = 10; writer_context.rowset_type = BETA_ROWSET; writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15680); writer_context.rowset_state = VISIBLE; writer_context.tablet_schema = v1_schema; writer_context.version.first = 10; writer_context.version.second = 10; ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); // 8. Create rowset writer std::unique_ptr rowset_writer; auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); ASSERT_TRUE(status.ok()) << status.to_string(); // 9.
Write data to rowset { vectorized::Block block = v1_schema->create_block(); auto columns = block.mutate_columns(); // Add num_rows rows of data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { // k1 column (int) int32_t k1 = i * 10; columns[0]->insert_data((const char*)&k1, sizeof(k1)); // k2 column (int) int32_t k2 = i % 100; columns[1]->insert_data((const char*)&k2, sizeof(k2)); } // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); // Flush writer s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s.to_string(); // Build rowset ASSERT_TRUE(rowset_writer->build(rowset).ok()); // Add rowset to tablet ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); } // 10. Prepare a single index to build, on the non-existent column k3 _alter_indexes.clear(); // Index for non-existent column "k3" TOlapTableIndex index2; index2.index_id = 3; index2.columns.emplace_back("k3"); // This column doesn't exist in the schema index2.index_name = "k3_index"; index2.index_type = TIndexType::INVERTED; _alter_indexes.push_back(index2); // Add column information for the non-existent column _columns.clear(); // Clear previous column info TColumn non_existent_column; non_existent_column.column_name = "k3"; non_existent_column.column_type.type = TPrimitiveType::INT; _columns.push_back(non_existent_column); // 11. Create IndexBuilder for a build (not drop) operation IndexBuilder builder(_tablet, _columns, _alter_indexes, false); // 12. Initialize and verify that exactly one alter index id is recorded status = builder.init(); EXPECT_TRUE(status.ok()) << status.to_string(); EXPECT_EQ(builder._alter_index_ids.size(), 1); // 13. Build indexes - should only build for existing columns status = builder.do_build_inverted_index(); EXPECT_TRUE(status.ok()) << status.to_string(); // 14.
Check paths and files auto old_tablet_path = _absolute_dir + "/" + std::to_string(15680); auto new_tablet_path = _absolute_dir + "/" + std::to_string(14680); bool old_exists = false; bool new_exists = false; EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); EXPECT_TRUE(old_exists); EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); EXPECT_TRUE(new_exists); // 15. Count .idx and .dat files in old and new directories std::vector old_files; bool old_dir_exists = false; EXPECT_TRUE(io::global_local_filesystem() ->list(old_tablet_path, true, &old_files, &old_dir_exists) .ok()); EXPECT_TRUE(old_dir_exists); int old_idx_file_count = 0; int old_dat_file_count = 0; for (const auto& file : old_files) { std::string filename = file.file_name; if (filename.find(".idx") != std::string::npos) { old_idx_file_count++; } if (filename.find(".dat") != std::string::npos) { old_dat_file_count++; } } EXPECT_EQ(old_idx_file_count, 1) << "Old directory should contain exactly 1 .idx file for the original k1 index"; EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; std::vector new_files; bool new_dir_exists = false; EXPECT_TRUE(io::global_local_filesystem() ->list(new_tablet_path, true, &new_files, &new_dir_exists) .ok()); EXPECT_TRUE(new_dir_exists); int new_idx_file_count = 0; int new_dat_file_count = 0; for (const auto& file : new_files) { std::string filename = file.file_name; if (filename.find(".idx") != std::string::npos) { new_idx_file_count++; } if (filename.find(".dat") != std::string::npos) { new_dat_file_count++; } } // Exactly one .idx file is expected in the new directory. NOTE(review): presumably this is the existing k1 index, with nothing built for the non-existent k3 - confirm EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; // 16.
Confirm storage format is still V1 EXPECT_EQ(v1_schema->_inverted_index_storage_format, InvertedIndexStorageFormatPB::V1); } TEST_F(IndexBuilderTest, NonNullIndexDataTest) { // 0. prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(14681); _tablet->_tablet_path = tablet_path; ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); // 1. Prepare data for writing RowsetSharedPtr rowset; const int num_rows = 1000; // 2. Create a rowset writer context RowsetWriterContext writer_context; writer_context.rowset_id.init(15681); writer_context.tablet_id = 15681; writer_context.tablet_schema_hash = 567997577; writer_context.partition_id = 10; writer_context.rowset_type = BETA_ROWSET; writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15681); writer_context.rowset_state = VISIBLE; writer_context.tablet_schema = _tablet_schema; writer_context.version.first = 10; writer_context.version.second = 10; ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); // 3. Create a rowset writer with non-null values std::unique_ptr rowset_writer; auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); ASSERT_TRUE(status.ok()) << status.to_string(); // 4.
Write non-null data to the rowset { vectorized::Block block = _tablet_schema->create_block(); auto columns = block.mutate_columns(); // Add num_rows rows of data for k1 and k2 columns with no null values for (int i = 0; i < num_rows; ++i) { // k1 column (int) int32_t k1 = i * 10; columns[0]->insert_data((const char*)&k1, sizeof(k1)); // k2 column (int) int32_t k2 = i % 100; columns[1]->insert_data((const char*)&k2, sizeof(k2)); } // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); // Flush the writer s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s.to_string(); // Build the rowset ASSERT_TRUE(rowset_writer->build(rowset).ok()); // Add the rowset to the tablet ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); } // 5. Prepare a single index to build, on column k1 TOlapTableIndex index1; index1.index_id = 1; index1.columns.emplace_back("k1"); index1.index_name = "k1_index"; index1.index_type = TIndexType::INVERTED; _alter_indexes.push_back(index1); // 6. Build a separate schema whose second column is non-nullable. NOTE(review): non_null_schema is never handed to the tablet or the builder in this test, so it does not appear to influence the build - confirm intent TabletSchemaSPtr non_null_schema = std::make_shared(); create_tablet_schema(non_null_schema, KeysType::DUP_KEYS); // Set the second column to be non-nullable explicitly TabletColumn& k2_column = non_null_schema->mutable_column(1); k2_column.set_is_nullable(false); // 7. Create IndexBuilder from the fixture tablet (the schema from step 6 is not passed in) IndexBuilder builder(_tablet, _columns, _alter_indexes, false); // 8. Initialize and verify that exactly one alter index id is recorded status = builder.init(); EXPECT_TRUE(status.ok()) << status.to_string(); EXPECT_EQ(builder._alter_index_ids.size(), 1); // 9. Build index - intended to trigger _add_data rather than _add_nullable. NOTE(review): nothing below asserts which path actually ran status = builder.do_build_inverted_index(); EXPECT_TRUE(status.ok()) << status.to_string(); // 10.
Verify results auto old_tablet_path = _absolute_dir + "/" + std::to_string(15681); auto new_tablet_path = _absolute_dir + "/" + std::to_string(14681); bool old_exists = false; bool new_exists = false; EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); EXPECT_TRUE(old_exists); EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); EXPECT_TRUE(new_exists); // 11. List both directories; only the .idx files of the new directory are counted, old_files is not examined further std::vector old_files; bool old_dir_exists = false; EXPECT_TRUE(io::global_local_filesystem() ->list(old_tablet_path, true, &old_files, &old_dir_exists) .ok()); EXPECT_TRUE(old_dir_exists); std::vector new_files; bool new_dir_exists = false; EXPECT_TRUE(io::global_local_filesystem() ->list(new_tablet_path, true, &new_files, &new_dir_exists) .ok()); EXPECT_TRUE(new_dir_exists); int new_idx_file_count = 0; for (const auto& file : new_files) { std::string filename = file.file_name; if (filename.find(".idx") != std::string::npos) { new_idx_file_count++; } } EXPECT_EQ(new_idx_file_count, 1) << "Should have created 1 index file"; } TEST_F(IndexBuilderTest, NonExistentColumnUniqueIdTest) { // 0. prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(14682); _tablet->_tablet_path = tablet_path; ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); // 1. Prepare data for writing RowsetSharedPtr rowset; const int num_rows = 1000; // 2.
Create a rowset writer context RowsetWriterContext writer_context; writer_context.rowset_id.init(15682); writer_context.tablet_id = 15682; writer_context.tablet_schema_hash = 567997577; writer_context.partition_id = 10; writer_context.rowset_type = BETA_ROWSET; writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15682); writer_context.rowset_state = VISIBLE; writer_context.tablet_schema = _tablet_schema; writer_context.version.first = 10; writer_context.version.second = 10; ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); // 3. Create a rowset writer std::unique_ptr rowset_writer; auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); ASSERT_TRUE(status.ok()) << status.to_string(); // 4. Write data to the rowset { vectorized::Block block = _tablet_schema->create_block(); auto columns = block.mutate_columns(); // Add num_rows rows of data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { // k1 column (int) int32_t k1 = i * 10; columns[0]->insert_data((const char*)&k1, sizeof(k1)); // k2 column (int) int32_t k2 = i % 100; columns[1]->insert_data((const char*)&k2, sizeof(k2)); } // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); // Flush the writer s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s.to_string(); // Build the rowset ASSERT_TRUE(rowset_writer->build(rowset).ok()); // Add the rowset to the tablet ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); } // 5. Add an index on k1 to the fixture schema; note this happens after the rowset has been written TabletIndex initial_index; initial_index._index_id = 1; initial_index._index_name = "k1_index"; initial_index._index_type = IndexType::INVERTED; initial_index._col_unique_ids.push_back(1); // unique_id for k1 _tablet_schema->append_index(std::move(initial_index)); // 6.
Prepare indexes for building - specifying column by unique_id that doesn't exist _alter_indexes.clear(); // Use drop operation to test column_unique_ids path. NOTE(review): __isset.column_unique_ids is not set here, unlike in RenameColumnIndexTest - confirm the unique-id path is really taken TOlapTableIndex drop_index; drop_index.index_id = 1; drop_index.columns.emplace_back("non_existent_column"); drop_index.column_unique_ids.push_back(999); // This unique ID doesn't exist _alter_indexes.push_back(drop_index); // 7. Create IndexBuilder with drop operation (last argument true) IndexBuilder builder(_tablet, _columns, _alter_indexes, true); // 8. Initialize and verify that exactly one alter index id is recorded status = builder.init(); EXPECT_TRUE(status.ok()) << status.to_string(); EXPECT_EQ(builder._alter_index_ids.size(), 1); // 9. Execute drop operation - should handle non-existent column gracefully status = builder.do_build_inverted_index(); EXPECT_TRUE(status.ok()) << status.to_string(); // 10. Verify both paths exist - the operation should complete without errors; file contents are not checked auto old_tablet_path = _absolute_dir + "/" + std::to_string(15682); auto new_tablet_path = _absolute_dir + "/" + std::to_string(14682); bool old_exists = false; bool new_exists = false; EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); EXPECT_TRUE(old_exists); EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); EXPECT_TRUE(new_exists); } TEST_F(IndexBuilderTest, DropIndexV1FormatTest) { // 1. Create new schema using V1 format auto v1_schema = std::make_shared(); create_tablet_schema(v1_schema, KeysType::DUP_KEYS); // 2. Modify to V1 format v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; // 3. Add an initial index to the schema (for k1 column) TabletIndex initial_index; initial_index._index_id = 1; initial_index._index_name = "k1_index"; initial_index._index_type = IndexType::INVERTED; initial_index._col_unique_ids.push_back(1); // unique_id for k1 v1_schema->append_index(std::move(initial_index)); // 4.
Update schema in tablet TabletMetaPB tablet_meta_pb; _tablet_meta->to_meta_pb(&tablet_meta_pb); TabletSchemaPB v1_schema_pb; v1_schema->to_schema_pb(&v1_schema_pb); tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); _tablet_meta->init_from_pb(tablet_meta_pb); // 5. Recreate the fixture tablet from the updated meta so it uses the new schema _tablet = std::make_shared(*_engine, _tablet_meta, _data_dir.get()); ASSERT_TRUE(_tablet->init().ok()); auto tablet_path = _absolute_dir + "/" + std::to_string(15683); _tablet->_tablet_path = tablet_path; ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); // 6. Prepare data for writing RowsetSharedPtr rowset; const int num_rows = 1000; // 7. Create a rowset writer context; in this test the rowset directory is the tablet path itself RowsetWriterContext writer_context; writer_context.rowset_id.init(15683); writer_context.tablet_id = 15683; writer_context.tablet_schema_hash = 567997577; writer_context.partition_id = 10; writer_context.rowset_type = BETA_ROWSET; writer_context.rowset_dir = tablet_path; writer_context.rowset_state = VISIBLE; writer_context.tablet_schema = v1_schema; writer_context.version.first = 10; writer_context.version.second = 10; ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); // 8. Create a rowset writer std::unique_ptr rowset_writer; auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); ASSERT_TRUE(status.ok()) << status.to_string(); // 9.
Write data to the rowset { vectorized::Block block = v1_schema->create_block(); auto columns = block.mutate_columns(); // Add num_rows rows of data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { // k1 column (int) int32_t k1 = i * 10; columns[0]->insert_data((const char*)&k1, sizeof(k1)); // k2 column (int) int32_t k2 = i % 100; columns[1]->insert_data((const char*)&k2, sizeof(k2)); } // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); // Flush the writer s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s.to_string(); // Build the rowset ASSERT_TRUE(rowset_writer->build(rowset).ok()); // Add the rowset to the tablet ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); } // 10. Prepare to drop the k1 index _alter_indexes.clear(); TOlapTableIndex drop_index; drop_index.index_id = 1; drop_index.columns.emplace_back("k1"); drop_index.index_name = "k1_index"; drop_index.index_type = TIndexType::INVERTED; _alter_indexes.push_back(drop_index); // 11. Create IndexBuilder with drop operation (last argument true) IndexBuilder builder(_tablet, _columns, _alter_indexes, true); // 12. Initialize and verify that exactly one alter index id is recorded status = builder.init(); EXPECT_TRUE(status.ok()) << status.to_string(); EXPECT_EQ(builder._alter_index_ids.size(), 1); // 13. Execute drop operation status = builder.do_build_inverted_index(); EXPECT_TRUE(status.ok()) << status.to_string(); // 14. Verify the tablet path exists bool exists = false; EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); EXPECT_TRUE(exists); // 15.
Verify the index has been removed std::vector files; bool dir_exists = false; EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files, &dir_exists).ok()); EXPECT_TRUE(dir_exists); // Tally files by name: the names starting with the written rowset id count as old, the names starting with the long hexadecimal id count as new int new_idx_file_count = 0; int new_dat_file_count = 0; int old_idx_file_count = 0; int old_dat_file_count = 0; for (const auto& file : files) { std::string filename = file.file_name; if (filename.find("15683_0_1.idx") != std::string::npos) { old_idx_file_count++; } if (filename.find("15683_0.dat") != std::string::npos) { old_dat_file_count++; } if (filename.find("020000000000000100000000000000000000000000000000_0_1.idx") != std::string::npos) { new_idx_file_count++; } if (filename.find("020000000000000100000000000000000000000000000000_0.dat") != std::string::npos) { new_dat_file_count++; } } // The old-named files keep their .idx file, while the new-named files must have none EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file before drop"; EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file before drop"; EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file after drop"; EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop"; } TEST_F(IndexBuilderTest, ResourceCleanupTest) { // 0. prepare tablet path auto tablet_path = _absolute_dir + "/" + std::to_string(15684); _tablet->_tablet_path = tablet_path; ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); // 1. Prepare data for writing RowsetSharedPtr rowset; const int num_rows = 1000; // 2.
Create a rowset writer context RowsetWriterContext writer_context; writer_context.rowset_id.init(15684); writer_context.tablet_id = 15684; writer_context.tablet_schema_hash = 567997577; writer_context.partition_id = 10; writer_context.rowset_type = BETA_ROWSET; writer_context.rowset_dir = tablet_path; writer_context.rowset_state = VISIBLE; writer_context.tablet_schema = _tablet_schema; writer_context.version.first = 10; writer_context.version.second = 10; ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok()); // 3. Create a rowset writer std::unique_ptr rowset_writer; auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer); ASSERT_TRUE(status.ok()) << status.to_string(); // 4. Write data to the rowset { vectorized::Block block = _tablet_schema->create_block(); auto columns = block.mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { // k1 column (int) int32_t k1 = i * 10; columns[0]->insert_data((const char*)&k1, sizeof(k1)); // k2 column (int) int32_t k2 = i % 100; columns[1]->insert_data((const char*)&k2, sizeof(k2)); } // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); // Flush the writer s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s.to_string(); // Build the rowset ASSERT_TRUE(rowset_writer->build(rowset).ok()); // Add the rowset to the tablet ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); } // 5. 
Prepare indexes for building TOlapTableIndex index1; index1.index_id = 1; index1.columns.emplace_back("k1"); index1.index_name = "k1_index"; index1.index_type = TIndexType::INVERTED; _alter_indexes.push_back(index1); // Create a custom IndexBuilder with a spy function to test resource cleanup class TestIndexBuilder : public IndexBuilder { public: TestIndexBuilder(TabletSharedPtr tablet, const std::vector& columns, const std::vector& alter_inverted_indexes, bool is_drop_op) : IndexBuilder(tablet, columns, alter_inverted_indexes, is_drop_op) {} ~TestIndexBuilder() override = default; // Override update_inverted_index_info to inject failure Status update_inverted_index_info() override { RETURN_IF_ERROR(IndexBuilder::update_inverted_index_info()); // Create a fake error to trigger cleanup return Status::Error("Simulated error for testing cleanup"); } }; // 6. Create our test builder TestIndexBuilder builder(_tablet, _columns, _alter_indexes, false); // 7. Initialize and verify status = builder.init(); EXPECT_TRUE(status.ok()) << status.to_string(); EXPECT_EQ(builder._alter_index_ids.size(), 1); // 8. Build index - should fail with our simulated error status = builder.do_build_inverted_index(); EXPECT_FALSE(status.ok()) << "Expected failure, but got success"; EXPECT_TRUE(status.is()) << "Expected internal error"; EXPECT_EQ(status.to_string(), "[INTERNAL_ERROR]Simulated error for testing cleanup") << "Error message doesn't match expected"; // Verify the paths haven't been modified since the operation failed bool exists = false; EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); EXPECT_TRUE(exists); auto rowset_id = extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat"); EXPECT_TRUE(_engine->check_rowset_id_in_unused_rowsets(rowset_id)) << "Rowset id should be in unused rowsets"; } TEST_F(IndexBuilderTest, ArrayTypeIndexTest) { // 1. 
// Prepare tablet path
    auto tablet_path = _absolute_dir + "/" + std::to_string(14685);
    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());

    // 2. Create tablet schema with array type
    auto tablet_schema = std::make_shared();
    TabletColumn column_1(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
                          FieldType::OLAP_FIELD_TYPE_INT, true);
    column_1.set_unique_id(1);
    column_1.set_is_key(true);
    column_1.set_name("k1");
    tablet_schema->append_column(column_1);

    // Array type column
    TabletColumn column_2;
    column_2.set_unique_id(2);
    column_2.set_is_key(false);
    column_2.set_name("array_col");
    column_2.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY);
    column_2.set_is_nullable(false);

    // Add a primitive type for array items
    TabletColumn array_item_column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
                                   FieldType::OLAP_FIELD_TYPE_VARCHAR, true);
    array_item_column.set_is_key(false);
    array_item_column.set_length(64);
    column_2.add_sub_column(array_item_column);
    tablet_schema->append_column(column_2);

    // 3. Create tablet
    // A dedicated tablet is used here instead of the fixture's _tablet.
    auto tablet_meta = create_tablet_meta();
    auto tablet = std::make_shared(*_engine, tablet_meta, _data_dir.get());
    tablet->_tablet_path = tablet_path;
    ASSERT_TRUE(tablet->init().ok());

    // 4. Add inverted index for array column
    _columns.clear();
    TColumn tc1;
    tc1.column_name = "array_col";
    _columns.push_back(tc1);

    _alter_indexes.clear();
    TOlapTableIndex tt_index;
    tt_index.index_id = 1;
    tt_index.index_name = "array_index";
    tt_index.columns.emplace_back("array_col");
    // Matches the unique id assigned to column_2 above.
    tt_index.column_unique_ids.push_back(2);
    tt_index.index_type = TIndexType::type::INVERTED;
    _alter_indexes.push_back(tt_index);

    // 5. Create a rowset writer
    RowsetWriterContext writer_context;
    writer_context.rowset_id.init(14685);
    writer_context.tablet_id = 14685;
    writer_context.tablet_schema_hash = 567997577;
    writer_context.partition_id = 10;
    writer_context.rowset_type = BETA_ROWSET;
    writer_context.rowset_dir = tablet_path;
    writer_context.rowset_state = VISIBLE;
    writer_context.tablet_schema = tablet_schema;
    writer_context.version.first = 10;
    writer_context.version.second = 10;
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok());

    // 6. Create rowset writer
    std::unique_ptr rowset_writer;
    auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
    ASSERT_TRUE(status.ok()) << status.to_string();

    // 7. Create data block and write data
    {
        vectorized::Block block = tablet_schema->create_block();
        auto columns = block.mutate_columns();

        // Prepare columns for k1 and array_col
        for (int i = 0; i < 1000; i++) {
            // k1 column (int)
            int32_t k1 = i;
            columns[0]->insert_data((const char*)&k1, sizeof(k1));

            // array_col column
            // Create array data with 1-5 elements
            int array_size = i % 5 + 1;

            // For array type, we need to create a complex nested column structure
            auto& array_col = static_cast(*columns[1]);
            vectorized::Array arr;

            // Add string elements to the array ("item_<row>_<position>")
            for (int j = 0; j < array_size; j++) {
                std::string val = "item_" + std::to_string(i) + "_" + std::to_string(j);
                arr.push_back(vectorized::Field(val));
            }
            array_col.insert(arr);
        }

        // Add block to rowset
        Status s = rowset_writer->add_block(&block);
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Flush writer
        s = rowset_writer->flush();
        ASSERT_TRUE(s.ok()) << s.to_string();
    }

    // 8. Build rowset
    RowsetSharedPtr rowset;
    ASSERT_TRUE(rowset_writer->build(rowset).ok());
    ASSERT_TRUE(rowset != nullptr);
    ASSERT_TRUE(tablet->add_rowset(rowset).ok());

    // 9. Initialize and build inverted index
    IndexBuilder builder(tablet, _columns, _alter_indexes, false);
    status = builder.init();
    EXPECT_TRUE(status.ok()) << status.to_string();
    status = builder.do_build_inverted_index();
    EXPECT_TRUE(status.ok()) << status.to_string();

    // 10. Verify that the index has been created
    // The expected file location depends on the schema's inverted index storage format.
    std::string segment_path = local_segment_path(
            tablet->tablet_path(),
            extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat").to_string(),
            0);
    if (tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) {
        // V1 format
        auto index_path = InvertedIndexDescriptor::inverted_index_file_path(
                tablet->tablet_path(),
                extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat"), 0, 1,
                "");
        bool exists = false;
        EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok());
        EXPECT_TRUE(exists) << "Index file not found: " << index_path;
    } else {
        // V2+ format
        auto index_path = InvertedIndexDescriptor::get_index_file_name(segment_path);
        bool exists = false;
        EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok());
        EXPECT_TRUE(exists) << "Index file not found: " << index_path;
    }
}

// Builds an inverted index on k1 after switching the fixture tablet's schema to UNIQUE_KEYS
// with merge-on-write enabled, then checks that the index file exists.
TEST_F(IndexBuilderTest, UniqueKeysTableIndexTest) {
    // 0. prepare tablet path
    auto tablet_path = _absolute_dir + "/" + std::to_string(14688);
    _tablet->_tablet_path = tablet_path;
    // Reconfigure the fixture tablet's meta in place: share _tablet_schema and mark it as a
    // unique-key, merge-on-write table.
    _tablet->_tablet_meta->_schema = _tablet_schema;
    _tablet->_tablet_meta->_schema->_keys_type = KeysType::UNIQUE_KEYS;
    _tablet->_tablet_meta->_enable_unique_key_merge_on_write = true;
    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());

    // 1. Prepare data for writing
    RowsetSharedPtr rowset;
    const int rows_per_segment = 500;

    // 2.
// Create a rowset writer context with segment size set to trigger multiple segments
    RowsetWriterContext writer_context;
    // NOTE(review): the rowset/tablet id used here (15677) differs from the id used for the
    // tablet path (14688) -- confirm this is intentional.
    writer_context.rowset_id.init(15677);
    writer_context.tablet_id = 15677;
    writer_context.tablet_schema_hash = 567997577;
    writer_context.partition_id = 10;
    writer_context.rowset_type = BETA_ROWSET;
    writer_context.rowset_dir = tablet_path;
    writer_context.rowset_state = VISIBLE;
    writer_context.tablet_schema = _tablet_schema;
    writer_context.version.first = 10;
    writer_context.version.second = 10;
    // Set small segment size to ensure we create multiple segments
    writer_context.max_rows_per_segment = rows_per_segment;
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok());

    std::unique_ptr rowset_writer;
    auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
    ASSERT_TRUE(status.ok()) << status.to_string();

    {
        vectorized::Block block = _tablet_schema->create_block();
        auto columns = block.mutate_columns();

        // Add data for k1 and k2 columns
        for (int i = 0; i < 1000; ++i) {
            // k1 column (int)
            int32_t k1 = i * 10;
            columns[0]->insert_data((const char*)&k1, sizeof(k1));
            // k2 column (int)
            int32_t k2 = i % 100;
            columns[1]->insert_data((const char*)&k2, sizeof(k2));
        }

        // Add the block to the rowset
        Status s = rowset_writer->add_block(&block);
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Flush the writer
        s = rowset_writer->flush();
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Build the rowset
        ASSERT_TRUE(rowset_writer->build(rowset).ok());

        // Add the rowset to the tablet
        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
    }

    // 6. Create test class that overrides methods to simulate unique key table behavior
    class TestIndexBuilder : public IndexBuilder {
    public:
        TestIndexBuilder(TabletSharedPtr tablet, const std::vector& columns,
                         const std::vector& alter_inverted_indexes, bool is_drop_op)
                : IndexBuilder(tablet, columns, alter_inverted_indexes, is_drop_op) {}
        ~TestIndexBuilder() override = default;

        // Override to make sure modify_rowsets with UNIQUE_KEYS path is called.
        // The override only forwards to the base implementation.
        Status modify_rowsets(const Merger::Statistics* stats = nullptr) override {
            // Call parent method which should use the UNIQUE_KEYS path
            return IndexBuilder::modify_rowsets(stats);
        }
    };

    _alter_indexes.clear();
    TOlapTableIndex tt_index;
    tt_index.index_id = 1;
    tt_index.index_name = "k1_index";
    tt_index.columns.emplace_back("k1");
    tt_index.column_unique_ids.push_back(1);
    tt_index.index_type = TIndexType::type::INVERTED;
    _alter_indexes.push_back(tt_index);

    // 7. Initialize and build inverted index
    TestIndexBuilder builder(_tablet, _columns, _alter_indexes, false);
    status = builder.init();
    EXPECT_TRUE(status.ok()) << status.to_string();

    // 8. Execute build index, which should go through UNIQUE_KEYS path in modify_rowsets
    status = builder.do_build_inverted_index();
    EXPECT_TRUE(status.ok()) << status.to_string();

    // 9. Verify that the index was created successfully
    // The expected file location depends on the schema's inverted index storage format.
    std::string segment_path = local_segment_path(
            _tablet->tablet_path(),
            extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat").to_string(),
            0);
    if (_tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) {
        auto index_path = InvertedIndexDescriptor::inverted_index_file_path(
                _tablet->tablet_path(),
                extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat"), 0, 1,
                "");
        bool exists = false;
        EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok());
        EXPECT_TRUE(exists) << "Index file not found: " << index_path;
    } else {
        auto index_path = InvertedIndexDescriptor::get_index_file_name(segment_path);
        bool exists = false;
        EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok());
        EXPECT_TRUE(exists) << "Index file not found: " << index_path;
    }
}

// Checks that an error returned from handle_single_rowset() is propagated by
// do_build_inverted_index().
TEST_F(IndexBuilderTest, HandleSingleRowsetErrorTest) {
    // 1. Create a test class that overrides handle_single_rowset to simulate error scenarios
    class TestIndexBuilder : public IndexBuilder {
    public:
        TestIndexBuilder(TabletSharedPtr tablet, const std::vector& columns,
                         const std::vector& alter_inverted_indexes, bool is_drop_op,
                         bool simulate_non_local_rowset_error = false)
                : IndexBuilder(tablet, columns, alter_inverted_indexes, is_drop_op),
                  _simulate_non_local_rowset_error(simulate_non_local_rowset_error) {}
        ~TestIndexBuilder() override = default;

        // Override to simulate error conditions
        Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta,
                                    std::vector& segments) override {
            if (_simulate_non_local_rowset_error) {
                // Simulate the condition where is_local_rowset is false
                return Status::InternalError("should be local rowset. tablet_id={} rowset_id={}",
                                             123, "test_rowset_id");
            }
            // Call parent method for normal processing
            return IndexBuilder::handle_single_rowset(output_rowset_meta, segments);
        }

    private:
        // When true, handle_single_rowset() returns the simulated error instead of
        // delegating to the base class.
        bool _simulate_non_local_rowset_error;
    };

    // 2.
Prepare tablet path std::string tablet_path = _absolute_dir + "/" + std::to_string(14687); EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); // 3. Set up tablet schema and tablet TabletSchemaSPtr tablet_schema = std::make_shared(); create_tablet_schema(tablet_schema, KeysType::DUP_KEYS, 2); auto tablet_meta = create_tablet_meta(); tablet_meta->_schema = tablet_schema; auto tablet = std::make_shared(*_engine, tablet_meta, _data_dir.get()); tablet->_tablet_path = tablet_path; ASSERT_TRUE(tablet->init().ok()); // 4. Create inverted index definition _alter_indexes.clear(); TOlapTableIndex tt_index; tt_index.index_id = 1; tt_index.index_name = "k1_index"; tt_index.columns.emplace_back("k1"); tt_index.column_unique_ids.push_back(1); tt_index.index_type = TIndexType::type::INVERTED; _alter_indexes.push_back(tt_index); // 5. Create a rowset RowsetWriterContext writer_context; writer_context.rowset_id = _engine->next_rowset_id(); writer_context.tablet_id = 14687; writer_context.rowset_dir = tablet_path; writer_context.tablet_schema_hash = 1111; writer_context.partition_id = 10; writer_context.rowset_type = BETA_ROWSET; writer_context.segments_overlap = NONOVERLAPPING; writer_context.tablet_schema = tablet_schema; writer_context.version.first = 10; writer_context.version.second = 10; auto result = tablet->create_rowset_writer(writer_context, false); EXPECT_TRUE(result.has_value()) << result.error(); auto rowset_writer = std::move(result).value(); { vectorized::Block block = _tablet_schema->create_block(); auto columns = block.mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < 1000; ++i) { // k1 column (int) int32_t k1 = i * 10; columns[0]->insert_data((const char*)&k1, sizeof(k1)); // k2 column (int) int32_t k2 = i % 100; columns[1]->insert_data((const char*)&k2, sizeof(k2)); } // Add the block to the rowset Status s = 
rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); // Flush the writer s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s.to_string(); // Build the rowset RowsetSharedPtr rowset; ASSERT_TRUE(rowset_writer->build(rowset).ok()); // Add the rowset to the tablet ASSERT_TRUE(tablet->add_rowset(rowset).ok()); } // 6. Test error scenario with non-local rowset TestIndexBuilder builder(tablet, _columns, _alter_indexes, false, true); auto status = builder.init(); EXPECT_TRUE(status.ok()) << status.to_string(); // Execute build_index, which should fail due to simulated error status = builder.do_build_inverted_index(); EXPECT_FALSE(status.ok()) << "Expected failure but got success"; EXPECT_TRUE(status.is()) << "Expected internal error but got: " << status.to_string(); EXPECT_TRUE(status.to_string().find("should be local rowset") != std::string::npos) << "Error message doesn't match expected: " << status.to_string(); } TEST_F(IndexBuilderTest, UpdateInvertedIndexInfoErrorTest) { // 1. Create a test class that overrides update_inverted_index_info to simulate error scenarios class TestIndexBuilder : public IndexBuilder { public: TestIndexBuilder(TabletSharedPtr tablet, const std::vector& columns, const std::vector& alter_inverted_indexes, bool is_drop_op, int error_type = 0) : IndexBuilder(tablet, columns, alter_inverted_indexes, is_drop_op), _error_type(error_type) {} ~TestIndexBuilder() override = default; // Override update_inverted_index_info to inject errors Status update_inverted_index_info() override { if (_error_type == 1) { // Simulate non-local rowset error in update_inverted_index_info return Status::InternalError("should be local rowset. 
tablet_id={} rowset_id={}", 123, "test_rowset_id"); } else if (_error_type == 2) { // Simulate size retrieval error return Status::Error("debug point: get fs failed"); } // Call parent method for normal processing return IndexBuilder::update_inverted_index_info(); } private: int _error_type; // 0: no error, 1: non-local rowset error, 2: size retrieval error }; // 2. Prepare tablet path std::string tablet_path = _absolute_dir + "/" + std::to_string(14688); EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); // 3. Set up tablet schema and tablet TabletSchemaSPtr tablet_schema = std::make_shared(); create_tablet_schema(tablet_schema, KeysType::DUP_KEYS, 2); auto tablet_meta = create_tablet_meta(); tablet_meta->_schema = tablet_schema; auto tablet = std::make_shared(*_engine, tablet_meta, _data_dir.get()); tablet->_tablet_path = tablet_path; ASSERT_TRUE(tablet->init().ok()); // 4. Create inverted index definition _alter_indexes.clear(); TOlapTableIndex tt_index; tt_index.index_id = 1; tt_index.index_name = "k1_index"; tt_index.columns.emplace_back("k1"); tt_index.column_unique_ids.push_back(1); tt_index.index_type = TIndexType::type::INVERTED; _alter_indexes.push_back(tt_index); // 5. 
Create a rowset RowsetWriterContext writer_context; writer_context.rowset_id = _engine->next_rowset_id(); writer_context.tablet_id = 14688; writer_context.rowset_dir = tablet_path; writer_context.tablet_schema_hash = 1111; writer_context.partition_id = 10; writer_context.rowset_type = BETA_ROWSET; writer_context.segments_overlap = NONOVERLAPPING; writer_context.tablet_schema = tablet_schema; writer_context.version.first = 10; writer_context.version.second = 10; auto result = tablet->create_rowset_writer(writer_context, false); EXPECT_TRUE(result.has_value()) << result.error(); auto rowset_writer = std::move(result).value(); // Write data { vectorized::Block block = tablet_schema->create_block(); auto columns = block.mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < 1000; ++i) { // k1 column (int) int32_t k1 = i * 10; columns[0]->insert_data((const char*)&k1, sizeof(k1)); // k2 column (int) int32_t k2 = i % 100; columns[1]->insert_data((const char*)&k2, sizeof(k2)); } // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); // Flush the writer s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s.to_string(); // Build the rowset RowsetSharedPtr rowset; ASSERT_TRUE(rowset_writer->build(rowset).ok()); // Add the rowset to the tablet ASSERT_TRUE(tablet->add_rowset(rowset).ok()); } // 6. 
Test error scenarios // 6.1 Test non-local rowset error { TestIndexBuilder builder(tablet, _columns, _alter_indexes, false, 1); auto status = builder.init(); EXPECT_TRUE(status.ok()) << status.to_string(); // Execute build_index, which should fail due to simulated error status = builder.do_build_inverted_index(); EXPECT_FALSE(status.ok()) << "Expected failure but got success"; EXPECT_TRUE(status.is()) << "Expected internal error but got: " << status.to_string(); EXPECT_TRUE(status.to_string().find("should be local rowset") != std::string::npos) << "Error message doesn't match expected: " << status.to_string(); } // 6.2 Test size retrieval error { TestIndexBuilder builder(tablet, _columns, _alter_indexes, false, 2); auto status = builder.init(); EXPECT_TRUE(status.ok()) << status.to_string(); // Execute build_index, which should fail due to simulated error status = builder.do_build_inverted_index(); EXPECT_FALSE(status.ok()) << "Expected failure but got success"; EXPECT_TRUE(status.is()) << "Expected INIT_FAILED but got: " << status.to_string(); EXPECT_TRUE(status.to_string().find("debug point: get fs failed") != std::string::npos) << "Error message doesn't match expected: " << status.to_string(); } } } // namespace doris