pick #47562 #50056 from master --------- Co-authored-by: qiye <luen@selectdb.com>
This commit is contained in:
@ -825,6 +825,11 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool
|
||||
continue;
|
||||
}
|
||||
auto col_unique_id = col_unique_ids[0];
|
||||
if (!_cur_tablet_schema->has_column_unique_id(col_unique_id)) {
|
||||
LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id["
|
||||
<< col_unique_id << "] not found, will skip index compaction";
|
||||
continue;
|
||||
}
|
||||
// Avoid doing inverted index compaction on non-slice type columns
|
||||
if (!field_is_slice_type(_cur_tablet_schema->column_by_uid(col_unique_id).type())) {
|
||||
continue;
|
||||
|
||||
@ -740,10 +740,9 @@ void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
|
||||
col_unique_ids[i] = tablet_schema.column(column_idx).unique_id();
|
||||
} else {
|
||||
// if column unique id not found by column name, find by column unique id
|
||||
// column unique id can not bigger than tablet schema column size, if bigger than column size means
|
||||
// this column is a new column added by light schema change
|
||||
if (index.__isset.column_unique_ids &&
|
||||
index.column_unique_ids[i] < tablet_schema.num_columns()) {
|
||||
// if the column unique id cannot be found, this column is a new column added by light schema change
|
||||
if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() &&
|
||||
tablet_schema.has_column_unique_id(index.column_unique_ids[i])) {
|
||||
col_unique_ids[i] = index.column_unique_ids[i];
|
||||
} else {
|
||||
col_unique_ids[i] = -1;
|
||||
@ -1330,6 +1329,10 @@ void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTabl
|
||||
_indexes = std::move(indexes);
|
||||
}
|
||||
|
||||
bool TabletSchema::has_column_unique_id(int32_t col_unique_id) const {
|
||||
return _field_id_to_index.contains(col_unique_id);
|
||||
}
|
||||
|
||||
Status TabletSchema::have_column(const std::string& field_name) const {
|
||||
if (!_field_name_to_index.contains(StringRef(field_name))) {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>(
|
||||
|
||||
@ -315,6 +315,7 @@ public:
|
||||
const TabletColumn& column(size_t ordinal) const;
|
||||
const TabletColumn& column(const std::string& field_name) const;
|
||||
Status have_column(const std::string& field_name) const;
|
||||
bool has_column_unique_id(int32_t col_unique_id) const;
|
||||
const TabletColumn& column_by_uid(int32_t col_unique_id) const;
|
||||
TabletColumn& mutable_column_by_uid(int32_t col_unique_id);
|
||||
TabletColumn& mutable_column(size_t ordinal);
|
||||
|
||||
@ -351,7 +351,8 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
|
||||
auto column_name = inverted_index.columns[0];
|
||||
auto column_idx = output_rowset_schema->field_index(column_name);
|
||||
if (column_idx < 0) {
|
||||
if (!inverted_index.column_unique_ids.empty()) {
|
||||
if (inverted_index.__isset.column_unique_ids &&
|
||||
!inverted_index.column_unique_ids.empty()) {
|
||||
column_idx = output_rowset_schema->field_index(
|
||||
inverted_index.column_unique_ids[0]);
|
||||
}
|
||||
|
||||
@ -1064,6 +1064,180 @@ TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) {
|
||||
// but the file count verification above should be sufficient to confirm behavior
|
||||
}
|
||||
|
||||
// Scenario: an inverted index is requested under a column name ("k3") that is
// not present in the rowset schema, while its column_unique_ids entry (3)
// matches the existing column "k2". This models a column rename: the index
// must be resolved through the unique id rather than through the name.
TEST_F(IndexBuilderTest, RenameColumnIndexTest) {
    // 0. Prepare a clean tablet directory.
    auto tablet_path = _absolute_dir + "/" + std::to_string(14679);
    _tablet->_tablet_path = tablet_path;
    ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok());
    ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok());
    auto schema = std::make_shared<TabletSchema>();

    schema->_keys_type = KeysType::UNIQUE_KEYS;
    schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V2;

    // Key column "k1" with unique_id 1.
    TabletColumn column_1;
    column_1.set_type(FieldType::OLAP_FIELD_TYPE_INT);
    column_1.set_unique_id(1);
    column_1.set_name("k1");
    column_1.set_is_key(true);
    schema->append_column(column_1);

    // Non-key column "k2". Its unique_id (3) deliberately does not follow the
    // column ordinal, so a lookup by ordinal/size cannot stand in for a lookup
    // by unique id.
    TabletColumn column_2;
    column_2.set_type(FieldType::OLAP_FIELD_TYPE_INT);
    column_2.set_unique_id(3);
    column_2.set_name("k2");
    column_2.set_is_key(false);
    schema->append_column(column_2);

    // 1. Prepare data for writing.
    RowsetSharedPtr rowset;
    const int num_rows = 1000;

    // 2. The schema starts with one inverted index, on k1.
    TabletIndex initial_index;
    initial_index._index_id = 1;
    initial_index._index_name = "k1_index";
    initial_index._index_type = IndexType::INVERTED;
    initial_index._col_unique_ids.push_back(1); // unique_id for k1
    schema->append_index(std::move(initial_index));

    // 3. Create a rowset writer context.
    RowsetWriterContext writer_context;
    writer_context.rowset_id.init(15679);
    writer_context.tablet_id = 15679;
    writer_context.tablet_schema_hash = 567997577;
    writer_context.partition_id = 10;
    writer_context.rowset_type = BETA_ROWSET;
    writer_context.rowset_dir = _absolute_dir + "/" + std::to_string(15679);
    writer_context.rowset_state = VISIBLE;
    writer_context.tablet_schema = schema;
    writer_context.version.first = 10;
    writer_context.version.second = 10;

    ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.rowset_dir).ok());

    // 4. Create a rowset writer.
    std::unique_ptr<RowsetWriter> rowset_writer;
    auto status = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
    ASSERT_TRUE(status.ok()) << status.to_string();

    // 5. Write one block of num_rows rows, then build the rowset and attach it
    //    to the tablet.
    {
        vectorized::Block block = schema->create_block();
        auto columns = block.mutate_columns();

        for (int i = 0; i < num_rows; ++i) {
            // k1 column (int)
            int32_t k1 = i * 10;
            columns[0]->insert_data(reinterpret_cast<const char*>(&k1), sizeof(k1));

            // k2 column (int)
            int32_t k2 = i % 100;
            columns[1]->insert_data(reinterpret_cast<const char*>(&k2), sizeof(k2));
        }

        // Add the block to the rowset
        Status s = rowset_writer->add_block(&block);
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Flush the writer
        s = rowset_writer->flush();
        ASSERT_TRUE(s.ok()) << s.to_string();

        // Build the rowset
        ASSERT_TRUE(rowset_writer->build(rowset).ok());

        // Add the rowset to the tablet
        ASSERT_TRUE(_tablet->add_rowset(rowset).ok());
    }

    // 6. Request a single index whose column name no longer matches the schema.
    _alter_indexes.clear();

    // The index names column "k3", which is absent from the schema, but carries
    // unique id 3, which belongs to "k2" (i.e. "k2" renamed to "k3").
    TOlapTableIndex index2;
    index2.index_id = 3;
    index2.columns.emplace_back("k3"); // no column with this name in the schema
    index2.index_name = "k3_index";
    index2.index_type = TIndexType::INVERTED;
    index2.column_unique_ids.push_back(3);
    index2.__isset.column_unique_ids = true;
    _alter_indexes.push_back(index2);

    // 7. Create IndexBuilder.
    IndexBuilder builder(_tablet, _columns, _alter_indexes, false);

    // 8. Initialize and verify that the one requested index (k3_index) is
    //    registered for building.
    status = builder.init();
    EXPECT_TRUE(status.ok()) << status.to_string();
    EXPECT_EQ(builder._alter_index_ids.size(), 1);

    // 9. Build the index; the build is expected to succeed even though the
    //    index's column name is not found in the schema.
    status = builder.do_build_inverted_index();
    EXPECT_TRUE(status.ok()) << status.to_string();

    // 10. Both the original rowset directory ("old") and the tablet directory
    //     ("new") must exist after the build.
    auto old_tablet_path = _absolute_dir + "/" + std::to_string(15679);
    auto new_tablet_path = _absolute_dir + "/" + std::to_string(14679);
    bool old_exists = false;
    bool new_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok());
    EXPECT_TRUE(old_exists);
    EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok());
    EXPECT_TRUE(new_exists);

    // Counts the listed files whose name contains `token` (e.g. ".idx").
    const auto count_files_containing = [](const std::vector<io::FileInfo>& files,
                                           const std::string& token) {
        int matches = 0;
        for (const auto& file : files) {
            const std::string& filename = file.file_name;
            if (filename.find(token) != std::string::npos) {
                ++matches;
            }
        }
        return matches;
    };

    // 11. Check files in the old and new directories.
    std::vector<io::FileInfo> old_files;
    bool old_dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()
                        ->list(old_tablet_path, true, &old_files, &old_dir_exists)
                        .ok());
    EXPECT_TRUE(old_dir_exists);
    const int old_idx_file_count = count_files_containing(old_files, ".idx");
    const int old_dat_file_count = count_files_containing(old_files, ".dat");
    EXPECT_EQ(old_idx_file_count, 1)
            << "Old directory should contain exactly 1 .idx file for the original k1 index";
    EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file";

    std::vector<io::FileInfo> new_files;
    bool new_dir_exists = false;
    EXPECT_TRUE(io::global_local_filesystem()
                        ->list(new_tablet_path, true, &new_files, &new_dir_exists)
                        .ok());
    EXPECT_TRUE(new_dir_exists);
    const int new_idx_file_count = count_files_containing(new_files, ".idx");
    const int new_dat_file_count = count_files_containing(new_files, ".dat");
    // Exactly one .idx file is expected in the new directory.
    // NOTE(review): presumably the V2 storage format configured above keeps all
    // indexes of a segment in a single .idx file - confirm.
    EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx file";
    EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file";

    // 12. TODO: this test only checks file counts; it does not inspect the
    //     resulting tablet schema to confirm which indexes it contains.
}
|
||||
TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) {
|
||||
// 0. prepare tablet path
|
||||
auto tablet_path = _absolute_dir + "/" + std::to_string(14679);
|
||||
|
||||
Reference in New Issue
Block a user