[Fix](SchemaChange) refactor variant root column iterator to make row… (#41941)

pick #41700
This commit is contained in:
lihangyu
2024-10-17 10:39:07 +08:00
committed by GitHub
parent d04082f685
commit 0b6447faeb
4 changed files with 53 additions and 46 deletions

View File

@ -1501,28 +1501,14 @@ void DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP
}
}
Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst,
bool* has_null) {
size_t size = dst->size();
Status VariantRootColumnIterator::_process_root_column(
vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr& root_column,
const vectorized::DataTypePtr& most_common_type) {
auto& obj =
dst->is_nullable()
? assert_cast<vectorized::ColumnObject&>(
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
: assert_cast<vectorized::ColumnObject&>(*dst);
if (obj.is_null_root()) {
obj.create_root();
}
if (!obj.is_finalized()) {
obj.finalize();
}
auto root_column = obj.get_root();
RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
obj.incr_num_rows(*n);
for (auto& entry : obj.get_subcolumns()) {
if (entry->data.size() != size + *n) {
entry->data.insertManyDefaults(*n);
}
}
// fill nullmap
if (root_column->is_nullable() && dst->is_nullable()) {
vectorized::ColumnUInt8& dst_null_map =
@ -1531,47 +1517,57 @@ Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColum
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size());
}
// add root column to a tmp object column
auto tmp = vectorized::ColumnObject::create(true, false);
auto& tmp_obj = assert_cast<vectorized::ColumnObject&>(*tmp);
tmp_obj.add_sub_column({}, std::move(root_column), most_common_type);
// merge tmp object column to dst
obj.insert_range_from(*tmp, 0, tmp->size());
// finalize object if needed
if (!obj.is_finalized()) {
obj.finalize();
}
#ifndef NDEBUG
obj.check_consistency();
#endif
return Status::OK();
}
Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count,
vectorized::MutableColumnPtr& dst) {
size_t size = dst->size();
Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst,
bool* has_null) {
// read root column
auto& obj =
dst->is_nullable()
? assert_cast<vectorized::ColumnObject&>(
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
: assert_cast<vectorized::ColumnObject&>(*dst);
if (obj.is_null_root()) {
obj.create_root();
}
if (!obj.is_finalized()) {
obj.finalize();
}
auto root_column = obj.get_root();
auto most_common_type = obj.get_most_common_type();
auto root_column = most_common_type->create_column();
RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
return _process_root_column(dst, root_column, most_common_type);
}
Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count,
vectorized::MutableColumnPtr& dst) {
// read root column
auto& obj =
dst->is_nullable()
? assert_cast<vectorized::ColumnObject&>(
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
: assert_cast<vectorized::ColumnObject&>(*dst);
auto most_common_type = obj.get_most_common_type();
auto root_column = most_common_type->create_column();
RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column));
obj.incr_num_rows(count);
for (auto& entry : obj.get_subcolumns()) {
if (entry->data.size() != (size + count)) {
entry->data.insertManyDefaults(count);
}
}
// fill nullmap
if (root_column->is_nullable() && dst->is_nullable()) {
vectorized::ColumnUInt8& dst_null_map =
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
vectorized::ColumnUInt8& src_null_map =
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
DCHECK_EQ(src_null_map.size() - size, count);
dst_null_map.insert_range_from(src_null_map, size, count);
}
#ifndef NDEBUG
obj.check_consistency();
#endif
return Status::OK();
return _process_root_column(dst, root_column, most_common_type);
}
} // namespace segment_v2

View File

@ -633,6 +633,9 @@ public:
ordinal_t get_current_ordinal() const override { return _inner_iter->get_current_ordinal(); }
private:
// Common tail of next_batch() and read_by_rowids(): both read the root
// column into `root_column` (created from `most_common_type`) and then call
// this helper to append it to the variant column `dst`.
//   dst              - destination column; either a ColumnObject or a
//                      ColumnNullable wrapping one.
//   root_column      - freshly read root data; it is moved into a temporary
//                      ColumnObject, so it must not be used after the call.
//   most_common_type - data type registered for the root sub-column.
// When both `dst` and `root_column` are nullable, the null map of
// `root_column` is appended to the null map of `dst` as well.
Status _process_root_column(vectorized::MutableColumnPtr& dst,
vectorized::MutableColumnPtr& root_column,
const vectorized::DataTypePtr& most_common_type);
std::unique_ptr<FileColumnIterator> _inner_iter;
};

View File

@ -1563,6 +1563,12 @@ void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&& colum
add_sub_column({}, std::move(column), type);
}
// Returns the MostCommonType data type, wrapped by make_nullable() when this
// column was created with is_nullable set.
DataTypePtr ColumnObject::get_most_common_type() const {
    DataTypePtr base_type = std::make_shared<MostCommonType>();
    if (!is_nullable) {
        return base_type;
    }
    return make_nullable(base_type);
}
bool ColumnObject::is_null_root() const {
auto* root = subcolumns.get_root();
if (root == nullptr) {

View File

@ -274,6 +274,8 @@ public:
// create root with type and column if missing
void create_root(const DataTypePtr& type, MutableColumnPtr&& column);
// data type used for the root column: MostCommonType, made nullable when is_nullable is set
DataTypePtr get_most_common_type() const;
// root is null or type nothing
bool is_null_root() const;