[FIX](complex-type) fix agg table with complex type with replace state (#24873)

fix agg table with complex type with replace state
This commit is contained in:
amory
2023-10-03 16:32:58 +08:00
committed by GitHub
parent f8a3034dca
commit 10f0c63896
7 changed files with 85 additions and 16 deletions

View File

@ -227,12 +227,26 @@ public:
void insert_indices_from(const IColumn& src, const int* indices_begin,
const int* indices_end) override;
void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override {
LOG(FATAL) << "replace_column_data not implemented";
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
DCHECK(size() > self_row);
const auto& r = assert_cast<const ColumnArray&>(rhs);
const size_t nested_row_size = r.size_at(row);
const size_t r_nested_start_off = r.offset_at(row);
// we should clear data because we call resize() before replace_column_data()
if (self_row == 0) {
data->clear();
}
get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size;
// we make sure call replace_column_data() by order so, here we just insert data for nested
data->insert_range_from(r.get_data(), r_nested_start_off, nested_row_size);
}
void replace_column_data_default(size_t self_row = 0) override {
LOG(FATAL) << "replace_column_data_default not implemented";
DCHECK(size() > self_row);
get_offsets()[self_row] = get_offsets()[self_row - 1];
}
void clear() override {
data->clear();
offsets->clear();

View File

@ -140,11 +140,26 @@ public:
return append_data_by_selector_impl<ColumnMap>(res, selector);
}
void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override {
LOG(FATAL) << "replace_column_data not implemented";
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
DCHECK(size() > self_row);
const auto& r = assert_cast<const ColumnMap&>(rhs);
const size_t nested_row_size = r.size_at(row);
const size_t r_key_nested_start_off = r.offset_at(row);
const size_t r_val_nested_start_off = r.offset_at(row);
if (self_row == 0) {
keys_column->clear();
values_column->clear();
}
get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size;
// here we use batch size to avoid many virtual call in nested column
keys_column->insert_range_from(r.get_keys(), r_key_nested_start_off, nested_row_size);
values_column->insert_range_from(r.get_values(), r_val_nested_start_off, nested_row_size);
}
void replace_column_data_default(size_t self_row = 0) override {
LOG(FATAL) << "replace_column_data_default not implemented";
DCHECK(size() > self_row);
get_offsets()[self_row] = get_offsets()[self_row - 1];
}
ColumnArray::Offsets64& ALWAYS_INLINE get_offsets() {

View File

@ -34,6 +34,7 @@
#include "common/status.h"
#include "vec/columns/column.h"
#include "vec/columns/column_impl.h"
#include "vec/common/assert_cast.h"
#include "vec/common/cow.h"
#include "vec/common/sip_hash.h"
#include "vec/common/string_ref.h"
@ -130,11 +131,20 @@ public:
void append_data_by_selector(MutableColumnPtr& res, const Selector& selector) const override {
return append_data_by_selector_impl<ColumnStruct>(res, selector);
}
void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override {
LOG(FATAL) << "replace_column_data not implemented";
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
DCHECK(size() > self_row);
const auto& r = assert_cast<const ColumnStruct&>(rhs);
for (size_t idx = 0; idx < columns.size(); ++idx) {
columns[idx]->replace_column_data(r.get_column(idx), row, self_row);
}
}
void replace_column_data_default(size_t self_row = 0) override {
LOG(FATAL) << "replace_column_data_default not implemented";
DCHECK(size() > self_row);
for (size_t idx = 0; idx < columns.size(); ++idx) {
columns[idx]->replace_column_data_default(self_row);
}
}
void insert_range_from(const IColumn& src, size_t start, size_t length) override;

View File

@ -163,7 +163,7 @@ void BlockReader::_init_agg_state(const ReaderParams& read_params) {
_next_row.block->create_same_struct_block(_reader_context.batch_size)->mutate_columns();
_stored_has_null_tag.resize(_stored_data_columns.size());
_stored_has_string_tag.resize(_stored_data_columns.size());
_stored_has_variable_length_tag.resize(_stored_data_columns.size());
auto& tablet_schema = *_tablet_schema;
for (auto idx : _agg_columns_idx) {
@ -182,13 +182,23 @@ void BlockReader::_init_agg_state(const ReaderParams& read_params) {
});
_agg_places.push_back(place);
// calculate `has_string` tag.
_stored_has_string_tag[idx] =
// calculate `_has_variable_length_tag` tag. like string, array, map
_stored_has_variable_length_tag[idx] =
_stored_data_columns[idx]->is_column_string() ||
(_stored_data_columns[idx]->is_nullable() &&
reinterpret_cast<ColumnNullable*>(_stored_data_columns[idx].get())
->get_nested_column_ptr()
->is_column_string());
->is_column_string()) ||
_stored_data_columns[idx]->is_column_array() ||
(_stored_data_columns[idx]->is_nullable() &&
reinterpret_cast<ColumnNullable*>(_stored_data_columns[idx].get())
->get_nested_column_ptr()
->is_column_array()) ||
_stored_data_columns[idx]->is_column_map() ||
(_stored_data_columns[idx]->is_nullable() &&
reinterpret_cast<ColumnNullable*>(_stored_data_columns[idx].get())
->get_nested_column_ptr()
->is_column_map());
}
}
@ -461,8 +471,8 @@ size_t BlockReader::_copy_agg_data() {
for (auto idx : _agg_columns_idx) {
auto& dst_column = _stored_data_columns[idx];
if (_stored_has_string_tag[idx]) {
//string type should replace ordered
if (_stored_has_variable_length_tag[idx]) {
//variable length type should replace ordered
for (size_t i = 0; i < copy_size; i++) {
auto& ref = _stored_row_ref[i];
dst_column->replace_column_data(*ref.block->get_by_position(idx).column,

View File

@ -111,7 +111,7 @@ private:
std::vector<IteratorRowRef> _stored_row_ref;
std::vector<bool> _stored_has_null_tag;
std::vector<bool> _stored_has_string_tag;
std::vector<bool> _stored_has_variable_length_tag;
phmap::flat_hash_map<const Block*, std::vector<std::pair<int, int>>> _temp_ref_map;

View File

@ -2,15 +2,30 @@
-- !sql_nested_table_agg_c --
1
-- !sql_nested_table_agg --
\N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N
-- !sql_nested_table_agg2_c --
1
-- !sql_nested_table_agg2 --
\N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N
-- !sql_nested_table_map_agg_c --
1
-- !sql_nested_table_map_agg --
\N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N
-- !sql_nested_table_array_map_agg_c --
1
-- !sql_nested_table_array_map_agg --
\N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N
-- !sql_nested_table_map_array_agg_c --
1
-- !sql_nested_table_map_array_agg --
\N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N

View File

@ -105,6 +105,7 @@ suite("test_nested_types_insert_into_with_agg_table", "p0") {
}
qt_sql_nested_table_agg_c """select count() from tbl_array_nested_types_agg;"""
qt_sql_nested_table_agg """select * from tbl_array_nested_types_agg;"""
// test action for scala to array with array-scala type
test {
@ -188,6 +189,7 @@ suite("test_nested_types_insert_into_with_agg_table", "p0") {
}
qt_sql_nested_table_agg2_c """select count() from tbl_array_nested_types_agg2;"""
qt_sql_nested_table_agg2 """select * from tbl_array_nested_types_agg2;"""
// test action for scala to map with map-scala-scala type
@ -272,6 +274,7 @@ suite("test_nested_types_insert_into_with_agg_table", "p0") {
}
qt_sql_nested_table_map_agg_c """select count() from tbl_map_types_agg;"""
qt_sql_nested_table_map_agg """select * from tbl_map_types_agg;"""
// test action for scala to array with map-scala-scala type
test {
@ -355,6 +358,7 @@ suite("test_nested_types_insert_into_with_agg_table", "p0") {
}
qt_sql_nested_table_array_map_agg_c """select count() from tbl_array_map_types_agg;"""
qt_sql_nested_table_array_map_agg """select * from tbl_array_map_types_agg;"""
// test action for map with scala array-scala
// test action for scala to array with array-scala type
@ -439,5 +443,6 @@ suite("test_nested_types_insert_into_with_agg_table", "p0") {
}
qt_sql_nested_table_map_array_agg_c """select count() from tbl_map_array_types_agg;"""
qt_sql_nested_table_map_array_agg """select * from tbl_map_array_types_agg;"""
}