diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index b6a2a0bb58..aa13c1f012 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -92,7 +92,8 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) { return Status::OK(); } -Status RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnPtr& origin_column) { +Status RowBlockV2::_copy_data_to_column(int cid, + doris::vectorized::MutableColumnPtr& origin_column) { auto* column = origin_column.get(); bool nullable_mark_array[_selected_size]; @@ -170,7 +171,7 @@ Status RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColum } } } - break; + break; } case OLAP_FIELD_TYPE_MAP: case OLAP_FIELD_TYPE_VARCHAR: { @@ -197,7 +198,8 @@ Status RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColum if (LIKELY(slice->size <= MAX_SIZE_OF_VEC_STRING)) { column_string->insert_data(slice->data, slice->size); } else { - return Status::NotSupported("Not support string len over than 1MB in vec engine."); + return Status::NotSupported( + "Not support string len over than 1MB in vec engine."); } } else { column_string->insert_default(); @@ -291,9 +293,12 @@ Status RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColum auto cv = reinterpret_cast(column_block(cid).cell_ptr(row_idx)); if (!nullable_mark_array[j]) { offset += cv->length(); - _append_data_to_column(src_col->elements(), src_col->item_offset(row_idx), cv->length(), nested_col); + _append_data_to_column(src_col->elements(), src_col->item_offset(row_idx), + cv->length(), nested_col); + offsets_col.push_back(offset); + } else { + column_array->insert_default(); } - offsets_col.emplace_back(offset); } break; } @@ -345,11 +350,13 @@ Status RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColum return Status::OK(); } -Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16_t off, uint16_t len, doris::vectorized::MutableColumnPtr& origin_column) { +Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t start, + uint32_t len, + doris::vectorized::MutableColumnPtr& origin_column) { constexpr auto MAX_SIZE_OF_VEC_STRING = 1024l * 1024; auto* column = origin_column.get(); - uint16_t selected_size = len; + uint32_t selected_size = len; bool nullable_mark_array[selected_size]; bool column_nullable = origin_column->is_nullable(); @@ -360,8 +367,8 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 column = nullable_column->get_nested_column_ptr().get(); if (origin_nullable) { - for (uint16_t i = 0; i < selected_size; ++i) { - uint16_t row_idx = i + off; + for (uint32_t i = 0; i < selected_size; ++i) { + uint32_t row_idx = i + start; null_map.push_back(batch->is_null_at(row_idx)); nullable_mark_array[i] = null_map.back(); } @@ -373,12 +380,12 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 memset(nullable_mark_array, false, selected_size * sizeof(bool)); } - auto insert_data_directly = [&nullable_mark_array](auto& batch, auto& column, auto& off, auto& len) { - for (uint16_t j = 0; j < len; ++j) { + auto insert_data_directly = [&nullable_mark_array](auto& batch, auto& column, auto& start, + auto& length) { + for (uint32_t j = 0; j < length; ++j) { if (!nullable_mark_array[j]) { - uint16_t row_idx = j + off; - column->insert_data( - reinterpret_cast(batch->cell_ptr(row_idx)), 0); + uint32_t row_idx = j + start; + column->insert_data(reinterpret_cast(batch->cell_ptr(row_idx)), 0); } else { column->insert_default(); } @@ -388,10 +395,10 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 switch (batch->type_info()->type()) { case OLAP_FIELD_TYPE_OBJECT: { auto column_bitmap = assert_cast(column); - for (uint16_t j = 0; j < selected_size; ++j) { + for (uint32_t j = 0; j < selected_size; ++j) { column_bitmap->insert_default(); if (!nullable_mark_array[j]) { - uint16_t row_idx = j + off; + uint32_t row_idx = j + start; auto slice = reinterpret_cast(batch->cell_ptr(row_idx)); BitmapValue* pvalue = &column_bitmap->get_element(column_bitmap->size() - 1); @@ -409,10 +416,10 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 } case OLAP_FIELD_TYPE_HLL: { auto column_hll = assert_cast(column); - for (uint16_t j = 0; j < selected_size; ++j) { + for (uint32_t j = 0; j < selected_size; ++j) { column_hll->insert_default(); if (!nullable_mark_array[j]) { - uint16_t row_idx = j + off; + uint32_t row_idx = j + start; auto slice = reinterpret_cast(batch->cell_ptr(row_idx)); HyperLogLog* pvalue = &column_hll->get_element(column_hll->size() - 1); @@ -432,9 +439,9 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 case OLAP_FIELD_TYPE_VARCHAR: { auto column_string = assert_cast(column); - for (uint16_t j = 0; j < selected_size; ++j) { + for (uint32_t j = 0; j < selected_size; ++j) { if (!nullable_mark_array[j]) { - uint16_t row_idx = j + off; + uint32_t row_idx = j + start; auto slice = reinterpret_cast(batch->cell_ptr(row_idx)); column_string->insert_data(slice->data, slice->size); } else { @@ -446,14 +453,15 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 case OLAP_FIELD_TYPE_STRING: { auto column_string = assert_cast(column); - for (uint16_t j = 0; j < selected_size; ++j) { + for (uint32_t j = 0; j < selected_size; ++j) { if (!nullable_mark_array[j]) { - uint16_t row_idx = j + off; + uint32_t row_idx = j + start; auto slice = reinterpret_cast(batch->cell_ptr(row_idx)); if (LIKELY(slice->size <= MAX_SIZE_OF_VEC_STRING)) { column_string->insert_data(slice->data, slice->size); } else { - return Status::NotSupported("Not support string len over than 1MB in vec engine."); + return Status::NotSupported( + "Not support string len over than 1MB in vec engine."); } } else { column_string->insert_default(); @@ -464,9 +472,9 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 case OLAP_FIELD_TYPE_CHAR: { auto column_string = assert_cast(column); - for (uint16_t j = 0; j < selected_size; ++j) { + for (uint32_t j = 0; j < selected_size; ++j) { if (!nullable_mark_array[j]) { - uint16_t row_idx = j + off; + uint32_t row_idx = j + start; auto slice = reinterpret_cast(batch->cell_ptr(row_idx)); column_string->insert_data(slice->data, strnlen(slice->data, slice->size)); } else { @@ -478,9 +486,9 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 case OLAP_FIELD_TYPE_DATE: { auto column_int = assert_cast*>(column); - for (uint16_t j = 0; j < selected_size; ++j) { + for (uint32_t j = 0; j < selected_size; ++j) { if (!nullable_mark_array[j]) { - uint16_t row_idx = j + off; + uint32_t row_idx = j + start; auto ptr = reinterpret_cast(batch->cell_ptr(row_idx)); uint64_t value = 0; @@ -500,9 +508,9 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 case OLAP_FIELD_TYPE_DATETIME: { auto column_int = assert_cast*>(column); - for (uint16_t j = 0; j < selected_size; ++j) { + for (uint32_t j = 0; j < selected_size; ++j) { if (!nullable_mark_array[j]) { - uint16_t row_idx = j + off; + uint32_t row_idx = j + start; auto ptr = reinterpret_cast(batch->cell_ptr(row_idx)); uint64_t value = *reinterpret_cast(ptr); @@ -518,9 +526,9 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 auto column_decimal = assert_cast*>(column); - for (uint16_t j = 0; j < selected_size; ++j) { + for (uint32_t j = 0; j < selected_size; ++j) { if (!nullable_mark_array[j]) { - uint16_t row_idx = j + off; + uint32_t row_idx = j + start; auto ptr = reinterpret_cast(batch->cell_ptr(row_idx)); int64_t int_value = *(int64_t*)(ptr); @@ -533,44 +541,65 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, uint16 } break; } + case OLAP_FIELD_TYPE_ARRAY: { + auto array_batch = reinterpret_cast(batch); + auto column_array = assert_cast(column); + auto nested_col = (*column_array->get_data_ptr()).assume_mutable(); + + auto& offsets_col = column_array->get_offsets(); + uint32_t offset = offsets_col.back(); + for (uint32_t j = 0; j < selected_size; ++j) { + if (!nullable_mark_array[j]) { + uint32_t row_idx = j + start; + auto cv = reinterpret_cast(batch->cell_ptr(row_idx)); + offset += cv->length(); + _append_data_to_column(array_batch->elements(), array_batch->item_offset(row_idx), + cv->length(), nested_col); + offsets_col.push_back(offset); + } else { + column_array->insert_default(); + } + } + break; + } case OLAP_FIELD_TYPE_INT: { auto column_int = assert_cast*>(column); - insert_data_directly(batch, column_int, off, len); + insert_data_directly(batch, column_int, start, len); break; } case OLAP_FIELD_TYPE_BOOL: { auto column_int = assert_cast*>(column); - insert_data_directly(batch, column_int, off, len); + insert_data_directly(batch, column_int, start, len); break; } case OLAP_FIELD_TYPE_TINYINT: { auto column_int = assert_cast*>(column); - insert_data_directly(batch, column_int, off, len); + insert_data_directly(batch, column_int, start, len); break; } case OLAP_FIELD_TYPE_SMALLINT: { auto column_int = assert_cast*>(column); - insert_data_directly(batch, column_int, off, len); + insert_data_directly(batch, column_int, start, len); break; } case OLAP_FIELD_TYPE_BIGINT: { auto column_int = assert_cast*>(column); - insert_data_directly(batch, column_int, off, len); + insert_data_directly(batch, column_int, start, len); break; } case OLAP_FIELD_TYPE_LARGEINT: { auto column_int = assert_cast*>(column); - insert_data_directly(batch, column_int, off, len); + insert_data_directly(batch, column_int, start, len); break; } case OLAP_FIELD_TYPE_FLOAT: { auto column_float = assert_cast*>(column); - insert_data_directly(batch, column_float, off, len); + insert_data_directly(batch, column_float, start, len); break; } case OLAP_FIELD_TYPE_DOUBLE: { auto column_float = assert_cast*>(column); - insert_data_directly(batch, column_float, off, len); + insert_data_directly(batch, column_float, start, len); break; } default: { diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h index f2e9e5baef..fda9a49d6f 100644 --- a/be/src/olap/row_block2.h +++ b/be/src/olap/row_block2.h @@ -110,7 +110,8 @@ public: private: Status _copy_data_to_column(int cid, vectorized::MutableColumnPtr& mutable_column_ptr); - Status _append_data_to_column(const ColumnVectorBatch* batch, uint16_t off, uint16_t len, vectorized::MutableColumnPtr& mutable_column_ptr); + Status _append_data_to_column(const ColumnVectorBatch* batch, size_t start, uint32_t len, + vectorized::MutableColumnPtr& mutable_column_ptr); const Schema& _schema; size_t _capacity; diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index a4598e5c15..20705a60fc 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -102,6 +102,7 @@ set(VEC_FILES exprs/vexpr.cpp exprs/vexpr_context.cpp exprs/vliteral.cpp + exprs/varray_literal.cpp exprs/vin_predicate.cpp exprs/vtuple_is_null_predicate.cpp exprs/vslot_ref.cpp diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index be611afb25..4754ca3b70 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -311,7 +311,8 @@ void ColumnArray::insert_range_from(const IColumn & src, size_t start, size_t le cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length); } else { size_t old_size = cur_offsets.size(); - size_t prev_max_offset = old_size ? cur_offsets.back() : 0; + // -1 is ok, because PaddedPODArray pads zeros on the left. + size_t prev_max_offset = cur_offsets.back(); cur_offsets.resize(old_size + length); for (size_t i = 0; i < length; ++i) diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index f3b8b558f9..e6567d1c97 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -144,6 +144,10 @@ public: void replace_column_data_default(size_t self_row = 0) override { LOG(FATAL) << "replace_column_data_default not implemented"; } + void clear() override { + data->clear(); + offsets->clear(); + } private: WrappedPtr data; diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 831e9f74f0..0cec8abd9e 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -32,6 +32,7 @@ #include "runtime/row_batch.h" #include "runtime/tuple.h" #include "runtime/tuple_row.h" +#include "udf/udf.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" @@ -701,63 +702,119 @@ doris::Tuple* Block::deep_copy_tuple(const doris::TupleDescriptor& desc, MemPool for (int i = 0; i < desc.slots().size(); ++i) { auto slot_desc = desc.slots()[i]; - auto data_ref = get_by_position(column_offset + i).column->get_data_at(row); - - if (data_ref.data == nullptr) { + auto& type_desc = slot_desc->type(); + const auto& column = get_by_position(column_offset + i).column; + const auto& data_ref = + type_desc.type != TYPE_ARRAY ? column->get_data_at(row) : StringRef(); + bool is_null = is_column_data_null(slot_desc->type(), data_ref, column, row); + if (is_null) { dst->set_null(slot_desc->null_indicator_offset()); - continue; } else { dst->set_not_null(slot_desc->null_indicator_offset()); } - - if (!slot_desc->type().is_string_type() && !slot_desc->type().is_date_type()) { - memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), data_ref.data, data_ref.size); - } else if (slot_desc->type().is_string_type() && slot_desc->type() != TYPE_OBJECT && - slot_desc->type() != TYPE_HLL) { - memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), (const void*)(&data_ref), - sizeof(data_ref)); - // Copy the content of string - if (padding_char && slot_desc->type() == TYPE_CHAR) { - // serialize the content of string - auto string_slot = dst->get_string_slot(slot_desc->tuple_offset()); - string_slot->ptr = reinterpret_cast(pool->allocate(slot_desc->type().len)); - string_slot->len = slot_desc->type().len; - memset(string_slot->ptr, 0, slot_desc->type().len); - memcpy(string_slot->ptr, data_ref.data, data_ref.size); - } else { - auto str_ptr = pool->allocate(data_ref.size); - memcpy(str_ptr, data_ref.data, data_ref.size); - dst->get_string_slot(slot_desc->tuple_offset())->ptr = - reinterpret_cast(str_ptr); - } - } else if (slot_desc->type() == TYPE_OBJECT) { - auto bitmap_value = (BitmapValue*)(data_ref.data); - auto size = bitmap_value->getSizeInBytes(); - - // serialize the content of string - auto string_slot = dst->get_string_slot(slot_desc->tuple_offset()); - string_slot->ptr = reinterpret_cast(pool->allocate(size)); - bitmap_value->write(string_slot->ptr); - string_slot->len = size; - } else if (slot_desc->type() == TYPE_HLL) { - auto hll_value = (HyperLogLog*)(data_ref.data); - auto size = hll_value->max_serialized_size(); - auto string_slot = dst->get_string_slot(slot_desc->tuple_offset()); - string_slot->ptr = reinterpret_cast(pool->allocate(size)); - size_t actual_size = hll_value->serialize((uint8_t*)string_slot->ptr); - string_slot->len = actual_size; - } else { - VecDateTimeValue ts = - *reinterpret_cast(data_ref.data); - DateTimeValue dt; - ts.convert_vec_dt_to_dt(&dt); - memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), &dt, sizeof(DateTimeValue)); - } + deep_copy_slot(dst->get_slot(slot_desc->tuple_offset()), pool, type_desc, data_ref, + column.get(), row, padding_char); } return dst; } -MutableBlock::MutableBlock(const std::vector& tuple_descs) { +inline bool Block::is_column_data_null(const doris::TypeDescriptor& type_desc, + const StringRef& data_ref, const IColumn* column, int row) { + if (type_desc.type != TYPE_ARRAY) { + return data_ref.data == nullptr; + } else { + Field array; + column->get(row, array); + return array.is_null(); + } +} + +// TODO: need to refactor this function, too long. +void Block::deep_copy_slot(void* dst, MemPool* pool, const doris::TypeDescriptor& type_desc, + const StringRef& data_ref, const IColumn* column, int row, + bool padding_char) { + if (type_desc.is_collection_type()) { + if (type_desc.type != TYPE_ARRAY) { + return; + } + + Field field; + column->get(row, field); + const auto& array = field.get(); + auto collection_value = reinterpret_cast(dst); + auto item_type_desc = type_desc.children.front(); + CollectionValue::init_collection(pool, array.size(), item_type_desc.type, collection_value); + + const ColumnArray* array_column = nullptr; + if (is_column_nullable(*column)) { + auto& nested_column = + reinterpret_cast(column)->get_nested_column(); + array_column = reinterpret_cast(&nested_column); + } else { + array_column = reinterpret_cast(column); + } + auto item_column = array_column->get_data_ptr().get(); + auto offset = array_column->get_offsets()[row - 1]; + for (int i = 0; i < collection_value->length(); ++i) { + char* item_dst = reinterpret_cast(collection_value->mutable_data()) + + i * item_type_desc.get_slot_size(); + if (array[i].is_null()) { + const auto& null_value = doris_udf::AnyVal(true); + collection_value->set(i, item_type_desc.type, &null_value); + } else { + auto item_offset = offset + i; + const auto& data_ref = item_type_desc.type != TYPE_ARRAY + ? item_column->get_data_at(item_offset) + : StringRef(); + deep_copy_slot(item_dst, pool, item_type_desc, data_ref, item_column, item_offset, + padding_char); + } + } + } else if (type_desc.is_date_type()) { + VecDateTimeValue ts = + *reinterpret_cast(data_ref.data); + DateTimeValue dt; + ts.convert_vec_dt_to_dt(&dt); + memcpy(dst, &dt, sizeof(DateTimeValue)); + } else if (type_desc.type == TYPE_OBJECT) { + auto bitmap_value = (BitmapValue*)(data_ref.data); + auto size = bitmap_value->getSizeInBytes(); + + // serialize the content of string + auto string_slot = reinterpret_cast(dst); + string_slot->ptr = reinterpret_cast(pool->allocate(size)); + bitmap_value->write(string_slot->ptr); + string_slot->len = size; + } else if (type_desc.type == TYPE_HLL) { + auto hll_value = (HyperLogLog*)(data_ref.data); + auto size = hll_value->max_serialized_size(); + auto string_slot = reinterpret_cast(dst); + string_slot->ptr = reinterpret_cast(pool->allocate(size)); + size_t actual_size = hll_value->serialize((uint8_t*)string_slot->ptr); + string_slot->len = actual_size; + } else if (type_desc.is_string_type()) { // TYPE_OBJECT and TYPE_HLL must be handled before. + memcpy(dst, (const void*)(&data_ref), sizeof(data_ref)); + // Copy the content of string + if (padding_char && type_desc.type == TYPE_CHAR) { + // serialize the content of string + auto string_slot = reinterpret_cast(dst); + string_slot->ptr = reinterpret_cast(pool->allocate(type_desc.len)); + string_slot->len = type_desc.len; + memset(string_slot->ptr, 0, type_desc.len); + memcpy(string_slot->ptr, data_ref.data, data_ref.size); + } else { + auto str_ptr = pool->allocate(data_ref.size); + memcpy(str_ptr, data_ref.data, data_ref.size); + auto string_slot = reinterpret_cast(dst); + string_slot->ptr = reinterpret_cast(str_ptr); + string_slot->len = data_ref.size; + } + } else { + memcpy(dst, data_ref.data, data_ref.size); + } +} + +MutableBlock::MutableBlock(const std::vector& tuple_descs) { for (auto tuple_desc : tuple_descs) { for (auto slot_desc : tuple_desc->slots()) { _data_types.emplace_back(slot_desc->get_data_type_ptr()); diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index c660bdb13a..12145f6772 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -20,12 +20,13 @@ #pragma once +#include + #include #include #include #include #include -#include #include "gen_cpp/data.pb.h" #include "vec/columns/column_nullable.h" @@ -36,12 +37,14 @@ #include "vec/data_types/data_type_nullable.h" namespace doris { -class Status; + +class MemPool; class RowBatch; class RowDescriptor; +class Status; class Tuple; class TupleDescriptor; -class MemPool; +struct TypeDescriptor; namespace vectorized { @@ -97,17 +100,23 @@ public: ColumnWithTypeAndName& get_by_position(size_t position) { return data[position]; } const ColumnWithTypeAndName& get_by_position(size_t position) const { return data[position]; } - Status copy_column_data_to_block(bool is_block_mem_reuse, doris::vectorized::IColumn* input_col_ptr, - uint16_t* sel_rowid_idx, uint16_t select_size, int block_cid, size_t batch_size) { + Status copy_column_data_to_block(bool is_block_mem_reuse, + doris::vectorized::IColumn* input_col_ptr, + uint16_t* sel_rowid_idx, uint16_t select_size, int block_cid, + size_t batch_size) { if (is_block_mem_reuse) { auto* raw_res_ptr = this->get_by_position(block_cid).column.get(); const_cast(raw_res_ptr)->reserve(batch_size); - return input_col_ptr->filter_by_selector(sel_rowid_idx, select_size, const_cast(raw_res_ptr)); + return input_col_ptr->filter_by_selector( + sel_rowid_idx, select_size, + const_cast(raw_res_ptr)); } else { MutableColumnPtr res_col_ptr = data[block_cid].type->create_column(); res_col_ptr->reserve(batch_size); auto* raw_res_ptr = res_col_ptr.get(); - RETURN_IF_ERROR(input_col_ptr->filter_by_selector(sel_rowid_idx, select_size, const_cast(raw_res_ptr))); + RETURN_IF_ERROR(input_col_ptr->filter_by_selector( + sel_rowid_idx, select_size, + const_cast(raw_res_ptr))); this->replace_by_position(block_cid, std::move(res_col_ptr)); return Status::OK(); } @@ -148,9 +157,9 @@ public: Names get_names() const; DataTypes get_data_types() const; - DataTypePtr get_data_type(size_t index) const { + DataTypePtr get_data_type(size_t index) const { CHECK(index < data.size()); - return data[index].type; + return data[index].type; } /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0. @@ -160,7 +169,7 @@ public: void set_num_rows(size_t length); // Skip the rows in block, use in OFFSET, LIMIT operation - void skip_num_rows(int64_t & offset); + void skip_num_rows(int64_t& offset); size_t columns() const { return data.size(); } @@ -233,7 +242,8 @@ public: } // serialize block to PBlock - Status serialize(PBlock* pblock, size_t* uncompressed_bytes, size_t* compressed_bytes, std::string* allocated_buf) const; + Status serialize(PBlock* pblock, size_t* uncompressed_bytes, size_t* compressed_bytes, + std::string* allocated_buf) const; // serialize block to PRowbatch void serialize(RowBatch*, const RowDescriptor&); @@ -276,17 +286,25 @@ public: //note(wb) no DCHECK here, because this method is only used after compare_at now, so no need to repeat check here. // If this method is used in more places, you can add DCHECK case by case. - int compare_column_at(size_t n, size_t m, size_t col_idx, const Block& rhs, int nan_direction_hint) const { - auto res = get_by_position(col_idx).column->compare_at(n, m, *(rhs.get_by_position(col_idx).column), - nan_direction_hint); + int compare_column_at(size_t n, size_t m, size_t col_idx, const Block& rhs, + int nan_direction_hint) const { + auto res = get_by_position(col_idx).column->compare_at( + n, m, *(rhs.get_by_position(col_idx).column), nan_direction_hint); return res; } - doris::Tuple* deep_copy_tuple(const TupleDescriptor&, MemPool*, int, int, bool padding_char = false); + doris::Tuple* deep_copy_tuple(const TupleDescriptor&, MemPool*, int, int, + bool padding_char = false); private: void erase_impl(size_t position); void initialize_index_by_name(); + inline bool is_column_data_null(const doris::TypeDescriptor& type_desc, + const StringRef& data_ref, + const IColumn* column_with_type_and_name, int row); + void deep_copy_slot(void* dst, MemPool* pool, const doris::TypeDescriptor& type_desc, + const StringRef& data_ref, const IColumn* column, int row, + bool padding_char); }; using Blocks = std::vector; diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 44e9e7827f..5c6e39fe74 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -32,7 +32,7 @@ DataTypePtr DataTypeFactory::create_data_type(const doris::Field& col_desc) { } if (col_desc.is_nullable() && nested) { - return std::make_shared(std::move(nested)); + return std::make_shared(nested); } return nested; } @@ -47,7 +47,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TabletColumn& col_desc) { } if (col_desc.is_nullable() && nested) { - return std::make_shared(std::move(nested)); + return std::make_shared(nested); } return nested; } @@ -104,7 +104,8 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo break; case TYPE_ARRAY: DCHECK(col_desc.children.size() == 1); - nested = std::make_shared(create_data_type(col_desc.children[0], false)); + nested = + std::make_shared(create_data_type(col_desc.children[0])); break; case INVALID_TYPE: default: @@ -113,7 +114,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo } if (nested && is_nullable) { - return std::make_shared(std::move(nested)); + return std::make_shared(nested); } return nested; } @@ -121,54 +122,54 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo DataTypePtr DataTypeFactory::_create_primitive_data_type(const FieldType& type) const { DataTypePtr result = nullptr; switch (type) { - case OLAP_FIELD_TYPE_BOOL: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_TINYINT: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_SMALLINT: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_INT: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_FLOAT: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_BIGINT: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_LARGEINT: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_DATE: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_DATETIME: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_DOUBLE: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_CHAR: - case OLAP_FIELD_TYPE_VARCHAR: - case OLAP_FIELD_TYPE_HLL: - case OLAP_FIELD_TYPE_STRING: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_OBJECT: - result = std::make_shared(); - break; - case OLAP_FIELD_TYPE_DECIMAL: - result = std::make_shared>(27, 9); - break; - default: - DCHECK(false) << "Invalid FieldType:" << (int)type; - result = nullptr; - break; + case OLAP_FIELD_TYPE_BOOL: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_TINYINT: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_SMALLINT: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_INT: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_FLOAT: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_BIGINT: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_LARGEINT: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_DATE: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_DATETIME: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_DOUBLE: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_CHAR: + case OLAP_FIELD_TYPE_VARCHAR: + case OLAP_FIELD_TYPE_HLL: + case OLAP_FIELD_TYPE_STRING: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_OBJECT: + result = std::make_shared(); + break; + case OLAP_FIELD_TYPE_DECIMAL: + result = std::make_shared>(27, 9); + break; + default: + DCHECK(false) << "Invalid FieldType:" << (int)type; + result = nullptr; + break; } - return result; + return result; } DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) { @@ -221,22 +222,22 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) { break; case PGenericType::DECIMAL32: nested = std::make_shared>(pcolumn.decimal_param().precision(), - pcolumn.decimal_param().scale()); + pcolumn.decimal_param().scale()); break; case PGenericType::DECIMAL64: nested = std::make_shared>(pcolumn.decimal_param().precision(), - pcolumn.decimal_param().scale()); + pcolumn.decimal_param().scale()); break; case PGenericType::DECIMAL128: nested = std::make_shared>(pcolumn.decimal_param().precision(), - pcolumn.decimal_param().scale()); + pcolumn.decimal_param().scale()); break; case PGenericType::BITMAP: nested = std::make_shared(); break; case PGenericType::LIST: DCHECK(pcolumn.children_size() == 1); - nested = std::make_shared(std::move(create_data_type(pcolumn.children(0)))); + nested = std::make_shared(create_data_type(pcolumn.children(0))); break; default: { LOG(FATAL) << fmt::format("Unknown data type: {}", pcolumn.type()); @@ -245,10 +246,9 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) { } if (nested && pcolumn.is_nullable() > 0) { - return std::make_shared(std::move(nested)); + return std::make_shared(nested); } return nested; } - } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp index 7834b12ba4..c78a4ef344 100644 --- a/be/src/vec/data_types/data_type_factory.hpp +++ b/be/src/vec/data_types/data_type_factory.hpp @@ -26,7 +26,6 @@ #include "olap/field.h" #include "olap/tablet_schema.h" #include "runtime/types.h" - #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_bitmap.h" @@ -34,45 +33,45 @@ #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_nothing.h" -#include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" namespace doris::vectorized { class DataTypeFactory { -using DataTypeMap = std::unordered_map; -using InvertedDataTypeMap = std::vector>; + using DataTypeMap = std::unordered_map; + using InvertedDataTypeMap = std::vector>; public: static DataTypeFactory& instance() { static std::once_flag oc; static DataTypeFactory instance; - std::call_once(oc, [&]() { - instance.regist_data_type("UInt8", DataTypePtr(std::make_shared())); - instance.regist_data_type("UInt16", DataTypePtr(std::make_shared())); - instance.regist_data_type("UInt32", DataTypePtr(std::make_shared())); - instance.regist_data_type("UInt64", DataTypePtr(std::make_shared())); - instance.regist_data_type("Int8", DataTypePtr(std::make_shared())); - instance.regist_data_type("Int16", DataTypePtr(std::make_shared())); - instance.regist_data_type("Int32", DataTypePtr(std::make_shared())); - instance.regist_data_type("Int64", DataTypePtr(std::make_shared())); - instance.regist_data_type("Int128", DataTypePtr(std::make_shared())); - instance.regist_data_type("Float32", DataTypePtr(std::make_shared())); - instance.regist_data_type("Float64", DataTypePtr(std::make_shared())); - instance.regist_data_type("Date", DataTypePtr(std::make_shared())); - instance.regist_data_type("DateTime", - DataTypePtr(std::make_shared())); - instance.regist_data_type("String", DataTypePtr(std::make_shared())); - instance.regist_data_type("Decimal", - DataTypePtr(std::make_shared>(27, 9))); + std::call_once(oc, []() { + instance.register_data_type("UInt8", std::make_shared()); + instance.register_data_type("UInt16", std::make_shared()); + instance.register_data_type("UInt32", std::make_shared()); + instance.register_data_type("UInt64", std::make_shared()); + instance.register_data_type("Int8", std::make_shared()); + instance.register_data_type("Int16", std::make_shared()); + instance.register_data_type("Int32", std::make_shared()); + instance.register_data_type("Int64", std::make_shared()); + instance.register_data_type("Int128", std::make_shared()); + instance.register_data_type("Float32", std::make_shared()); + instance.register_data_type("Float64", std::make_shared()); + instance.register_data_type("Date", std::make_shared()); + instance.register_data_type("DateTime", std::make_shared()); + instance.register_data_type("String", std::make_shared()); + instance.register_data_type("Decimal", + std::make_shared>(27, 9)); }); return instance; } DataTypePtr get(const std::string& name) { return _data_type_map[name]; } const std::string& get(const DataTypePtr& data_type) const { - auto type_ptr = data_type->is_nullable() ? - ((DataTypeNullable*)(data_type.get()))->get_nested_type() : data_type; + auto type_ptr = data_type->is_nullable() + ? ((DataTypeNullable*)(data_type.get()))->get_nested_type() + : data_type; for (const auto& entity : _invert_data_type_map) { if (entity.first->equals(*type_ptr)) { return entity.second; @@ -91,7 +90,7 @@ public: private: DataTypePtr _create_primitive_data_type(const FieldType& type) const; - void regist_data_type(const std::string& name, const DataTypePtr& data_type) { + void register_data_type(const std::string& name, const DataTypePtr& data_type) { _data_type_map.emplace(name, data_type); _invert_data_type_map.emplace_back(data_type, name); } diff --git a/be/src/vec/exprs/varray_literal.cpp b/be/src/vec/exprs/varray_literal.cpp new file mode 100644 index 0000000000..d289c17988 --- /dev/null +++ b/be/src/vec/exprs/varray_literal.cpp @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/exprs/varray_literal.h" + +namespace doris::vectorized { + +Status VArrayLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc, + VExprContext* context) { + DCHECK_EQ(type().children.size(), 1) << "array children type not 1"; + + RETURN_IF_ERROR(VExpr::prepare(state, row_desc, context)); + bool is_null = (_node_type == TExprNodeType::NULL_LITERAL); + Field array = is_null ? Field() : Array(); + for (const auto child : _children) { + Field item; + child->get_const_col(context)->column_ptr->get(0, item); + array.get().push_back(item); + } + _column_ptr = _data_type->create_column_const(1, array); + return Status::OK(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/varray_literal.h b/be/src/vec/exprs/varray_literal.h new file mode 100644 index 0000000000..56b5f8e331 --- /dev/null +++ b/be/src/vec/exprs/varray_literal.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "vec/exprs/vliteral.h" + +namespace doris { + +namespace vectorized { +class VArrayLiteral : public VLiteral { +public: + VArrayLiteral(const TExprNode& node) : VLiteral(node, false) {} + virtual ~VArrayLiteral() = default; + virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, + VExprContext* context) override; +}; +} // namespace vectorized + +} // namespace doris diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 6757b745b9..1c71c5b2a4 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -24,15 +24,16 @@ #include "exprs/anyval_util.h" #include "gen_cpp/Exprs_types.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/exprs/varray_literal.h" #include "vec/exprs/vcase_expr.h" #include "vec/exprs/vcast_expr.h" #include "vec/exprs/vcompound_pred.h" #include "vec/exprs/vectorized_fn_call.h" #include "vec/exprs/vin_predicate.h" -#include "vec/exprs/vtuple_is_null_predicate.h" +#include "vec/exprs/vinfo_func.h" #include "vec/exprs/vliteral.h" #include "vec/exprs/vslot_ref.h" -#include "vec/exprs/vinfo_func.h" +#include "vec/exprs/vtuple_is_null_predicate.h" namespace doris::vectorized { using doris::Status; @@ -100,6 +101,10 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr *expr = pool->add(new VLiteral(texpr_node)); return Status::OK(); } + case TExprNodeType::ARRAY_LITERAL: { + *expr = pool->add(new VArrayLiteral(texpr_node)); + return Status::OK(); + } case doris::TExprNodeType::SLOT_REF: { *expr = pool->add(new VSlotRef(texpr_node)); break; diff --git a/be/src/vec/exprs/vliteral.cpp b/be/src/vec/exprs/vliteral.cpp index 38ec4e1bf6..c856c769dc 100644 --- a/be/src/vec/exprs/vliteral.cpp +++ b/be/src/vec/exprs/vliteral.cpp @@ -24,107 +24,104 @@ #include "vec/data_types/data_type_nullable.h" #include "vec/runtime/vdatetime_value.h" namespace doris::vectorized { -VLiteral::VLiteral(const TExprNode& node) : VExpr(node) { + +void VLiteral::init(const TExprNode& node) { Field field; if (node.node_type != TExprNodeType::NULL_LITERAL) { switch (_type.type) { - case TYPE_BOOLEAN: { - DCHECK_EQ(node.node_type, TExprNodeType::BOOL_LITERAL); - DCHECK(node.__isset.bool_literal); - field = Int8(node.bool_literal.value); - break; - } - case TYPE_TINYINT: { - DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); - DCHECK(node.__isset.int_literal); - field = Int8(node.int_literal.value); - break; - } - case TYPE_SMALLINT: { - DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); - DCHECK(node.__isset.int_literal); - field = Int16(node.int_literal.value); - break; - } - case TYPE_INT: { - DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); - DCHECK(node.__isset.int_literal); - field = Int32(node.int_literal.value); - break; - } - case TYPE_BIGINT: { - DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); - DCHECK(node.__isset.int_literal); - field = Int64(node.int_literal.value); - break; - } - case TYPE_LARGEINT: { - StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; - DCHECK_EQ(node.node_type, TExprNodeType::LARGE_INT_LITERAL); - __int128_t value = StringParser::string_to_int<__int128>( - node.large_int_literal.value.c_str(), node.large_int_literal.value.size(), - &parse_result); - if (parse_result != StringParser::PARSE_SUCCESS) { - value = MAX_INT128; - } - field = Int128(value); - break; - } - case TYPE_FLOAT: { - DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL); - DCHECK(node.__isset.float_literal); - field = Float32(node.float_literal.value); - break; - } - case TYPE_TIME: - case TYPE_DOUBLE: { - DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL); - DCHECK(node.__isset.float_literal); - field = Float64(node.float_literal.value); - break; - } - case TYPE_DATE: { - VecDateTimeValue value; - value.from_date_str(node.date_literal.value.c_str(), node.date_literal.value.size()); - value.cast_to_date(); - field = Int64(*reinterpret_cast<__int64_t*>(&value)); - break; - } - case TYPE_DATETIME: { - VecDateTimeValue value; - value.from_date_str(node.date_literal.value.c_str(), node.date_literal.value.size()); - value.to_datetime(); - field = Int64(*reinterpret_cast<__int64_t*>(&value)); - break; - } - case TYPE_STRING: - case TYPE_CHAR: - case TYPE_VARCHAR: { - DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL); - DCHECK(node.__isset.string_literal); - field = node.string_literal.value; - break; - } - case TYPE_DECIMALV2: { - DCHECK_EQ(node.node_type, TExprNodeType::DECIMAL_LITERAL); - DCHECK(node.__isset.decimal_literal); - DecimalV2Value value(node.decimal_literal.value); - field = DecimalField(value.value(), value.scale()); - break; - } - default: { - DCHECK(false) << "Invalid type: " << _type.type; - break; + case TYPE_BOOLEAN: { + DCHECK_EQ(node.node_type, TExprNodeType::BOOL_LITERAL); + DCHECK(node.__isset.bool_literal); + field = Int8(node.bool_literal.value); + break; + } + case TYPE_TINYINT: { + DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); + DCHECK(node.__isset.int_literal); + field = Int8(node.int_literal.value); + break; + } + case TYPE_SMALLINT: { + DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); + DCHECK(node.__isset.int_literal); + field = Int16(node.int_literal.value); + break; + } + case TYPE_INT: { + DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); + DCHECK(node.__isset.int_literal); + field = Int32(node.int_literal.value); + break; + } + case TYPE_BIGINT: { + DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); + DCHECK(node.__isset.int_literal); + field = Int64(node.int_literal.value); + break; + } + case TYPE_LARGEINT: { + StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; + DCHECK_EQ(node.node_type, TExprNodeType::LARGE_INT_LITERAL); + __int128_t value = StringParser::string_to_int<__int128>( + node.large_int_literal.value.c_str(), node.large_int_literal.value.size(), + &parse_result); + if (parse_result != StringParser::PARSE_SUCCESS) { + value = MAX_INT128; } + field = Int128(value); + break; + } + case TYPE_FLOAT: { + DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL); + DCHECK(node.__isset.float_literal); + field = Float32(node.float_literal.value); + break; + } + case TYPE_TIME: + case TYPE_DOUBLE: { + DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL); + DCHECK(node.__isset.float_literal); + field = Float64(node.float_literal.value); + break; + } + case TYPE_DATE: { + VecDateTimeValue value; + value.from_date_str(node.date_literal.value.c_str(), node.date_literal.value.size()); + value.cast_to_date(); + field = Int64(*reinterpret_cast<__int64_t*>(&value)); + break; + } + case TYPE_DATETIME: { + VecDateTimeValue value; + value.from_date_str(node.date_literal.value.c_str(), node.date_literal.value.size()); + value.to_datetime(); + field = Int64(*reinterpret_cast<__int64_t*>(&value)); + break; + } + case TYPE_STRING: + case TYPE_CHAR: + case TYPE_VARCHAR: { + DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL); + DCHECK(node.__isset.string_literal); + field = node.string_literal.value; + break; + } + case TYPE_DECIMALV2: { + DCHECK_EQ(node.node_type, TExprNodeType::DECIMAL_LITERAL); + DCHECK(node.__isset.decimal_literal); + DecimalV2Value value(node.decimal_literal.value); + field = DecimalField(value.value(), value.scale()); + break; + } + default: { + DCHECK(false) << "Invalid type: " << _type.type; + break; + } } } - - this->_column_ptr = _data_type->create_column_const(1, field); - _expr_name = _data_type->get_name(); + _column_ptr = _data_type->create_column_const(1, field); } -VLiteral::~VLiteral() {} - Status VLiteral::execute(VExprContext* context, vectorized::Block* block, int* result_column_id) { int rows = block->rows(); if (rows < 1) { diff --git a/be/src/vec/exprs/vliteral.h b/be/src/vec/exprs/vliteral.h index d1d3dcc003..b694ab565f 100644 --- a/be/src/vec/exprs/vliteral.h +++ b/be/src/vec/exprs/vliteral.h @@ -27,8 +27,13 @@ class TExprNode; namespace vectorized { class VLiteral : public VExpr { public: - virtual ~VLiteral(); - VLiteral(const TExprNode& node); + VLiteral(const TExprNode& node, bool should_init = true) + : VExpr(node), _expr_name(_data_type->get_name()) { + if (should_init) { + init(node); + } + }; + virtual ~VLiteral() = default; virtual Status execute(VExprContext* context, vectorized::Block* block, int* result_column_id) override; virtual const std::string& expr_name() const override { return _expr_name; } @@ -36,9 +41,12 @@ public: return pool->add(new VLiteral(*this)); } -private: +protected: ColumnPtr _column_ptr; std::string _expr_name; + +private: + void init(const TExprNode& node); }; } // namespace vectorized diff --git a/be/src/vec/sink/mysql_result_writer.cpp b/be/src/vec/sink/mysql_result_writer.cpp index e4fc56b38d..60fa9a4d24 100644 --- a/be/src/vec/sink/mysql_result_writer.cpp +++ b/be/src/vec/sink/mysql_result_writer.cpp @@ -107,8 +107,8 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); } } else if constexpr (type == TYPE_ARRAY) { - auto& array_column = assert_cast(*column); - auto& offsets = array_column.get_offsets(); + auto& column_array = assert_cast(*column); + auto& offsets = column_array.get_offsets(); for (int i = 0; i < column_size; ++i) { if (0 != buf_ret) { return Status::InternalError("pack mysql buffer failed."); @@ -130,7 +130,12 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, if (!begin) { buf_ret = _buffer.push_string(", ", 2); } - buf_ret = _add_one_cell(array_column.get_data_ptr(), j, nested_type_ptr, _buffer); + const auto& data = column_array.get_data_ptr(); + if (data->is_null_at(j)) { + buf_ret = _buffer.push_string("NULL", strlen("NULL")); + } else { + buf_ret = _add_one_cell(data, j, nested_type_ptr, _buffer); + } begin = false; } buf_ret = _buffer.push_string("]", 1); @@ -211,7 +216,7 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr, } int VMysqlResultWriter::_add_one_cell(const ColumnPtr& column_ptr, size_t row_idx, - const DataTypePtr& type, MysqlRowBuffer& buffer) { + const DataTypePtr& type, MysqlRowBuffer& buffer) { WhichDataType which(type->get_type_id()); if (which.is_nullable() && column_ptr->is_null_at(row_idx)) { return buffer.push_null(); @@ -265,6 +270,38 @@ int VMysqlResultWriter::_add_one_cell(const ColumnPtr& column_ptr, size_t row_id buf_ret = buffer.push_string(string_val.data, string_val.size); } return buf_ret; + } else if (which.is_array()) { + auto& column_array = assert_cast(*column); + auto& offsets = column_array.get_offsets(); + DataTypePtr sub_type; + if (type->is_nullable()) { + auto& nested_type = assert_cast(*type).get_nested_type(); + sub_type = assert_cast(*nested_type).get_nested_type(); + } else { + sub_type = assert_cast(*type).get_nested_type(); + } + + int start = offsets[row_idx - 1]; + int length = offsets[row_idx] - start; + const auto& data = column_array.get_data_ptr(); + + int buf_ret = buffer.push_string("[", strlen("[")); + bool begin = true; + for (int i = 0; i < length; ++i) { + int position = start + i; + if (begin) { + begin = false; + } else { + buf_ret = buffer.push_string(", ", strlen(", ")); + } + if (data->is_null_at(position)) { + buf_ret = buffer.push_string("NULL", strlen("NULL")); + } else { + buf_ret = _add_one_cell(data, position, sub_type, buffer); + } + } + buf_ret = buffer.push_string("]", strlen("]")); + return buf_ret; } else { LOG(WARNING) << "sub TypeIndex(" << (int)which.idx << "not supported yet"; return -1; @@ -408,12 +445,15 @@ Status VMysqlResultWriter::append_block(Block& input_block) { } case TYPE_ARRAY: { if (type_ptr->is_nullable()) { - auto& nested_type = assert_cast(*type_ptr).get_nested_type(); + auto& nested_type = + assert_cast(*type_ptr).get_nested_type(); auto& sub_type = assert_cast(*nested_type).get_nested_type(); - status = _add_one_column(column_ptr, result, sub_type); + status = _add_one_column(column_ptr, result, + sub_type); } else { auto& sub_type = assert_cast(*type_ptr).get_nested_type(); - status = _add_one_column(column_ptr, result, sub_type); + status = _add_one_column(column_ptr, result, + sub_type); } break; } diff --git a/be/test/runtime/array_test.cpp b/be/test/runtime/array_test.cpp index 57a5cdb336..40a326af4c 100644 --- a/be/test/runtime/array_test.cpp +++ b/be/test/runtime/array_test.cpp @@ -31,6 +31,7 @@ #include "olap/field.h" #include "olap/fs/block_manager.h" #include "olap/fs/fs_util.h" +#include "olap/row_block2.h" #include "olap/rowset/segment_v2/column_reader.h" #include "olap/rowset/segment_v2/column_writer.h" #include "olap/tablet_schema.h" @@ -39,9 +40,11 @@ #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" #include "runtime/raw_value.h" +#include "test_util/array_utils.h" #include "testutil/desc_tbl_builder.h" #include "util/file_utils.h" #include "util/uid_util.h" +#include "vec/core/block.h" namespace doris { @@ -103,86 +106,19 @@ TupleDescriptor* get_tuple_descriptor(ObjectPool& object_pool, const TypeInfo* t return builder.build()->get_tuple_descriptor(0); } -CollectionValue* parse(ObjectPool& object_pool, - const rapidjson::GenericValue>::ConstArray& json_array, - const TypeDescriptor& type_desc) { - if (json_array.Empty()) { - return object_pool.add(new CollectionValue(0)); - } else { - auto array = object_pool.add(new CollectionValue()); - const auto& item_type_desc = type_desc.children[0]; - CollectionValue::init_collection(&object_pool, json_array.Size(), item_type_desc.type, - array); - int index = 0; - switch (item_type_desc.type) { - case TYPE_ARRAY: - for (auto it = json_array.Begin(); it != json_array.End(); ++it) { - auto val = CollectionVal(); - if (it->IsNull()) { - val.is_null = true; - } else { - auto sub_array = parse(object_pool, it->GetArray(), item_type_desc); - sub_array->to_collection_val(&val); - } - array->set(index++, item_type_desc.type, &val); - } - break; - case TYPE_INT: - for (auto it = json_array.Begin(); it != json_array.End(); ++it) { - auto val = it->IsNull() ? IntVal::null() : IntVal(it->GetInt()); - array->set(index++, item_type_desc.type, &val); - } - break; - case TYPE_VARCHAR: - for (auto it = json_array.Begin(); it != json_array.End(); ++it) { - if (it->IsNull()) { - auto val = StringVal::null(); - array->set(index++, item_type_desc.type, &val); - } else { - char* string = object_pool.add_array(new char[it->GetStringLength()]); - memcpy(string, it->GetString(), it->GetStringLength()); - auto val = StringVal(reinterpret_cast(string), it->GetStringLength()); - array->set(index++, item_type_desc.type, &val); - } - } - break; - default: - break; - } - if (!array->has_null()) { - array->set_null_signs(nullptr); - } - return array; - } -} - -CollectionValue* parse(ObjectPool& object_pool, const std::string& text, - const TypeDescriptor& type_desc) { - rapidjson::Document document; - if (document.Parse(text.c_str()).HasParseError() || !document.IsArray()) { +CollectionValue* parse(MemPool& mem_pool, FunctionContext& context, const std::string& text, + const ColumnPB& column_pb) { + auto collection_value = + reinterpret_cast(mem_pool.allocate(sizeof(CollectionValue))); + auto status = ArrayUtils::create_collection_value(collection_value, &context, text); + if (!status.ok()) { return nullptr; } - return parse(object_pool, (const_cast(&document))->GetArray(), - type_desc); -} - -void validate(const Field* field, const CollectionValue* expect, const CollectionValue* actual, - bool check_nullptr) { - EXPECT_TRUE(field->type_info()->equal(expect, actual)); - if (check_nullptr) { - if (expect->length() == 0) { - EXPECT_EQ(nullptr, actual->data()); - EXPECT_EQ(expect->data(), actual->data()); - } - if (!expect->has_null()) { - EXPECT_EQ(nullptr, expect->null_signs()); - EXPECT_EQ(expect->null_signs(), actual->null_signs()); - } - } + return collection_value; } void validate(const Field* field, const CollectionValue* expect, const CollectionValue* actual) { - validate(field, expect, actual, true); + EXPECT_TRUE(field->type_info()->equal(expect, actual)); } class ArrayTest : public ::testing::Test { @@ -254,7 +190,7 @@ private: for (auto array : arrays) { field->type_info()->direct_copy(&cell, array); EXPECT_EQ(cell.null_signs(), reinterpret_cast(variable_ptr.get())); - validate(field, array, &cell, false); + validate(field, array, &cell); } } @@ -263,8 +199,13 @@ private: const std::vector& arrays) { const std::string path = TEST_DIR + "/" + generate_uuid_string(); LOG(INFO) << "Test directory: " << path; + segment_v2::ColumnMetaPB meta; init_column_meta(&meta, column_pb); + + TabletColumn tablet_column; + tablet_column.init_from_pb(column_pb); + Schema schema({tablet_column}, 0); { auto wblock = create_writable_block(path); ASSERT_NE(wblock, nullptr); @@ -295,14 +236,8 @@ private: auto st = iter->seek_to_first(); ASSERT_TRUE(st.ok()) << st.to_string(); - auto tracker = std::make_shared(); - MemPool pool(tracker.get()); - std::unique_ptr cvb; - ColumnVectorBatch::create(0, true, field->type_info(), const_cast(field), &cvb); - ASSERT_NE(cvb, nullptr) << st.to_string(); - cvb->resize(1024); - ColumnBlock col(cvb.get(), &pool); - + RowBlockV2 block(schema, 1024); + auto col = block.column_block(0); int index = 0; size_t rows_read = 1024; do { @@ -311,12 +246,17 @@ private: ASSERT_TRUE(st.ok()); for (int i = 0; i < rows_read; ++i) { validate(field, arrays[index++], - reinterpret_cast(col.cell_ptr(i)), false); + reinterpret_cast(col.cell_ptr(i))); } ASSERT_TRUE(st.ok()); } while (rows_read >= 1024); + + auto tuple_desc = get_tuple_descriptor(_object_pool, get_type_info(column_pb).get()); + block.set_selected_size(rows_read); + test_convert_to_vec_block(block, tuple_desc, field, arrays); } } + template void init_column_meta(segment_v2::ColumnMetaPB* meta, const ColumnPB& column_pb) { int column_id = 0; @@ -405,11 +345,30 @@ private: template void test_array(const ColumnPB& column_pb, const Field* field, const TupleDescriptor* tuple_desc, const CollectionValue* array) { + ASSERT_NE(array, nullptr); test_copy_array(tuple_desc, field, array); test_direct_copy_array(field, {array}); test_write_and_read_column(column_pb, field, {array}); } + void test_convert_to_vec_block(RowBlockV2& row_block, const TupleDescriptor* tuple_desc, + const Field* field, + const std::vector& arrays) { + vectorized::Block block; + for (const auto slot_desc : tuple_desc->slots()) { + block.insert(vectorized::ColumnWithTypeAndName(slot_desc->get_empty_mutable_column(), + slot_desc->get_data_type_ptr(), + slot_desc->col_name())); + } + + row_block.convert_to_vec_block(&block); + for (int i = 0; i < arrays.size(); ++i) { + auto tuple = block.deep_copy_tuple(*tuple_desc, _mem_pool.get(), i, 0, false); + auto actual = tuple->get_collection_slot(tuple_desc->slots().front()->tuple_offset()); + validate(field, arrays[i], actual); + } + } + private: static constexpr size_t MAX_MEMORY_BYTES = 1024 * 1024; static const std::string TEST_DIR; @@ -426,16 +385,17 @@ TEST_F(ArrayTest, TestSimpleIntArrays) { auto field = create_field(column_pb); auto tuple_desc = get_tuple_descriptor(_object_pool, type_info.get()); ASSERT_EQ(tuple_desc->slots().size(), 1); - auto type_desc = tuple_desc->slots().front()->type(); + FunctionContext context; + ArrayUtils::prepare_context(context, *_mem_pool, column_pb); std::vector arrays = { - parse(_object_pool, "[]", type_desc), - parse(_object_pool, "[null]", type_desc), - parse(_object_pool, "[1, 2, 3]", type_desc), - parse(_object_pool, "[1, null, 3]", type_desc), - parse(_object_pool, "[1, null, null]", type_desc), - parse(_object_pool, "[null, null, 3]", type_desc), - parse(_object_pool, "[null, null, null]", type_desc), + parse(*_mem_pool, context, "[]", column_pb), + parse(*_mem_pool, context, "[null]", column_pb), + parse(*_mem_pool, context, "[1, 2, 3]", column_pb), + parse(*_mem_pool, context, "[1, null, 3]", column_pb), + parse(*_mem_pool, context, "[1, null, null]", column_pb), + parse(*_mem_pool, context, "[null, null, 3]", column_pb), + parse(*_mem_pool, context, "[null, null, null]", column_pb), }; for (auto array : arrays) { test_array(column_pb, field.get(), @@ -453,15 +413,16 @@ TEST_F(ArrayTest, TestNestedIntArrays) { auto field = create_field(column_pb); auto tuple_desc = get_tuple_descriptor(_object_pool, type_info.get()); ASSERT_EQ(tuple_desc->slots().size(), 1); - auto type_desc = tuple_desc->slots().front()->type(); + auto context = std::make_unique(); + ArrayUtils::prepare_context(*context, *_mem_pool, column_pb); std::vector arrays = { - parse(_object_pool, "[]", type_desc), - parse(_object_pool, "[[]]", type_desc), - parse(_object_pool, "[[1, 2, 3], [4, 5, 6]]", type_desc), - parse(_object_pool, "[[1, 2, 3], null, [4, 5, 6]]", type_desc), - parse(_object_pool, "[[1, 2, null], null, [4, null, 6], null, [null, 8, 9]]", - type_desc), + parse(*_mem_pool, *context, "[]", column_pb), + parse(*_mem_pool, *context, "[[]]", column_pb), + parse(*_mem_pool, *context, "[[1, 2, 3], [4, 5, 6]]", column_pb), + parse(*_mem_pool, *context, "[[1, 2, 3], null, [4, 5, 6]]", column_pb), + parse(*_mem_pool, *context, "[[1, 2, null], null, [4, null, 6], null, [null, 8, 9]]", + column_pb), }; for (auto array : arrays) { test_array(column_pb, field.get(), @@ -477,15 +438,17 @@ TEST_F(ArrayTest, TestNestedIntArrays) { field = create_field(column_pb); tuple_desc = get_tuple_descriptor(_object_pool, type_info.get()); ASSERT_EQ(tuple_desc->slots().size(), 1); - type_desc = tuple_desc->slots().front()->type(); arrays.clear(); ASSERT_EQ(arrays.size(), 0); + context.reset(new FunctionContext); + ArrayUtils::prepare_context(*context, *_mem_pool, column_pb); arrays = { - parse(_object_pool, "[]", type_desc), - parse(_object_pool, "[[]]", type_desc), - parse(_object_pool, "[[[]]]", type_desc), - parse(_object_pool, "[[[null]], [[1], [2, 3]], [[4, 5, 6], null, null]]", type_desc), + parse(*_mem_pool, *context, "[]", column_pb), + parse(*_mem_pool, *context, "[[]]", column_pb), + parse(*_mem_pool, *context, "[[[]]]", column_pb), + parse(*_mem_pool, *context, "[[[null]], [[1], [2, 3]], [[4, 5, 6], null, null]]", + column_pb), }; for (auto array : arrays) { test_array(column_pb, field.get(), @@ -502,17 +465,18 @@ TEST_F(ArrayTest, TestSimpleStringArrays) { auto field = create_field(column_pb); auto tuple_desc = get_tuple_descriptor(_object_pool, type_info.get()); ASSERT_EQ(tuple_desc->slots().size(), 1); - auto type_desc = tuple_desc->slots().front()->type(); + FunctionContext context; + ArrayUtils::prepare_context(context, *_mem_pool, column_pb); std::vector arrays = { - parse(_object_pool, "[]", type_desc), - parse(_object_pool, "[null]", type_desc), - parse(_object_pool, "[\"a\", \"b\", \"c\"]", type_desc), - parse(_object_pool, "[null, \"b\", \"c\"]", type_desc), - parse(_object_pool, "[\"a\", null, \"c\"]", type_desc), - parse(_object_pool, "[\"a\", \"b\", null]", type_desc), - parse(_object_pool, "[null, \"b\", null]", type_desc), - parse(_object_pool, "[null, null, null]", type_desc), + parse(*_mem_pool, context, "[]", column_pb), + parse(*_mem_pool, context, "[null]", column_pb), + parse(*_mem_pool, context, "[\"a\", \"b\", \"c\"]", column_pb), + parse(*_mem_pool, context, "[null, \"b\", \"c\"]", column_pb), + parse(*_mem_pool, context, "[\"a\", null, \"c\"]", column_pb), + parse(*_mem_pool, context, "[\"a\", \"b\", null]", column_pb), + parse(*_mem_pool, context, "[null, \"b\", null]", column_pb), + parse(*_mem_pool, context, "[null, null, null]", column_pb), }; for (auto array : arrays) { test_array(column_pb, field.get(), @@ -529,15 +493,16 @@ TEST_F(ArrayTest, TestNestedStringArrays) { auto field = create_field(column_pb); auto tuple_desc = get_tuple_descriptor(_object_pool, type_info.get()); ASSERT_EQ(tuple_desc->slots().size(), 1); - auto type_desc = tuple_desc->slots().front()->type(); + FunctionContext context; + ArrayUtils::prepare_context(context, *_mem_pool, column_pb); std::vector arrays = { - parse(_object_pool, "[]", type_desc), - parse(_object_pool, "[[]]", type_desc), - parse(_object_pool, "[[[]]]", type_desc), - parse(_object_pool, "[null, [null], [[null]]]", type_desc), - parse(_object_pool, "[[[\"a\", null, \"c\"], [\"d\", \"e\", \"f\"]], null, [[\"g\"]]]", - type_desc), + parse(*_mem_pool, context, "[]", column_pb), + parse(*_mem_pool, context, "[[]]", column_pb), + parse(*_mem_pool, context, "[[[]]]", column_pb), + parse(*_mem_pool, context, "[null, [null], [[null]]]", column_pb), + parse(*_mem_pool, context, + "[[[\"a\", null, \"c\"], [\"d\", \"e\", \"f\"]], null, [[\"g\"]]]", column_pb), }; for (auto array : arrays) { test_array(column_pb, field.get(), diff --git a/be/test/test_util/CMakeLists.txt b/be/test/test_util/CMakeLists.txt index 2d80269aec..223df85022 100644 --- a/be/test/test_util/CMakeLists.txt +++ b/be/test/test_util/CMakeLists.txt @@ -20,10 +20,12 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/test/test_util") set(TEST_UTIL_FILES test_util.cpp + array_utils.cpp ) add_library(Test_util STATIC ${TEST_UTIL_FILES} ) +target_compile_options(Test_util PRIVATE "-fno-access-control") target_link_libraries(Test_util Common Util Gutil ${Boost_LIBRARIES} glog gflags protobuf) diff --git a/be/test/test_util/array_utils.cpp b/be/test/test_util/array_utils.cpp new file mode 100644 index 0000000000..4fb0da5e8b --- /dev/null +++ b/be/test/test_util/array_utils.cpp @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "test_util/array_utils.h" + +#include "common/status.h" +#include "exprs/anyval_util.h" +#include "gen_cpp/olap_file.pb.h" +#include "runtime/collection_value.h" +#include "runtime/free_pool.hpp" +#include "runtime/mem_pool.h" +#include "runtime/mem_tracker.h" +#include "udf/udf_internal.h" +#include "util/array_parser.hpp" + +namespace doris { + +using TypeDesc = FunctionContext::TypeDesc; + +void ArrayUtils::prepare_context(FunctionContext& context, MemPool& mem_pool, + const ColumnPB& column_pb) { + auto function_type_desc = create_function_type_desc(column_pb); + context.impl()->_return_type = function_type_desc; + context.impl()->_pool = new FreePool(&mem_pool); +} + +Status ArrayUtils::create_collection_value(CollectionValue* collection_value, + FunctionContext* context, + const std::string& json_string) { + CollectionVal collection_val; + auto status = ArrayParser::parse(collection_val, context, StringVal(json_string.c_str())); + if (!status.ok()) { + return status; + } + new (collection_value) CollectionValue(collection_val.data, collection_val.length, + collection_val.has_null, collection_val.null_signs); + return Status::OK(); +} + +TypeDesc ArrayUtils::create_function_type_desc(const ColumnPB& column_pb) { + TypeDesc type_desc; + type_desc.len = column_pb.length(); + type_desc.precision = column_pb.precision(); + type_desc.scale = column_pb.frac(); + if (column_pb.type() == "ARRAY") { + type_desc.type = FunctionContext::TYPE_ARRAY; + } else if (column_pb.type() == "INT") { + type_desc.type = FunctionContext::TYPE_INT; + } else if (column_pb.type() == "VARCHAR") { + type_desc.type = FunctionContext::TYPE_VARCHAR; + } + for (const auto& sub_column_pb : column_pb.children_columns()) { + type_desc.children.push_back(create_function_type_desc(sub_column_pb)); + } + return type_desc; +} + +} // namespace doris diff --git a/be/test/test_util/array_utils.h b/be/test/test_util/array_utils.h new file mode 100644 index 0000000000..41503dcf99 --- /dev/null +++ b/be/test/test_util/array_utils.h @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "udf/udf.h" + +namespace doris { + +class ColumnPB; +class MemPool; +class Status; +struct CollectionValue; + +class ArrayUtils { +public: + using TypeDesc = FunctionContext::TypeDesc; + static void prepare_context(FunctionContext& context, MemPool& mem_pool, + const ColumnPB& column_pb); + static Status create_collection_value(CollectionValue* collection_value, + FunctionContext* context, const std::string& json_string); + +private: + static TypeDesc create_function_type_desc(const ColumnPB& column_pb); +}; + +} // namespace doris diff --git a/be/test/util/array_parser_test.cpp b/be/test/util/array_parser_test.cpp index cbda49498d..da14102b77 100644 --- a/be/test/util/array_parser_test.cpp +++ b/be/test/util/array_parser_test.cpp @@ -19,111 +19,102 @@ #include #include -#include -#include "gutil/casts.h" #include "olap/types.h" -#include "runtime/free_pool.hpp" -#include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" #include "runtime/string_value.h" -#include "udf/udf.h" -#include "udf/udf_internal.h" +#include "test_util/array_utils.h" namespace doris { -using TypeDesc = FunctionContext::TypeDesc; - template -TypeDesc create_function_type_desc(FunctionContext::Type type, Ts... sub_types) { - TypeDesc type_desc = {.type = type, - .len = (type == FunctionContext::TYPE_ARRAY) ? OLAP_ARRAY_MAX_BYTES : 0}; - if constexpr (sizeof...(sub_types)) { - type_desc.children.push_back(create_function_type_desc(sub_types...)); +ColumnPB create_column_pb(const std::string& type, const Ts&... sub_column_types) { + ColumnPB column; + column.set_type(type); + column.set_aggregation("NONE"); + column.set_is_nullable(true); + if (type == "ARRAY") { + column.set_length(OLAP_ARRAY_MAX_BYTES); } - return type_desc; + if constexpr (sizeof...(sub_column_types) > 0) { + auto sub_column = create_column_pb(sub_column_types...); + column.add_children_columns()->Swap(&sub_column); + } + return column; } -ColumnPB create_column_pb(const TypeDesc& function_type_desc) { - ColumnPB column_pb; - column_pb.set_length(function_type_desc.len); - switch (function_type_desc.type) { - case FunctionContext::TYPE_ARRAY: - column_pb.set_type("ARRAY"); - break; - case FunctionContext::TYPE_INT: - column_pb.set_type("INT"); - break; - case FunctionContext::TYPE_VARCHAR: - column_pb.set_type("VARCHAR"); - break; - default: - break; - } - for (auto child_type_desc : function_type_desc.children) { - auto sub_column_pb = create_column_pb(child_type_desc); - column_pb.add_children_columns()->Swap(&sub_column_pb); - } - return column_pb; -} - -std::shared_ptr get_type_info(const TypeDesc& function_type_desc) { - auto column_pb = create_column_pb(function_type_desc); +std::shared_ptr get_type_info(const ColumnPB& column_pb) { TabletColumn tablet_column; tablet_column.init_from_pb(column_pb); return get_type_info(&tablet_column); } -void test_array_parser(const TypeDesc& function_type_desc, const std::string& json, +void test_array_parser(const ColumnPB& column_pb, const std::string& json, const CollectionValue& expect) { MemTracker tracker(1024 * 1024, "ArrayParserTest"); MemPool mem_pool(&tracker); - std::unique_ptr function_context(new FunctionContext()); - function_context->impl()->_return_type = function_type_desc; - function_context->impl()->_pool = new FreePool(&mem_pool); - CollectionVal collection_val; - auto status = - ArrayParser::parse(collection_val, function_context.get(), StringVal(json.c_str())); - EXPECT_TRUE(status.ok()); - auto actual = CollectionValue::from_collection_val(collection_val); - EXPECT_TRUE(get_type_info(function_type_desc)->equal(&expect, &actual)); + FunctionContext context; + ArrayUtils::prepare_context(context, mem_pool, column_pb); + CollectionValue actual; + auto status = ArrayUtils::create_collection_value(&actual, &context, json); + ASSERT_TRUE(status.ok()); + EXPECT_TRUE(get_type_info(column_pb)->equal(&expect, &actual)); } TEST(ArrayParserTest, TestParseIntArray) { - auto function_type_desc = - create_function_type_desc(FunctionContext::TYPE_ARRAY, FunctionContext::TYPE_INT); - test_array_parser(function_type_desc, "[]", CollectionValue(0)); + auto column_pb = create_column_pb("ARRAY", "INT"); + test_array_parser(column_pb, "[]", CollectionValue(0)); - int num_items = 3; - std::unique_ptr data(new int32_t[num_items] {1, 2, 3}); - CollectionValue value(data.get(), num_items, false, nullptr); - test_array_parser(function_type_desc, "[1, 2, 3]", value); + int32_t data[] = {1, 2, 3}; + int num_items = sizeof(data) / sizeof(data[0]); + CollectionValue value(data, num_items, false, nullptr); + test_array_parser(column_pb, "[1, 2, 3]", value); - std::unique_ptr null_signs(new bool[num_items] {false, true, false}); + bool null_signs[] = {false, true, false}; value.set_has_null(true); - value.set_null_signs(null_signs.get()); - test_array_parser(function_type_desc, "[1, null, 3]", value); + value.set_null_signs(null_signs); + test_array_parser(column_pb, "[1, null, 3]", value); } TEST(ArrayParserTest, TestParseVarcharArray) { - auto function_type_desc = - create_function_type_desc(FunctionContext::TYPE_ARRAY, FunctionContext::TYPE_VARCHAR); - test_array_parser(function_type_desc, "[]", CollectionValue(0)); + auto column_pb = create_column_pb("ARRAY", "VARCHAR"); + test_array_parser(column_pb, "[]", CollectionValue(0)); - int num_items = 3; - std::unique_ptr data(new char[num_items] {'a', 'b', 'c'}); - std::unique_ptr string_values(new StringValue[num_items] { + char data[] = {'a', 'b', 'c'}; + int num_items = sizeof(data) / sizeof(data[0]); + StringValue string_values[] = { {&data[0], 1}, {&data[1], 1}, {&data[2], 1}, - }); - CollectionValue value(string_values.get(), num_items, false, nullptr); - test_array_parser(function_type_desc, "[\"a\", \"b\", \"c\"]", value); + }; + CollectionValue value(string_values, num_items, false, nullptr); + test_array_parser(column_pb, "[\"a\", \"b\", \"c\"]", value); - std::unique_ptr null_signs(new bool[num_items] {false, true, false}); + bool null_signs[] = {false, true, false}; value.set_has_null(true); - value.set_null_signs(null_signs.get()); - test_array_parser(function_type_desc, "[\"a\", null, \"c\"]", value); + value.set_null_signs(null_signs); + test_array_parser(column_pb, "[\"a\", null, \"c\"]", value); +} + +TEST(ArrayParserTest, TestNestedArray) { + auto column_pb = create_column_pb("ARRAY", "ARRAY", "INT"); + test_array_parser(column_pb, "[]", CollectionValue(0)); + + CollectionValue empty_array(0); + test_array_parser(column_pb, "[[]]", {&empty_array, 1, false, nullptr}); + + int data[] = {1, 0, 3}; + uint32_t num_items = sizeof(data) / sizeof(data[0]); + bool null_signs[] = {false, true, false}; + CollectionValue array = {data, num_items, true, null_signs}; + + CollectionValue array_data[] = {empty_array, array, empty_array, array}; + uint32_t num_arrays = sizeof(array_data) / sizeof(array_data[0]); + test_array_parser(column_pb, "[[], [1, null, 3], [], [1, null, 3]]", + {array_data, num_arrays, false, nullptr}); + bool array_null_signs[] = {false, true, true, false}; + test_array_parser(column_pb, "[[], null, null, [1, null, 3]]", + {array_data, num_arrays, true, array_null_signs}); } } // namespace doris