[ARRAY] Support loading and selecting array type data, not including access by index (#5980)

This is part of the array type support, which has not been fully completed.
The following features are implemented:
1. FE array type support and implementation of array functions, including array syntax analysis and planning
2. Support import array type data through insert into
3. Support select array type data
4. The array type is supported only on the value columns of duplicate tables

This PR merges some code from #4655 #4650 #4644 #4643 #4623 #2979
This commit is contained in:
Zhengguo Yang
2021-07-13 14:02:39 +08:00
committed by GitHub
parent 8fe5c75877
commit ed3ff470ce
115 changed files with 2919 additions and 754 deletions

View File

@ -28,6 +28,7 @@
//#include "runtime/mem_tracker.h"
#include "gen_cpp/Data_types.h"
#include "gen_cpp/data.pb.h"
#include "runtime/collection_value.h"
#include "util/debug_util.h"
using std::vector;
@ -126,7 +127,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch,
TupleRow* row = get_row(i);
std::vector<TupleDescriptor*>::const_iterator desc = tuple_descs.begin();
for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) {
if ((*desc)->string_slots().empty()) {
if ((*desc)->string_slots().empty() && (*desc)->collection_slots().empty()) {
continue;
}
Tuple* tuple = row->get_tuple(j);
@ -146,6 +147,42 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch,
// length. So we make the high bits zero here.
string_val->len &= 0x7FFFFFFFL;
}
// copy collection slots
vector<SlotDescriptor*>::const_iterator slot_collection =
(*desc)->collection_slots().begin();
for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) {
DCHECK((*slot_collection)->type().is_collection_type());
CollectionValue* array_val =
tuple->get_collection_slot((*slot_collection)->tuple_offset());
// assign the data and null_signs pointer positions in tuple_data
int data_offset = reinterpret_cast<intptr_t>(array_val->data());
array_val->set_data(reinterpret_cast<char*>(tuple_data + data_offset));
int null_offset = reinterpret_cast<intptr_t>(array_val->null_signs());
array_val->set_null_signs(reinterpret_cast<bool*>(tuple_data + null_offset));
const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0);
if (!item_type.is_string_type()) {
continue;
}
// copy every string item
for (int i = 0; i < array_val->length(); ++i) {
if (array_val->is_null_at(i)) {
continue;
}
StringValue* dst_item_v = reinterpret_cast<StringValue*>(
(uint8_t*)array_val->data() + i * item_type.get_slot_size());
if (dst_item_v->len != 0) {
int offset = reinterpret_cast<intptr_t>(dst_item_v->ptr);
dst_item_v->ptr = reinterpret_cast<char*>(tuple_data + offset);
}
}
}
}
}
}
@ -221,7 +258,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const TRowBatch& input_batch,
TupleRow* row = get_row(i);
std::vector<TupleDescriptor*>::const_iterator desc = tuple_descs.begin();
for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) {
if ((*desc)->string_slots().empty()) {
if ((*desc)->string_slots().empty() && (*desc)->collection_slots().empty()) {
continue;
}
@ -244,6 +281,40 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const TRowBatch& input_batch,
// length. So we make the high bits zero here.
string_val->len &= 0x7FFFFFFFL;
}
// copy collection slot
vector<SlotDescriptor*>::const_iterator slot_collection =
(*desc)->collection_slots().begin();
for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) {
DCHECK((*slot_collection)->type().is_collection_type());
CollectionValue* array_val =
tuple->get_collection_slot((*slot_collection)->tuple_offset());
int offset = reinterpret_cast<intptr_t>(array_val->data());
array_val->set_data(reinterpret_cast<char*>(tuple_data + offset));
int null_offset = reinterpret_cast<intptr_t>(array_val->null_signs());
array_val->set_null_signs(reinterpret_cast<bool*>(tuple_data + null_offset));
const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0);
if (!item_type.is_string_type()) {
continue;
}
// copy string item
for (int i = 0; i < array_val->length(); ++i) {
if (array_val->is_null_at(i)) {
continue;
}
StringValue* dst_item_v = reinterpret_cast<StringValue*>(
(uint8_t*)array_val->data() + i * item_type.get_slot_size());
if (dst_item_v->len != 0) {
int offset = reinterpret_cast<intptr_t>(dst_item_v->ptr);
dst_item_v->ptr = reinterpret_cast<char*>(tuple_data + offset);
}
}
}
}
}
}
@ -606,6 +677,37 @@ int RowBatch::total_byte_size() {
StringValue* string_val = tuple->get_string_slot((*slot)->tuple_offset());
result += string_val->len;
}
// compute slot collection size
vector<SlotDescriptor*>::const_iterator slot_collection =
(*desc)->collection_slots().begin();
for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) {
DCHECK((*slot_collection)->type().is_collection_type());
if (tuple->is_null((*slot_collection)->null_indicator_offset())) {
continue;
}
// compute data null_signs size
CollectionValue* array_val =
tuple->get_collection_slot((*slot_collection)->tuple_offset());
result += array_val->length() * sizeof(bool);
const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0);
result += array_val->length() * item_type.get_slot_size();
if (!item_type.is_string_type()) {
continue;
}
// compute string type item size
for (int i = 0; i < array_val->length(); ++i) {
if (array_val->is_null_at(i)) {
continue;
}
StringValue* dst_item_v = reinterpret_cast<StringValue*>(
(uint8_t*)array_val->data() + i * item_type.get_slot_size());
result += dst_item_v->len;
}
}
}
}