[Improve](array-function)Improve array function intersect (#20085)

now we just support array function with 2 arrays , but intersect operator can support more than 2 arrays
This commit is contained in:
amory
2023-06-05 10:38:48 +08:00
committed by GitHub
parent 653dc715f6
commit 59a0f80233
11 changed files with 390 additions and 82 deletions

View File

@ -17,6 +17,7 @@
#pragma once
#include "vec/columns/column_array.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/columns_number.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
@ -29,6 +30,15 @@ class IColumn;
namespace doris::vectorized {
struct ColumnArrayMutableData {
public:
MutableColumnPtr array_nested_col = nullptr;
ColumnUInt8::Container* nested_nullmap_data = nullptr;
MutableColumnPtr offsets_col = nullptr;
ColumnArray::Offsets64* offsets_ptr = nullptr;
IColumn* nested_col = nullptr;
};
struct ColumnArrayExecutionData {
public:
void reset() {
@ -45,17 +55,34 @@ public:
const ColumnArray::Offsets64* offsets_ptr = nullptr;
const UInt8* nested_nullmap_data = nullptr;
const IColumn* nested_col = nullptr;
};
struct ColumnArrayMutableData {
public:
MutableColumnPtr array_nested_col = nullptr;
ColumnUInt8::Container* nested_nullmap_data = nullptr;
MutableColumnPtr offsets_col = nullptr;
ColumnArray::Offsets64* offsets_ptr = nullptr;
IColumn* nested_col = nullptr;
ColumnArrayMutableData to_mutable_data() const {
ColumnArrayMutableData dst;
dst.offsets_col = ColumnArray::ColumnOffsets::create();
dst.offsets_ptr =
&reinterpret_cast<ColumnArray::ColumnOffsets*>(dst.offsets_col.get())->get_data();
dst.array_nested_col =
ColumnNullable::create(nested_col->clone_empty(), ColumnUInt8::create());
auto* nullable_col = reinterpret_cast<ColumnNullable*>(dst.array_nested_col.get());
dst.nested_nullmap_data = &nullable_col->get_null_map_data();
dst.nested_col = nullable_col->get_nested_column_ptr().get();
for (size_t row = 0; row < offsets_ptr->size(); ++row) {
dst.offsets_ptr->push_back((*offsets_ptr)[row]);
size_t off = (*offsets_ptr)[row - 1];
size_t len = (*offsets_ptr)[row] - off;
for (int start = off; start < off + len; ++start) {
if (nested_nullmap_data && nested_nullmap_data[start]) {
dst.nested_col->insert_default();
dst.nested_nullmap_data->push_back(1);
} else {
dst.nested_col->insert_from(*nested_col, start);
dst.nested_nullmap_data->push_back(0);
}
}
}
return dst;
}
};
bool extract_column_array_info(const IColumn& src, ColumnArrayExecutionData& data);
ColumnArrayMutableData create_mutable_data(const IColumn* nested_col, bool is_nullable);
@ -66,4 +93,5 @@ MutableColumnPtr assemble_column_array(ColumnArrayMutableData& data);
void slice_array(ColumnArrayMutableData& dst, ColumnArrayExecutionData& src,
const IColumn& offset_column, const IColumn* length_column);
using ColumnArrayExecutionDatas = std::vector<ColumnArrayExecutionData>;
} // namespace doris::vectorized