[vectorized](function) support array_filter function (#17832)
This commit is contained in:
@ -27,6 +27,7 @@ namespace doris::vectorized {
|
||||
class LambdaFunctionFactory;
|
||||
|
||||
void register_function_array_map(LambdaFunctionFactory& factory);
|
||||
void register_function_array_filter(LambdaFunctionFactory& factory);
|
||||
|
||||
class LambdaFunctionFactory {
|
||||
using Creator = std::function<LambdaFunctionPtr()>;
|
||||
@ -58,7 +59,10 @@ public:
|
||||
/// Returns the shared LambdaFunctionFactory, registering the built-in lambda
/// functions the first time it is requested.
static LambdaFunctionFactory& instance() {
    static std::once_flag oc;
    static LambdaFunctionFactory instance;
    // A once_flag lets exactly one callable run to completion. All registrations
    // therefore have to live in this single call_once: a second call_once on the
    // same flag would be skipped and its registrations silently lost.
    std::call_once(oc, []() {
        register_function_array_map(instance);
        register_function_array_filter(instance);
    });
    return instance;
}
|
||||
};
|
||||
|
||||
165
be/src/vec/exprs/lambda_function/varray_filter_function.cpp
Normal file
165
be/src/vec/exprs/lambda_function/varray_filter_function.cpp
Normal file
@ -0,0 +1,165 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include <fmt/core.h>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "vec/columns/column.h"
|
||||
#include "vec/columns/column_array.h"
|
||||
#include "vec/columns/columns_number.h"
|
||||
#include "vec/core/block.h"
|
||||
#include "vec/data_types/data_type_array.h"
|
||||
#include "vec/exprs/lambda_function/lambda_function.h"
|
||||
#include "vec/exprs/lambda_function/lambda_function_factory.h"
|
||||
#include "vec/exprs/vexpr.h"
|
||||
#include "vec/exprs/vexpr_context.h"
|
||||
#include "vec/utils/util.hpp"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
class ArrayFilterFunction : public LambdaFunction {
|
||||
public:
|
||||
~ArrayFilterFunction() override = default;
|
||||
|
||||
static constexpr auto name = "array_filter";
|
||||
|
||||
static LambdaFunctionPtr create() { return std::make_shared<ArrayFilterFunction>(); }
|
||||
|
||||
std::string get_name() const override { return name; }
|
||||
|
||||
doris::Status execute(VExprContext* context, doris::vectorized::Block* block,
|
||||
int* result_column_id, DataTypePtr result_type,
|
||||
const std::vector<VExpr*>& children) override {
|
||||
///* array_filter(array, array<boolean>) *///
|
||||
|
||||
//1. child[0:end]->execute(src_block)
|
||||
doris::vectorized::ColumnNumbers arguments(children.size());
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
int column_id = -1;
|
||||
RETURN_IF_ERROR(children[i]->execute(context, block, &column_id));
|
||||
arguments[i] = column_id;
|
||||
}
|
||||
|
||||
//2. get first and second array column
|
||||
auto first_column =
|
||||
block->get_by_position(arguments[0]).column->convert_to_full_column_if_const();
|
||||
auto second_column =
|
||||
block->get_by_position(arguments[1]).column->convert_to_full_column_if_const();
|
||||
|
||||
int input_rows = first_column->size();
|
||||
auto first_outside_null_map = ColumnUInt8::create(input_rows, 0);
|
||||
auto first_arg_column = first_column;
|
||||
if (first_arg_column->is_nullable()) {
|
||||
first_arg_column =
|
||||
assert_cast<const ColumnNullable*>(first_column.get())->get_nested_column_ptr();
|
||||
const auto& column_array_nullmap =
|
||||
assert_cast<const ColumnNullable*>(first_column.get())->get_null_map_column();
|
||||
VectorizedUtils::update_null_map(first_outside_null_map->get_data(),
|
||||
column_array_nullmap.get_data());
|
||||
}
|
||||
const ColumnArray& first_col_array = assert_cast<const ColumnArray&>(*first_arg_column);
|
||||
const auto& first_off_data =
|
||||
assert_cast<const ColumnArray::ColumnOffsets&>(first_col_array.get_offsets_column())
|
||||
.get_data();
|
||||
const auto& first_nested_nullable_column =
|
||||
assert_cast<const ColumnNullable&>(*first_col_array.get_data_ptr());
|
||||
|
||||
auto result_data_column = first_nested_nullable_column.clone_empty();
|
||||
auto result_offset_column = ColumnArray::ColumnOffsets::create();
|
||||
auto& result_offset_data = result_offset_column->get_data();
|
||||
vectorized::IColumn::Selector selector;
|
||||
selector.reserve(first_off_data.size());
|
||||
result_offset_data.reserve(input_rows);
|
||||
|
||||
auto second_arg_column = second_column;
|
||||
auto second_outside_null_map = ColumnUInt8::create(input_rows, 0);
|
||||
if (second_arg_column->is_nullable()) {
|
||||
second_arg_column = assert_cast<const ColumnNullable*>(second_column.get())
|
||||
->get_nested_column_ptr();
|
||||
const auto& column_array_nullmap =
|
||||
assert_cast<const ColumnNullable*>(second_column.get())->get_null_map_column();
|
||||
VectorizedUtils::update_null_map(second_outside_null_map->get_data(),
|
||||
column_array_nullmap.get_data());
|
||||
}
|
||||
const ColumnArray& second_col_array = assert_cast<const ColumnArray&>(*second_arg_column);
|
||||
const auto& second_off_data = assert_cast<const ColumnArray::ColumnOffsets&>(
|
||||
second_col_array.get_offsets_column())
|
||||
.get_data();
|
||||
const auto& second_nested_null_map_data =
|
||||
assert_cast<const ColumnNullable&>(*second_col_array.get_data_ptr())
|
||||
.get_null_map_column()
|
||||
.get_data();
|
||||
const auto& second_nested_column =
|
||||
assert_cast<const ColumnNullable&>(*second_col_array.get_data_ptr())
|
||||
.get_nested_column();
|
||||
const auto& second_nested_data =
|
||||
assert_cast<const ColumnUInt8&>(second_nested_column).get_data();
|
||||
|
||||
//3. get the idx of second column data is not null and not 0
|
||||
for (int row = 0; row < input_rows; ++row) {
|
||||
//first or second column is null, so current row is invalid data
|
||||
if (first_outside_null_map->get_data()[row] ||
|
||||
second_outside_null_map->get_data()[row]) {
|
||||
result_offset_data.push_back(result_offset_data.back());
|
||||
} else {
|
||||
unsigned long count = 0;
|
||||
auto first_offset_start = first_off_data[row - 1];
|
||||
auto first_offset_end = first_off_data[row];
|
||||
auto second_offset_start = second_off_data[row - 1];
|
||||
auto second_offset_end = second_off_data[row];
|
||||
auto move_off = second_offset_start;
|
||||
for (auto off = first_offset_start;
|
||||
off < first_offset_end && move_off < second_offset_end; // not out range
|
||||
++off) {
|
||||
if (!second_nested_null_map_data[move_off] && // not null
|
||||
second_nested_data[move_off]) { // not 0
|
||||
count++;
|
||||
selector.push_back(off);
|
||||
}
|
||||
move_off++;
|
||||
}
|
||||
result_offset_data.push_back(count + result_offset_data.back());
|
||||
}
|
||||
}
|
||||
first_nested_nullable_column.append_data_by_selector(result_data_column, selector);
|
||||
|
||||
//4. insert the result column to block
|
||||
ColumnWithTypeAndName result_arr;
|
||||
if (result_type->is_nullable()) {
|
||||
result_arr = {
|
||||
ColumnNullable::create(ColumnArray::create(std::move(result_data_column),
|
||||
std::move(result_offset_column)),
|
||||
std::move(first_outside_null_map)),
|
||||
result_type, "array_filter_result"};
|
||||
|
||||
} else {
|
||||
DCHECK(!first_column->is_nullable());
|
||||
DCHECK(!second_column->is_nullable());
|
||||
result_arr = {ColumnArray::create(std::move(result_data_column),
|
||||
std::move(result_offset_column)),
|
||||
result_type, "array_filter_result"};
|
||||
}
|
||||
block->insert(std::move(result_arr));
|
||||
*result_column_id = block->columns() - 1;
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
|
||||
void register_function_array_filter(doris::vectorized::LambdaFunctionFactory& factory) {
|
||||
factory.register_function<ArrayFilterFunction>();
|
||||
}
|
||||
} // namespace doris::vectorized
|
||||
@ -66,20 +66,17 @@ public:
|
||||
Block lambda_block;
|
||||
for (int i = 0; i < arguments.size(); ++i) {
|
||||
const auto& array_column_type_name = block->get_by_position(arguments[i]);
|
||||
auto column_array = array_column_type_name.column;
|
||||
column_array = column_array->convert_to_full_column_if_const();
|
||||
auto column_array = array_column_type_name.column->convert_to_full_column_if_const();
|
||||
auto type_array = array_column_type_name.type;
|
||||
|
||||
if (type_array->is_nullable()) {
|
||||
// get the nullmap of nullable column
|
||||
const auto& column_array_nullmap =
|
||||
assert_cast<const ColumnNullable&>(*array_column_type_name.column)
|
||||
.get_null_map_column();
|
||||
assert_cast<const ColumnNullable&>(*column_array).get_null_map_column();
|
||||
|
||||
// get the array column from nullable column
|
||||
column_array =
|
||||
assert_cast<const ColumnNullable*>(array_column_type_name.column.get())
|
||||
->get_nested_column_ptr();
|
||||
column_array = assert_cast<const ColumnNullable*>(column_array.get())
|
||||
->get_nested_column_ptr();
|
||||
|
||||
// get the nested type from nullable type
|
||||
type_array = assert_cast<const DataTypeNullable*>(array_column_type_name.type.get())
|
||||
|
||||
Reference in New Issue
Block a user