Files
doris/be/src/vec/functions/array/function_array_index.h
camby 71ce3c4a6e [feature-wip](array-type) Add codes and UT for array_contains and array_position functions (#8401) (#8589)
array_contains function usage example:
1. create table with ARRAY column, and insert some data:
```
> select * from array_test;
+------+------+--------+
| k1   | k2   | k3     |
+------+------+--------+
|    1 |    2 | [1, 2] |
|    2 |    3 | NULL   |
|    4 | NULL | []     |
|    3 | NULL | NULL   |
+------+------+--------+
```
2. enable the vectorized engine:
```
> set enable_vectorized_engine=true;
```
3. select with array_contains:
```
> select k1,array_contains(k3,1) from array_test;
+------+-------------------------+
| k1   | array_contains(`k3`, 1) |
+------+-------------------------+
|    3 |                    NULL |
|    1 |                       1 |
|    2 |                    NULL |
|    4 |                       0 |
+------+-------------------------+
```
4. array_contains can also be used in a WHERE condition:
```
> select * from array_test where array_contains(k3,1);
+------+------+--------+
| k1   | k2   | k3     |
+------+------+--------+
|    1 |    2 | [1, 2] |
+------+------+--------+
```
5. array_position usage example:
```
> select k1,k3,array_position(k3,2) from array_test;
+------+--------+-------------------------+
| k1   | k3     | array_position(`k3`, 2) |
+------+--------+-------------------------+
|    3 | NULL   |                    NULL |
|    1 | [1, 2] |                       2 |
|    2 | NULL   |                    NULL |
|    4 | []     |                       0 |
+------+--------+-------------------------+
```
2022-03-22 15:42:40 +08:00

197 lines
8.1 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/arrayIndex.h
// and modified by Doris
#pragma once
#include <string_view>
#include "vec/columns/column_array.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_string.h"
#include "vec/data_types/data_type_number.h"
#include "vec/functions/function.h"
namespace doris::vectorized {
struct ArrayContainsAction
{
using ResultType = UInt8;
static constexpr const bool resume_execution = false;
static constexpr void apply(ResultType& current, size_t) noexcept { current = 1; }
};
struct ArrayPositionAction
{
using ResultType = Int64;
static constexpr const bool resume_execution = false;
static constexpr void apply(ResultType& current, size_t j) noexcept { current = j + 1; }
};
template <typename ConcreteAction, typename Name>
class FunctionArrayIndex : public IFunction
{
public:
using ResultType = typename ConcreteAction::ResultType;
static constexpr auto name = Name::name;
static FunctionPtr create() { return std::make_shared<FunctionArrayIndex>(); }
/// Get function name.
String get_name() const override { return name; }
bool is_variadic() const override { return false; }
size_t get_number_of_arguments() const override { return 2; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
DCHECK(WhichDataType(arguments[0]).is_array());
return std::make_shared<DataTypeNumber<ResultType>>();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
return execute_non_nullable(block, arguments, result, input_rows_count);
}
private:
static bool execute_string(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) {
// check array nested column type and get data
auto array_column = check_and_get_column<ColumnArray>(*block.get_by_position(arguments[0]).column);
DCHECK(array_column != nullptr);
auto nested_column = check_and_get_column<ColumnString>(array_column->get_data());
if (!nested_column) {
return false;
}
const auto& arr_offs = array_column->get_offsets();
const auto& str_offs = nested_column->get_offsets();
const auto& str_chars = nested_column->get_chars();
// check right column type
auto ptr = block.get_by_position(arguments[1]).column;
if (is_column_const(*ptr)) {
ptr = check_and_get_column<ColumnConst>(ptr)->get_data_column_ptr();
}
if (!check_and_get_column<ColumnString>(*ptr)) {
return false;
}
// expand const column and get data
auto right_column = check_and_get_column<ColumnString>(*block.get_by_position(arguments[1]).column->convert_to_full_column_if_const());
const auto& right_offs = right_column->get_offsets();
const auto& right_chars = right_column->get_chars();
// prepare return data
auto dst = ColumnVector<ResultType>::create();
auto& dst_data = dst->get_data();
dst_data.resize(input_rows_count);
// process
for (size_t row = 0; row < input_rows_count; ++row) {
ResultType res = 0;
size_t off = arr_offs[row - 1];
size_t len = arr_offs[row] - off;
size_t right_off = right_offs[row - 1];
size_t right_len = right_offs[row] - right_off;
for (size_t pos = 0; pos < len; ++pos) {
size_t str_pos = str_offs[pos + off - 1];
size_t str_len = str_offs[pos + off] - str_pos;
const char* left_raw_v = reinterpret_cast<const char*>(&str_chars[str_pos]);
const char* right_raw_v = reinterpret_cast<const char*>(&right_chars[right_off]);
if (std::string_view(left_raw_v, str_len) == std::string_view(right_raw_v, right_len)) {
ConcreteAction::apply(res, pos);
break;
}
}
dst_data[row] = res;
}
block.replace_by_position(result, std::move(dst));
return true;
}
#define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64
template <typename... Integral>
static bool execute_integral(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) {
return (execute_integral_expanded<Integral, Integral...>(block, arguments, result, input_rows_count) || ...);
}
template <typename A, typename... Other>
static bool execute_integral_expanded(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) {
return (execute_integral_impl<A, Other>(block, arguments, result, input_rows_count) || ...);
}
template <typename Initial, typename Resulting>
static bool execute_integral_impl(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) {
// check array nested column type and get data
auto array_column = check_and_get_column<ColumnArray>(*block.get_by_position(arguments[0]).column);
DCHECK(array_column != nullptr);
auto nested_column = check_and_get_column<ColumnVector<Initial>>(array_column->get_data());
if (!nested_column) {
return false;
}
const auto& offsets = array_column->get_offsets();
const auto& nested_data = nested_column->get_data();
// check right column type
auto ptr = block.get_by_position(arguments[1]).column;
if (is_column_const(*ptr)) {
ptr = check_and_get_column<ColumnConst>(ptr)->get_data_column_ptr();
}
if (!check_and_get_column<ColumnVector<Resulting>>(*ptr)) {
return false;
}
// expand const column and get data
auto right_column = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
const auto& right_data = check_and_get_column<ColumnVector<Resulting>>(*right_column)->get_data();
// prepare return data
auto dst = ColumnVector<ResultType>::create();
auto& dst_data = dst->get_data();
dst_data.resize(input_rows_count);
// process
for (size_t row = 0; row < input_rows_count; ++row) {
ResultType res = 0;
size_t off = offsets[row - 1];
size_t len = offsets[row] - off;
for (size_t pos = 0; pos < len; ++pos) {
if (nested_data[pos + off] == right_data[row]) {
ConcreteAction::apply(res, pos);
break;
}
}
dst_data[row] = res;
}
block.replace_by_position(result, std::move(dst));
return true;
}
Status execute_non_nullable(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) {
WhichDataType right_type(block.get_by_position(arguments[1]).type);
if ((right_type.is_string() && execute_string(block, arguments, result, input_rows_count)) ||
execute_integral<INTEGRAL_TPL_PACK>(block, arguments, result, input_rows_count)) {
return Status::OK();
}
return Status::OK();
}
#undef INTEGRAL_TPL_PACK
};
} // namespace doris::vectorized