Files
doris/be/src/vec/functions/hll_cardinality.cpp

137 lines
5.1 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <stddef.h>
#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
#include <memory>
#include <utility>
#include <vector>
#include "common/status.h"
#include "olap/hll.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
#include "vec/columns/column_complex.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_number.h"
#include "vec/functions/function.h"
#include "vec/functions/simple_function_factory.h"
namespace doris {
class FunctionContext;
} // namespace doris
namespace doris::vectorized {
struct HLLCardinality {
static constexpr auto name = "hll_cardinality";
using ReturnType = DataTypeNumber<Int64>;
static void vector(const std::vector<HyperLogLog>& data, MutableColumnPtr& col_res) {
typename ColumnVector<Int64>::Container& res =
reinterpret_cast<ColumnVector<Int64>*>(col_res.get())->get_data();
auto size = res.size();
for (int i = 0; i < size; ++i) {
res[i] = data[i].estimate_cardinality();
}
}
static void vector_nullable(const std::vector<HyperLogLog>& data, const NullMap& nullmap,
MutableColumnPtr& col_res) {
typename ColumnVector<Int64>::Container& res =
reinterpret_cast<ColumnVector<Int64>*>(col_res.get())->get_data();
auto size = res.size();
for (int i = 0; i < size; ++i) {
if (nullmap[i]) {
res[i] = 0;
} else {
res[i] = data[i].estimate_cardinality();
}
}
}
};
template <typename Function>
class FunctionHLL : public IFunction {
public:
static constexpr auto name = Function::name;
static FunctionPtr create() { return std::make_shared<FunctionHLL>(); }
String get_name() const override { return Function::name; }
size_t get_number_of_arguments() const override { return 1; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<typename Function::ReturnType>();
}
bool use_default_implementation_for_nulls() const override { return false; }
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
auto column = block.get_by_position(arguments[0]).column;
MutableColumnPtr column_result = get_return_type_impl({})->create_column();
column_result->resize(input_rows_count);
if (const ColumnNullable* col_nullable =
check_and_get_column<ColumnNullable>(column.get())) {
const ColumnHLL* col =
check_and_get_column<ColumnHLL>(col_nullable->get_nested_column_ptr().get());
const ColumnUInt8* col_nullmap = check_and_get_column<ColumnUInt8>(
col_nullable->get_null_map_column_ptr().get());
if (col != nullptr && col_nullmap != nullptr) {
Function::vector_nullable(col->get_data(), col_nullmap->get_data(), column_result);
block.replace_by_position(result, std::move(column_result));
return Status::OK();
}
} else if (const ColumnHLL* col = check_and_get_column<ColumnHLL>(column.get())) {
Function::vector(col->get_data(), column_result);
block.replace_by_position(result, std::move(column_result));
return Status::OK();
} else {
return Status::RuntimeError("Illegal column {} of argument of function {}",
block.get_by_position(arguments[0]).column->get_name(),
get_name());
}
block.replace_by_position(result, std::move(column_result));
return Status::OK();
}
};
using FunctionHLLCardinality = FunctionHLL<HLLCardinality>;
void register_function_hll_cardinality(SimpleFunctionFactory& factory) {
factory.register_function<FunctionHLLCardinality>();
}
} // namespace doris::vectorized