// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. // This file is copied from // https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/Transforms/WindowTransform.h // and modified by Doris #pragma once #include "factory_helpers.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/aggregate_function_reader_first_last.h" #include "vec/aggregate_functions/helpers.h" #include "vec/columns/column_vector.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" #include "vec/io/io_helper.h" namespace doris::vectorized { struct RowNumberData { int64_t count; }; class WindowFunctionRowNumber final : public IAggregateFunctionDataHelper { public: WindowFunctionRowNumber(const DataTypes& argument_types_) : IAggregateFunctionDataHelper(argument_types_) {} String get_name() const override { return "row_number"; } DataTypePtr get_return_type() const override { return std::make_shared(); } void add(AggregateDataPtr place, const IColumn**, size_t, Arena*) const override { ++data(place).count; } void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, Arena* arena) const override { ++data(place).count; } void reset(AggregateDataPtr place) const override { WindowFunctionRowNumber::data(place).count = 0; } void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override { assert_cast(to).get_data().push_back(data(place).count); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {} void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {} void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {} }; struct RankData { int64_t rank; int64_t count; int64_t peer_group_start; }; class WindowFunctionRank final : public IAggregateFunctionDataHelper { public: WindowFunctionRank(const DataTypes& argument_types_) : IAggregateFunctionDataHelper(argument_types_) {} String get_name() const override { return "rank"; } DataTypePtr get_return_type() const override { return std::make_shared(); } void add(AggregateDataPtr place, const IColumn**, size_t, Arena*) const override { ++data(place).rank; } void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, Arena* arena) const override { int64_t peer_group_count = frame_end - frame_start; if (WindowFunctionRank::data(place).peer_group_start != frame_start) { WindowFunctionRank::data(place).peer_group_start = frame_start; WindowFunctionRank::data(place).rank += WindowFunctionRank::data(place).count; } WindowFunctionRank::data(place).count = peer_group_count; } void reset(AggregateDataPtr place) const override { WindowFunctionRank::data(place).rank = 0; WindowFunctionRank::data(place).count = 1; WindowFunctionRank::data(place).peer_group_start = -1; } void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override { assert_cast(to).get_data().push_back(data(place).rank); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {} void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {} void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {} }; struct DenseRankData { int64_t rank; int64_t peer_group_start; }; class WindowFunctionDenseRank final : public IAggregateFunctionDataHelper { public: WindowFunctionDenseRank(const DataTypes& argument_types_) : IAggregateFunctionDataHelper(argument_types_) {} String get_name() const override { return "dense_rank"; } DataTypePtr get_return_type() const override { return std::make_shared(); } void add(AggregateDataPtr place, const IColumn**, size_t, Arena*) const override { ++data(place).rank; } void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, Arena* arena) const override { if (WindowFunctionDenseRank::data(place).peer_group_start != frame_start) { WindowFunctionDenseRank::data(place).peer_group_start = frame_start; WindowFunctionDenseRank::data(place).rank++; } } void reset(AggregateDataPtr place) const override { WindowFunctionDenseRank::data(place).rank = 0; WindowFunctionDenseRank::data(place).peer_group_start = -1; } void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override { assert_cast(to).get_data().push_back(data(place).rank); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {} void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {} void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {} }; struct NTileData { int64_t bucket_index; int64_t rows; }; class WindowFunctionNTile final : public IAggregateFunctionDataHelper { public: WindowFunctionNTile(const DataTypes& argument_types_) : IAggregateFunctionDataHelper(argument_types_) {} String get_name() const override { return "ntile"; } DataTypePtr get_return_type() const override { return std::make_shared(); } void add(AggregateDataPtr place, const IColumn**, size_t, Arena*) const override {} void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, Arena* arena) const override { // some variables are partition related, but there is no chance to init them // when the new partition arrives, so we calculate them evey time now. // Partition = big_bucket_num * big_bucket_size + small_bucket_num * small_bucket_size int64_t row_index = ++WindowFunctionNTile::data(place).rows - 1; int64_t bucket_num = columns[0]->get_int(0); int64_t partition_size = partition_end - partition_start; int64 small_bucket_size = partition_size / bucket_num; int64 big_bucket_num = partition_size % bucket_num; int64 first_small_bucket_row_index = big_bucket_num * (small_bucket_size + 1); if (row_index >= first_small_bucket_row_index) { // small_bucket_size can't be zero WindowFunctionNTile::data(place).bucket_index = big_bucket_num + 1 + (row_index - first_small_bucket_row_index) / small_bucket_size; } else { WindowFunctionNTile::data(place).bucket_index = row_index / (small_bucket_size + 1) + 1; } } void reset(AggregateDataPtr place) const override { WindowFunctionNTile::data(place).rows = 0; } void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override { assert_cast(to).get_data().push_back( WindowFunctionNTile::data(place).bucket_index); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {} void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {} void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {} }; template struct FirstLastData : public ReaderFirstAndLastData { public: void set_is_null() { this->_data_value.reset(); } }; template struct BaseValue : public Value { public: bool is_null() const { return this->_ptr == nullptr; } // because _ptr pointer to first_argument or third argument, so it's difficult to cast ptr // so here will call virtual function StringRef get_value() const { return this->_ptr->get_data_at(this->_offset); } }; template struct LeadLagData { public: void reset() { _data_value.reset(); _default_value.reset(); _is_inited = false; } bool default_is_null() { return _default_value.is_null(); } // here _ptr pointer default column from third void set_value_from_default() { this->_data_value = _default_value; } void insert_result_into(IColumn& to) const { if constexpr (result_is_nullable) { if (_data_value.is_null()) { auto& col = assert_cast(to); col.insert_default(); } else { auto& col = assert_cast(to); StringRef value = _data_value.get_value(); col.insert_data(value.data, value.size); } } else { StringRef value = _data_value.get_value(); to.insert_data(value.data, value.size); } } void set_is_null() { this->_data_value.reset(); } void set_value(const IColumn** columns, size_t pos) { if constexpr (arg_is_nullable) { if (assert_cast(columns[0])->is_null_at(pos)) { // ptr == nullptr means nullable _data_value.reset(); return; } } // here ptr is pointer to nullable column or not null column from first _data_value.set_value(columns[0], pos); } void check_default(const IColumn* column) { if (!_is_inited) { if (is_column_nullable(*column)) { const auto* nullable_column = assert_cast(column); if (nullable_column->is_null_at(0)) { _default_value.reset(); } } else { _default_value.set_value(column, 0); } _is_inited = true; } } private: BaseValue _data_value; BaseValue _default_value; bool _is_inited = false; }; template struct WindowFunctionLeadImpl : Data { void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, const IColumn** columns) { this->check_default(columns[2]); if (frame_end > partition_end) { //output default value, win end is under partition if (this->default_is_null()) { this->set_is_null(); } else { this->set_value_from_default(); } return; } this->set_value(columns, frame_end - 1); } static const char* name() { return "lead"; } }; template struct WindowFunctionLagImpl : Data { void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, const IColumn** columns) { this->check_default(columns[2]); if (partition_start >= frame_end) { //[unbound preceding(0), offset preceding(-123)] if (this->default_is_null()) { // win start is beyond partition this->set_is_null(); } else { this->set_value_from_default(); } return; } this->set_value(columns, frame_end - 1); } static const char* name() { return "lag"; } }; // TODO: first_value && last_value in some corner case will be core, // if need to simply change it, should set them to always nullable insert into null value, and register in cpp maybe be change // But it's may be another better way to handle it template struct WindowFunctionFirstImpl : Data { void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, const IColumn** columns) { if (this->has_set_value()) { return; } if (frame_start < frame_end && frame_end <= partition_start) { //rewrite last_value when under partition this->set_is_null(); //so no need more judge return; } frame_start = std::max(frame_start, partition_start); this->set_value(columns, frame_start); } static const char* name() { return "first_value"; } }; template struct WindowFunctionLastImpl : Data { void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, const IColumn** columns) { if ((frame_start < frame_end) && ((frame_end <= partition_start) || (frame_start >= partition_end))) { //beyond or under partition, set null this->set_is_null(); return; } frame_end = std::min(frame_end, partition_end); this->set_value(columns, frame_end - 1); } static const char* name() { return "last_value"; } }; template class WindowFunctionData final : public IAggregateFunctionDataHelper> { public: WindowFunctionData(const DataTypes& argument_types) : IAggregateFunctionDataHelper>(argument_types), _argument_type(argument_types[0]) {} String get_name() const override { return Data::name(); } DataTypePtr get_return_type() const override { return _argument_type; } void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, Arena* arena) const override { this->data(place).add_range_single_place(partition_start, partition_end, frame_start, frame_end, columns); } void reset(AggregateDataPtr place) const override { this->data(place).reset(); } void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override { this->data(place).insert_result_into(to); } void add(AggregateDataPtr place, const IColumn** columns, size_t row_num, Arena* arena) const override { LOG(FATAL) << "WindowFunctionLeadLagData do not support add"; } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override { LOG(FATAL) << "WindowFunctionLeadLagData do not support merge"; } void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override { LOG(FATAL) << "WindowFunctionLeadLagData do not support serialize"; } void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override { LOG(FATAL) << "WindowFunctionLeadLagData do not support deserialize"; } private: DataTypePtr _argument_type; }; } // namespace doris::vectorized