// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include #include #include #include #include #include #include #include #include #include "common/logging.h" #include "gtest/gtest_pred_impl.h" #include "olap/hll.h" #include "util/bitmap_value.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/aggregate_function_reader.h" #include "vec/aggregate_functions/aggregate_function_reader_first_last.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" #include "vec/columns/column.h" #include "vec/columns/column_array.h" #include "vec/columns/column_complex.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/columns_number.h" #include "vec/common/arena.h" #include "vec/common/assert_cast.h" #include "vec/common/string_ref.h" #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_hll.h" #include "vec/data_types/data_type_nullable.h" 
// ---------------------------------------------------------------------------
// File overview: VAggReplaceTest
//
// NOTE(review): this copy of the file appears to have lost its line breaks and
// every angle-bracket span: template parameter lists (`template void ...`),
// explicit template arguments (`assert_cast(unwrap_col)`, `std::is_same_v)`,
// `test_basic_data(11)`) and the system #include targets. The pre-existing `//`
// comments now run to the end of their physical line, so the code below is
// kept exactly as found; restore it from upstream before building.
//
// VAggReplaceTest is a gtest fixture for the aggregate functions looked up by
// the names "replace_reader" and "replace_load", and for the Value /
// CopiedValue types (is_null / set_value / insert_into).
//
// Fixture members:
//   SetUp()
//     Takes AggregateFunctionSimpleFactory::instance() into a local `factory`
//     and passes it to register_aggregate_function_replace_reader_load().
//     NOTE(review): `factory` is declared by value, so the registration may be
//     applied to a copy -- confirm against instance()'s return type.
//   check_column_basic(column, expect_num, pos = 0)
//     Expects column[pos] to be non-null and to represent expect_num. When
//     `nullable` is set the nested column is unwrapped first. Depending on the
//     compile-time branch it compares the string "item" + expect_num, expects
//     the bitmap at pos to contain expect_num, expects the HLL cardinality
//     estimate to be unchanged after update(expect_num) (done through a
//     const_cast on the column's element), or compares get_int(pos).
//   check_column_array(column, expect_num)
//     Expects a one-row array column whose first offset and nested data size
//     both equal expect_num, and whose element i passes check_column_basic
//     with value i.
//   add_elements(input_col, input_nums)
//     Appends one value per i in [0, input_nums): "item<i>", a BitmapValue
//     with i added, a HyperLogLog updated with i, or FieldType(i) passed to
//     insert_data with a length argument of 0. Then expects the column size
//     to equal input_nums. The local `Field field;` is not used.
//   agg_replace_add_elements(input_col, agg_function, place, input_nums)
//     Fills input_col via add_elements, then calls agg_function->add() for
//     every row using _agg_arena_pool.
//   array_add_elements(input_col, input_nums)
//     Inserts input_nums + 1 arrays: [], [0], [0,1], ..., [0..input_nums-1].
//     Decimal elements are wrapped as DecimalField(item, 20). The local
//     `Field field;` is not used.
//   agg_replace_array_add_elements(...)
//     Array counterpart of agg_replace_add_elements.
//   get_data_type() / get_basic_type()
//     Build the DataTypePtr under test. get_data_type wraps the basic type
//     once when `nullable` is set and, when `array` is set, wraps it again
//     (and once more when `nullable`); the wrapper types are not visible in
//     this copy -- presumably DataTypeNullable / DataTypeArray, TODO confirm.
//     get_basic_type constructs decimals with arguments (27, 1).
//   test_agg_replace(fn_name, input_nums, expect_num)
//     Fetches fn_name from the factory, creates the aggregate state in a
//     buffer of size_of_data() bytes, feeds input_nums rows, writes the result
//     into a fresh column, checks it with check_column_basic and destroys the
//     state. NOTE(review): EXPECT_NE(agg_function, nullptr) does not stop the
//     test, so a null function would still be dereferenced.
//   test_agg_array_replace(fn_name, input_nums, expect_num)
//     Array counterpart, checked with check_column_array. Its log line still
//     begins with "test_agg_replace for ".
//   test_basic_data(input_nums) / test_array_data(input_nums)
//     For every generated row: set a Value (then a CopiedValue) from the row,
//     expect it to be non-null, insert it into a fresh column (into the nested
//     column when `nullable`), expect size 1 and check the content.
//   _agg_arena_pool
//     Arena handed to every agg_function->add() call.
//
// Test cases:
//   test_basic_data / test_array_data      -> helpers above with 11 rows.
//   test_basic_replace_reader              -> 10 rows, expects value 0.
//   test_basic_replace_load                -> 10 rows, expects value 9.
//   test_array_replace_reader              -> expects the empty array (0).
//   test_array_replace_load                -> expects the array of length 10.
// ---------------------------------------------------------------------------
#include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" namespace doris::vectorized { class VAggReplaceTest : public testing::Test { public: void SetUp() { AggregateFunctionSimpleFactory factory = AggregateFunctionSimpleFactory::instance(); register_aggregate_function_replace_reader_load(factory); } void TearDown() {} template void check_column_basic(const IColumn* column, int64_t expect_num, size_t pos = 0) { //expect basic column[pos]=expect_num EXPECT_FALSE(column->is_null_at(pos)); auto* unwrap_col = column; if constexpr (nullable) { auto* nullable_col = assert_cast(unwrap_col); EXPECT_FALSE(nullable_col->is_null_at(pos)); unwrap_col = nullable_col->get_nested_column_ptr().get(); } if constexpr (std::is_same_v) { auto str = unwrap_col->get_data_at(pos).to_string(); EXPECT_EQ("item" + std::to_string(expect_num), str); } else if constexpr (std::is_same_v) { auto& container = assert_cast(unwrap_col)->get_data(); auto& bitmap = container[pos]; EXPECT_TRUE(bitmap.contains(static_cast(expect_num))); } else if constexpr (std::is_same_v) { auto& container = assert_cast(unwrap_col)->get_data(); auto& hll = container[pos]; auto expect = hll.estimate_cardinality(); const_cast(&hll)->update(static_cast(expect_num)); auto actual = hll.estimate_cardinality(); EXPECT_EQ(expect, actual); } else { EXPECT_EQ(expect_num, unwrap_col->get_int(pos)); } } template void check_column_array(const IColumn* column, int32_t expect_num) { //expect array column:[[0..expect_num-1]] EXPECT_EQ(column->size(), 1); auto* unwrap_col = column; if constexpr (nullable) { auto* nullable_col = assert_cast(unwrap_col); EXPECT_FALSE(nullable_col->is_null_at(0)); unwrap_col = nullable_col->get_nested_column_ptr().get(); } auto* array_col = assert_cast(unwrap_col); EXPECT_EQ(array_col->get_offsets()[0], expect_num); auto* data_col = array_col->get_data_ptr().get(); EXPECT_EQ(data_col->size(), expect_num); for (size_t i = 0; i < expect_num; ++i) { 
check_column_basic(data_col, i, i); } } template void add_elements(MutableColumnPtr& input_col, size_t input_nums) { //fill column: [0..input_nums-1] using FieldType = typename DataType::FieldType; Field field; for (size_t i = 0; i < input_nums; ++i) { if constexpr (std::is_same_v) { auto item = std::string("item") + std::to_string(i); input_col->insert_data(item.c_str(), item.size()); EXPECT_EQ(item, input_col->get_data_at(i).to_string()); } else if constexpr (std::is_same_v) { BitmapValue bitmap; bitmap.add(i); input_col->insert_data(reinterpret_cast(&bitmap), sizeof(bitmap)); } else if constexpr (std::is_same_v) { HyperLogLog hll; hll.update(i); input_col->insert_data(reinterpret_cast(&hll), sizeof(hll)); } else { auto item = FieldType(static_cast(i)); input_col->insert_data(reinterpret_cast(&item), 0); } } EXPECT_EQ(input_col->size(), input_nums); } template void agg_replace_add_elements(MutableColumnPtr& input_col, AggregateFunctionPtr agg_function, AggregateDataPtr place, size_t input_nums) { //fill column: [0..input_nums-1] add_elements(input_col, input_nums); const IColumn* column[1] = {input_col.get()}; for (int i = 0; i < input_col->size(); i++) { agg_function->add(place, column, i, &_agg_arena_pool); } } template void array_add_elements(MutableColumnPtr& input_col, size_t input_nums) { //fill array column: [[],[0],[0,1]..[0..input_nums-1]] using FieldType = typename DataType::FieldType; Field field; for (int32_t i = 0; i <= input_nums; ++i) { doris::vectorized::Array array(i); for (int32_t j = 0; j < i; ++j) { if constexpr (std::is_same_v) { auto item = std::string("item") + std::to_string(j); array[j] = std::move(item); } else if constexpr (IsDecimalNumber) { auto item = FieldType(static_cast(j)); array[j] = std::move(DecimalField(item, 20)); } else { array[j] = std::move(FieldType(static_cast(j))); } } input_col->insert(array); } EXPECT_EQ(input_col->size(), input_nums + 1); } template void agg_replace_array_add_elements(MutableColumnPtr& input_col, 
AggregateFunctionPtr agg_function, AggregateDataPtr place, size_t input_nums) { //fill array column: [[],[0],[0,1]..[0..input_nums-1]] array_add_elements(input_col, input_nums); const IColumn* column[1] = {input_col.get()}; for (size_t i = 0; i < input_col->size(); ++i) { agg_function->add(place, column, i, &_agg_arena_pool); } } template vectorized::DataTypePtr get_data_type() { vectorized::DataTypePtr data_type = get_basic_type(); if constexpr (nullable) { data_type = std::make_shared(data_type); } if constexpr (array) { data_type = std::make_shared(data_type); if constexpr (nullable) { data_type = std::make_shared(data_type); } } return data_type; } template vectorized::DataTypePtr get_basic_type() { using FieldType = typename DataType::FieldType; if constexpr (IsDecimalNumber) { //decimal column get_int will return (data * scale), so let scale be 1. return std::make_shared(27, 1); } return std::make_shared(); } template void test_agg_replace(const std::string& fn_name, size_t input_nums, size_t expect_num) { vectorized::DataTypePtr data_type = get_data_type(); DataTypes data_types = {data_type}; LOG(INFO) << "test_agg_replace for " << fn_name << "(" << data_types[0]->get_name() << ")"; AggregateFunctionSimpleFactory factory = AggregateFunctionSimpleFactory::instance(); auto agg_function = factory.get(fn_name, data_types, nullable); EXPECT_NE(agg_function, nullptr); std::unique_ptr memory(new char[agg_function->size_of_data()]); AggregateDataPtr place = memory.get(); agg_function->create(place); //EXPECT_EQ(3, 0); auto data_column = data_type->create_column(); agg_replace_add_elements(data_column, agg_function, place, input_nums); //EXPECT_EQ(4, 0); auto column_result = data_type->create_column(); agg_function->insert_result_into(place, *column_result); check_column_basic(column_result.get(), expect_num); agg_function->destroy(place); } template void test_agg_array_replace(const std::string& fn_name, size_t input_nums, size_t expect_num) { 
vectorized::DataTypePtr data_type = get_data_type(); DataTypes data_types = {data_type}; LOG(INFO) << "test_agg_replace for " << fn_name << "(" << data_types[0]->get_name() << ")"; AggregateFunctionSimpleFactory factory = AggregateFunctionSimpleFactory::instance(); auto agg_function = factory.get(fn_name, data_types, nullable); EXPECT_NE(agg_function, nullptr); std::unique_ptr memory(new char[agg_function->size_of_data()]); AggregateDataPtr place = memory.get(); agg_function->create(place); auto input_column = data_type->create_column(); agg_replace_array_add_elements(input_column, agg_function, place, input_nums); auto column_result = data_type->create_column(); agg_function->insert_result_into(place, *column_result); check_column_array(column_result.get(), expect_num); agg_function->destroy(place); } template void test_basic_data(int8_t input_nums) { vectorized::DataTypePtr data_type = get_data_type(); auto data_column = data_type->create_column(); add_elements(data_column, input_nums); EXPECT_EQ(input_nums, data_column->size()); //test Value { Value value; EXPECT_TRUE(value.is_null()); for (int64_t i = 0; i < input_nums; ++i) { value.set_value(data_column.get(), i); EXPECT_FALSE(value.is_null()); auto to_column = data_type->create_column(); if constexpr (nullable) { auto& nullable_col = assert_cast(*to_column); value.insert_into(nullable_col.get_nested_column()); } else { value.insert_into(*to_column); } EXPECT_EQ(1, to_column->size()); check_column_basic(to_column.get(), i); } } //test CopiedValue { CopiedValue value; EXPECT_TRUE(value.is_null()); for (int64_t i = 0; i < input_nums; ++i) { value.set_value(data_column.get(), i); EXPECT_FALSE(value.is_null()); auto to_column = data_type->create_column(); if constexpr (nullable) { auto& nullable_col = assert_cast(*to_column); value.insert_into(nullable_col.get_nested_column()); } else { value.insert_into(*to_column); } EXPECT_EQ(1, to_column->size()); check_column_basic(to_column.get(), i); } } } template void 
test_array_data(int8_t input_nums) { vectorized::DataTypePtr data_type = get_data_type(); auto data_column = data_type->create_column(); array_add_elements(data_column, input_nums); EXPECT_EQ(input_nums + 1, data_column->size()); //test Value { Value value; EXPECT_TRUE(value.is_null()); for (int64_t i = 0; i <= input_nums; ++i) { value.set_value(data_column.get(), i); EXPECT_FALSE(value.is_null()); auto to_column = data_type->create_column(); if constexpr (nullable) { auto& nullable_col = assert_cast(*to_column); value.insert_into(nullable_col.get_nested_column()); } else { value.insert_into(*to_column); } EXPECT_EQ(1, to_column->size()); check_column_array(to_column.get(), i); } } //test CopiedValue { CopiedValue value; EXPECT_TRUE(value.is_null()); for (int64_t i = 0; i <= input_nums; ++i) { value.set_value(data_column.get(), i); EXPECT_FALSE(value.is_null()); auto to_column = data_type->create_column(); if constexpr (nullable) { auto& nullable_col = assert_cast(*to_column); value.insert_into(nullable_col.get_nested_column()); } else { value.insert_into(*to_column); } EXPECT_EQ(1, to_column->size()); check_column_array(to_column.get(), i); } } } private: Arena _agg_arena_pool; }; TEST_F(VAggReplaceTest, test_basic_data) { test_basic_data(11); test_basic_data(11); test_basic_data(11); test_basic_data(11); test_basic_data(11); test_basic_data, ColumnDecimal128V2, false>(11); test_basic_data(11); test_basic_data(11); test_basic_data(11); test_basic_data(11); } TEST_F(VAggReplaceTest, test_array_data) { test_array_data(11); test_array_data(11); test_array_data(11); test_array_data(11); test_array_data(11); test_array_data, ColumnArray, false>(11); test_array_data(11); test_array_data(11); test_array_data(11); test_array_data(11); } TEST_F(VAggReplaceTest, test_basic_replace_reader) { test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); 
test_agg_replace("replace_reader", 10, 0); test_agg_replace, false>("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace, true>("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); test_agg_replace("replace_reader", 10, 0); } TEST_F(VAggReplaceTest, test_basic_replace_load) { test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace, false>("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace, true>("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); test_agg_replace("replace_load", 10, 9); } TEST_F(VAggReplaceTest, test_array_replace_reader) { test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 
0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace, false>("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace, true>("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); test_agg_array_replace("replace_reader", 10, 0); } TEST_F(VAggReplaceTest, test_array_replace_load) { test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace, false>("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace, true>("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); test_agg_array_replace("replace_load", 10, 10); } } // namespace doris::vectorized