Files
doris/be/test/vec/columns/column_hash_func_test.cpp
amory b7d6a70868 [FIX](datatype) Implement hash func with array/map/struct type (#21334)
We had not implemented any hash functions for array/map/struct columns, so running SQL like the following makes the BE core dump:

select * from (
        select
            bdp.nc_num,
            collect_list(distinct(bd.catalog_name)) as catalog_name,
            material_qty
        from
            dataease.bu_delivery_product bdp
            left join dataease.bu_trans_transfer btt on bdp.delivery_product_id = btt.delivery_product_id
            left join dataease.bu_delivery bd on bdp.delivery_id = bd.delivery_id
        where
            bd.val_status in ('10', '20', '30', '90')
            and bd.delivery_type in (0, 1, 2)
        group by
            nc_num,
            material_qty
        union
        ALL
        select
            bdp.nc_num,
            collect_list(distinct(bd.catalog_name)) as catalog_name,
            material_qty
        from
            dataease.bu_trans_transfer btt
            left join dataease.bu_delivery_product bdp on bdp.delivery_product_id = btt.delivery_product_id
            left join dataease.bu_delivery bd on bdp.delivery_id = bd.delivery_id
        where
            bd.val_status in ('10', '20', '30', '90')
            and bd.delivery_type in (0, 1, 2)
        group by
            nc_num,
            material_qty
) aa;
core :
2023-06-30 17:11:35 +08:00

191 lines
7.6 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
#include "gtest/gtest_pred_impl.h"
#include "vec/columns/column_const.h"
#include "vec/core/field.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_date.h"
#include "vec/data_types/data_type_date_time.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_map.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/data_type_struct.h"
namespace doris::vectorized {
// Builds the list of nullable scalar data types shared by the hash tests below.
// Returned order: DateTime, Date, the decimal from create_decimal(10, 2, false),
// a Decimal128-backed decimal constructed with (27, 9), Int128, Int64, String.
// Every entry is wrapped in DataTypeNullable.
DataTypes create_scala_data_types() {
    // Wraps a nested type into its nullable counterpart.
    auto as_nullable = [](const DataTypePtr& nested) -> DataTypePtr {
        return std::make_shared<DataTypeNullable>(nested);
    };

    DataTypePtr datetime_type = as_nullable(std::make_shared<DataTypeDateTime>());
    DataTypePtr date_type = as_nullable(std::make_shared<DataTypeDate>());
    DataTypePtr decimal_type = as_nullable(vectorized::create_decimal(10, 2, false));
    DataTypePtr decimal128_type = as_nullable(
            std::make_shared<DataTypeDecimal<vectorized::Decimal128>>(27, 9));
    DataTypePtr int128_type = as_nullable(std::make_shared<DataTypeInt128>());
    DataTypePtr int64_type = as_nullable(std::make_shared<DataTypeInt64>());
    DataTypePtr string_type = as_nullable(std::make_shared<DataTypeString>());

    // Order matters to callers that pair neighbouring entries (see MapTypeTest).
    return {datetime_type, date_type,  decimal_type, decimal128_type,
            int128_type,   int64_type, string_type};
}
// Smoke test for array columns: for every type returned by
// create_scala_data_types(), wraps it in a DataTypeArray, builds a one-row const
// column holding the default value, and runs the SipHash, xxHash and CRC
// hash-update entry points on it. The hashes are only printed; the test checks
// that each call completes without a fatal failure.
TEST(HashFuncTest, ArrayTypeTest) {
    DataTypes dataTypes = create_scala_data_types();

    // One slot per hash family. These buffers are declared outside the loop and
    // are not reset between element types.
    std::vector<uint64_t> sip_hash_vals(1);
    std::vector<uint64_t> xx_hash_vals(1);
    std::vector<uint64_t> crc_hash_vals(1);
    auto* __restrict sip_hashes = sip_hash_vals.data();
    auto* __restrict xx_hashes = xx_hash_vals.data();
    auto* __restrict crc_hashes = crc_hash_vals.data();

    // Iterate by const reference so the DataTypePtr handle is not copied on
    // every iteration.
    for (const auto& d : dataTypes) {
        DataTypePtr a = std::make_shared<DataTypeArray>(d);
        ColumnPtr col_a = a->create_column_const_with_default_value(1);

        // sipHash: a fresh state per element type. The update is issued exactly
        // once, inside the guard, so the printed value reflects a single update.
        std::vector<SipHash> siphashs(1);
        EXPECT_NO_FATAL_FAILURE(col_a->update_hashes_with_value(siphashs));
        sip_hashes[0] = siphashs[0].get64();
        std::cout << sip_hashes[0] << std::endl;

        // xxHash
        EXPECT_NO_FATAL_FAILURE(col_a->update_hashes_with_value(xx_hashes));
        std::cout << xx_hashes[0] << std::endl;

        // crcHash
        EXPECT_NO_FATAL_FAILURE(
                col_a->update_crcs_with_value(crc_hash_vals, PrimitiveType::TYPE_ARRAY));
        std::cout << crc_hashes[0] << std::endl;
    }
}
// Checks hash consistency on a three-row Array<Int64> column whose rows are
// [1,2,3], [1,2,3] and [11,12,13]: for each hash family (SipHash, xxHash, CRC)
// the two identical rows must hash to the same value and the differing row must
// hash to a different value.
TEST(HashFuncTest, ArrayCornerCaseTest) {
    DataTypePtr d = std::make_shared<DataTypeInt64>();
    DataTypePtr a = std::make_shared<DataTypeArray>(d);
    MutableColumnPtr array_mutable_col = a->create_column();

    Array a1, a2;
    a1.push_back(Int64(1));
    a1.push_back(Int64(2));
    a1.push_back(Int64(3));
    // Rows 0 and 1 hold the same array on purpose.
    array_mutable_col->insert(a1);
    array_mutable_col->insert(a1);

    a2.push_back(Int64(11));
    a2.push_back(Int64(12));
    a2.push_back(Int64(13));
    // Row 2 holds different content.
    array_mutable_col->insert(a2);

    EXPECT_EQ(array_mutable_col->size(), 3);

    // One slot per row for each hash family.
    std::vector<uint64_t> sip_hash_vals(3);
    std::vector<uint64_t> xx_hash_vals(3);
    std::vector<uint64_t> crc_hash_vals(3);
    auto* __restrict sip_hashes = sip_hash_vals.data();
    auto* __restrict xx_hashes = xx_hash_vals.data();
    auto* __restrict crc_hashes = crc_hash_vals.data();

    // sipHash: update once, inside the guard, then extract the 64-bit digests.
    std::vector<SipHash> siphashs(3);
    EXPECT_NO_FATAL_FAILURE(array_mutable_col->update_hashes_with_value(siphashs));
    sip_hashes[0] = siphashs[0].get64();
    sip_hashes[1] = siphashs[1].get64();
    sip_hashes[2] = siphashs[2].get64();
    EXPECT_EQ(sip_hashes[0], sip_hashes[1]);
    EXPECT_NE(sip_hashes[0], sip_hashes[2]);

    // xxHash
    EXPECT_NO_FATAL_FAILURE(array_mutable_col->update_hashes_with_value(xx_hashes));
    EXPECT_EQ(xx_hashes[0], xx_hashes[1]);
    EXPECT_NE(xx_hashes[0], xx_hashes[2]);

    // crcHash: the inequality must be checked on the CRC results themselves,
    // not on the xxHash buffer.
    EXPECT_NO_FATAL_FAILURE(
            array_mutable_col->update_crcs_with_value(crc_hash_vals, PrimitiveType::TYPE_ARRAY));
    EXPECT_EQ(crc_hashes[0], crc_hashes[1]);
    EXPECT_NE(crc_hashes[0], crc_hashes[2]);
}
// Smoke test for map columns: every pair of neighbouring entries from
// create_scala_data_types() is passed to DataTypeMap (entry i and entry i + 1,
// presumably as key type and value type). A one-row const column with the
// default value is built, unpacked with unpack_if_const, and run through the
// SipHash, xxHash and CRC hash-update entry points. The hashes are only
// printed; the test checks that each call completes without a fatal failure.
TEST(HashFuncTest, MapTypeTest) {
    DataTypes dataTypes = create_scala_data_types();

    // One slot per hash family. These buffers are declared outside the loop and
    // are not reset between map types.
    std::vector<uint64_t> sip_hash_vals(1);
    std::vector<uint64_t> xx_hash_vals(1);
    std::vector<uint64_t> crc_hash_vals(1);
    auto* __restrict sip_hashes = sip_hash_vals.data();
    auto* __restrict xx_hashes = xx_hash_vals.data();
    auto* __restrict crc_hashes = crc_hash_vals.data();

    // data_type_map
    // Unsigned index with an `i + 1 < size()` bound: no signed/unsigned
    // comparison, and no wrap-around of `size() - 1` if the list were empty.
    for (size_t i = 0; i + 1 < dataTypes.size(); ++i) {
        DataTypePtr a = std::make_shared<DataTypeMap>(dataTypes[i], dataTypes[i + 1]);
        ColumnPtr col_a = a->create_column_const_with_default_value(1);

        // sipHash: a fresh state per map type.
        std::vector<SipHash> siphashs(1);
        EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(siphashs));
        sip_hashes[0] = siphashs[0].get64();
        std::cout << sip_hashes[0] << std::endl;

        // xxHash
        EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(xx_hashes));
        std::cout << xx_hashes[0] << std::endl;

        // crcHash
        EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_crcs_with_value(
                crc_hash_vals, PrimitiveType::TYPE_MAP));
        std::cout << crc_hashes[0] << std::endl;
    }
}
// Smoke test for struct columns: builds a DataTypeStruct from all types returned
// by create_scala_data_types(), creates a one-row const column with the default
// value, unpacks it with unpack_if_const, and runs the SipHash, xxHash and CRC
// hash-update entry points on it. The hashes are only printed; the test checks
// that each call completes without a fatal failure.
TEST(HashFuncTest, StructTypeTest) {
    // data_type_struct
    DataTypes field_types = create_scala_data_types();
    DataTypePtr struct_type = std::make_shared<DataTypeStruct>(field_types);
    ColumnPtr const_col = struct_type->create_column_const_with_default_value(1);

    // Column the hash calls are issued on; resolved once and reused below.
    const ColumnPtr hashed_col = unpack_if_const(const_col).first;

    // Single-row result buffers, one per hash family.
    std::vector<uint64_t> sip_hash_vals(1);
    std::vector<uint64_t> xx_hash_vals(1);
    std::vector<uint64_t> crc_hash_vals(1);

    // sipHash
    std::vector<SipHash> sip_states(1);
    EXPECT_NO_FATAL_FAILURE(hashed_col->update_hashes_with_value(sip_states));
    sip_hash_vals[0] = sip_states[0].get64();
    std::cout << sip_hash_vals[0] << std::endl;

    // xxHash
    EXPECT_NO_FATAL_FAILURE(hashed_col->update_hashes_with_value(xx_hash_vals.data()));
    std::cout << xx_hash_vals[0] << std::endl;

    // crcHash
    EXPECT_NO_FATAL_FAILURE(
            hashed_col->update_crcs_with_value(crc_hash_vals, PrimitiveType::TYPE_STRUCT));
    std::cout << crc_hash_vals[0] << std::endl;
}
} // namespace doris::vectorized