// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "olap/hll.h" #include #include #include "gtest/gtest_pred_impl.h" #include "util/hash_util.hpp" #include "util/slice.h" namespace doris { class TestHll : public testing::Test { public: virtual ~TestHll() {} }; static uint64_t hash(uint64_t value) { return HashUtil::murmur_hash64A(&value, 8, 0); } // keep logic same with java version in fe when you change hll_test.cpp,see HllTest.java TEST_F(TestHll, Normal) { uint8_t buf[HLL_REGISTERS_COUNT + 1] = {0}; // empty { Slice str((char*)buf, 0); EXPECT_FALSE(HyperLogLog::is_valid(str)); } // check unknown type { buf[0] = 60; Slice str((char*)buf, 1); EXPECT_FALSE(HyperLogLog::is_valid(str)); } // empty { HyperLogLog empty_hll; int len = empty_hll.serialize(buf); EXPECT_EQ(1, len); HyperLogLog test_hll(Slice((char*)buf, len)); EXPECT_EQ(0, test_hll.estimate_cardinality()); // check serialize { Slice str((char*)buf, len); EXPECT_TRUE(HyperLogLog::is_valid(str)); } { Slice str((char*)buf, len + 1); EXPECT_FALSE(HyperLogLog::is_valid(str)); } } // explicit [0. 100) HyperLogLog explicit_hll; { for (int i = 0; i < 100; ++i) { explicit_hll.update(hash(i)); } int len = explicit_hll.serialize(buf); EXPECT_EQ(1 + 1 + 100 * 8, len); // check serialize { Slice str((char*)buf, len); EXPECT_TRUE(HyperLogLog::is_valid(str)); } { Slice str((char*)buf, 1); EXPECT_FALSE(HyperLogLog::is_valid(str)); } HyperLogLog test_hll(Slice((char*)buf, len)); test_hll.update(hash(0)); { HyperLogLog other_hll; for (int i = 0; i < 100; ++i) { other_hll.update(hash(i)); } test_hll.merge(other_hll); } EXPECT_EQ(100, test_hll.estimate_cardinality()); } // sparse [1024, 2048) HyperLogLog sparse_hll; { for (int i = 0; i < 1024; ++i) { sparse_hll.update(hash(i + 1024)); } int len = sparse_hll.serialize(buf); EXPECT_TRUE(len < HLL_REGISTERS_COUNT + 1); // check serialize { Slice str((char*)buf, len); EXPECT_TRUE(HyperLogLog::is_valid(str)); } { Slice str((char*)buf, 1 + 3); EXPECT_FALSE(HyperLogLog::is_valid(str)); } HyperLogLog test_hll(Slice((char*)buf, len)); test_hll.update(hash(1024)); { HyperLogLog other_hll; for (int i = 0; i < 1024; ++i) { other_hll.update(hash(i + 1024)); } test_hll.merge(other_hll); } auto cardinality = test_hll.estimate_cardinality(); EXPECT_EQ(sparse_hll.estimate_cardinality(), cardinality); // 2% error rate EXPECT_TRUE(cardinality > 1000 && cardinality < 1045); } // full [64 * 1024, 128 * 1024) HyperLogLog full_hll; { for (int i = 0; i < 64 * 1024; ++i) { full_hll.update(hash(64 * 1024 + i)); } int len = full_hll.serialize(buf); EXPECT_EQ(HLL_REGISTERS_COUNT + 1, len); // check serialize { Slice str((char*)buf, len); EXPECT_TRUE(HyperLogLog::is_valid(str)); } { Slice str((char*)buf, len + 1); EXPECT_FALSE(HyperLogLog::is_valid(str)); } HyperLogLog test_hll(Slice((char*)buf, len)); auto cardinality = test_hll.estimate_cardinality(); EXPECT_EQ(full_hll.estimate_cardinality(), cardinality); // 2% error rate EXPECT_TRUE(cardinality > 62 * 1024 && cardinality < 66 * 1024); } // merge explicit to empty_hll { HyperLogLog new_explicit_hll; new_explicit_hll.merge(explicit_hll); EXPECT_EQ(100, new_explicit_hll.estimate_cardinality()); // merge another explicit { HyperLogLog other_hll; for (int i = 100; i < 200; ++i) { other_hll.update(hash(i)); } // this is converted to full other_hll.merge(new_explicit_hll); EXPECT_TRUE(other_hll.estimate_cardinality() > 190); } // merge full { new_explicit_hll.merge(full_hll); EXPECT_TRUE(new_explicit_hll.estimate_cardinality() > full_hll.estimate_cardinality()); } } // merge sparse to empty_hll { HyperLogLog new_sparse_hll; new_sparse_hll.merge(sparse_hll); EXPECT_EQ(sparse_hll.estimate_cardinality(), new_sparse_hll.estimate_cardinality()); // merge explicit new_sparse_hll.merge(explicit_hll); EXPECT_TRUE(new_sparse_hll.estimate_cardinality() > sparse_hll.estimate_cardinality()); // merge full new_sparse_hll.merge(full_hll); EXPECT_TRUE(new_sparse_hll.estimate_cardinality() > full_hll.estimate_cardinality()); } } TEST_F(TestHll, InvalidPtr) { { HyperLogLog hll(Slice((char*)nullptr, 0)); EXPECT_EQ(0, hll.estimate_cardinality()); } { uint8_t buf[64] = {60}; HyperLogLog hll(Slice(buf, 1)); EXPECT_EQ(0, hll.estimate_cardinality()); } } } // namespace doris