There are some long loops and sleeps in the unit tests, so running all unit tests takes a very long time, especially in TSAN mode. This patch speeds up the unit tests by shortening long loops and sleeps; in my environment all unit tests finish in 1 minute. This is useful for basic functional unit testing. You can control this mode with a new environment variable, 'DORIS_ALLOW_SLOW_TESTS'. For example, you can set: export DORIS_ALLOW_SLOW_TESTS=1 and you can also disable it by setting: export DORIS_ALLOW_SLOW_TESTS=0
255 lines
7.7 KiB
C++
255 lines
7.7 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#include "util/tdigest.h"
|
|
|
|
#include <gtest/gtest.h>
|
|
|
|
#include <random>
|
|
|
|
#include "test_util/test_util.h"
|
|
|
|
namespace doris {
|
|
|
|
class TDigestTest : public ::testing::Test {
|
|
protected:
|
|
// You can remove any or all of the following functions if its body
|
|
// is empty.
|
|
TDigestTest() {
|
|
// You can do set-up work for each test here.
|
|
}
|
|
|
|
virtual ~TDigestTest() {
|
|
// You can do clean-up work that doesn't throw exceptions here.
|
|
}
|
|
|
|
// If the constructor and destructor are not enough for setting up
|
|
// and cleaning up each test, you can define the following methods:
|
|
|
|
virtual void SetUp() {
|
|
// Code here will be called immediately after the constructor (right
|
|
// before each test).
|
|
}
|
|
|
|
virtual void TearDown() {
|
|
// Code here will be called immediately after each test (right
|
|
// before the destructor).
|
|
}
|
|
|
|
static void SetUpTestCase() {
|
|
static bool initialized = false;
|
|
if (!initialized) {
|
|
FLAGS_logtostderr = true;
|
|
google::InstallFailureSignalHandler();
|
|
google::InitGoogleLogging("testing::TDigestTest");
|
|
initialized = true;
|
|
}
|
|
}
|
|
|
|
// Objects declared here can be used by all tests in the test case for Foo.
|
|
};
|
|
|
|
// Reference quantile used to cross-check TDigest results.
//
// `values` is indexed positionally and is never sorted here, so callers are
// expected to pass it in ascending order. Sample i is treated as sitting at
// position i + 0.5 on a [0, size] axis; the result is the linear interpolation
// between the two samples surrounding position q * size.
//
// Returns:
//   - NAN when `values` is empty;
//   - the last element when q == 1 or there is only one element;
//   - the first / last element when q * size lies within half a sample of
//     either end;
//   - the interpolated value otherwise.
static double quantile(const double q, const std::vector<double>& values) {
    if (values.empty()) {
        return NAN;
    }
    if (q == 1 || values.size() == 1) {
        return values.back();
    }

    const double size = static_cast<double>(values.size());
    double index = q * size;
    if (index < 0.5) {
        return values.front();
    }
    // `<=` rather than `<`: when index is exactly size - 0.5 the interpolation
    // below would read values[size] (one past the end) with weight 0. Returning
    // the last element gives the same value without the out-of-bounds access.
    if (size - index <= 0.5) {
        return values.back();
    }

    index -= 0.5;
    const int intIndex = static_cast<int>(index);
    return values[intIndex + 1] * (index - intIndex) + values[intIndex] * (intIndex + 1 - index);
}
|
|
|
|
// Fills one digest with uniform samples, compresses it, merges it into a fresh
// digest and queries the median. No value is asserted: the test passes as long
// as the sequence completes.
TEST_F(TDigestTest, CrashAfterMerge) {
    std::random_device rng;
    std::uniform_real_distribution<> unit(0.0, 1.0);

    TDigest source(1000);
    // Sample count depends on whether slow tests are allowed.
    const int samples = LOOP_LESS_OR_MORE(100, 100000);
    for (int n = 0; n < samples; ++n) {
        source.add(unit(rng));
    }
    source.compress();

    TDigest target(1000);
    target.merge(&source);
    target.quantile(0.5);
}
|
|
|
|
// A freshly constructed digest must expose no processed centroids.
TEST_F(TDigestTest, EmptyDigest) {
    TDigest fresh(100);
    const auto centroid_count = fresh.processed().size();
    EXPECT_EQ(0, centroid_count);
}
|
|
|
|
// With exactly one sample in the digest, every quantile (0, a random q in
// [0, 1), and 1) must return that sample, within a tolerance of 0.001.
TEST_F(TDigestTest, SingleValue) {
    TDigest digest(100);
    std::random_device rng;

    std::uniform_real_distribution<> value_dist(0, 1000);
    const auto sample = value_dist(rng);
    digest.add(sample);

    std::uniform_real_distribution<> quantile_dist(0, 1.0);
    const double random_q = quantile_dist(rng);

    // Same three probes as before, in the same order.
    for (const double q : {0.0, random_q, 1.0}) {
        EXPECT_NEAR(sample, digest.quantile(q), 0.001f);
    }
}
|
|
|
|
// When there are few values in the tree, quantiles should be exact.
//
// Adds 10 samples (each either a fresh random value or a repeat of the previous
// one, so duplicates are exercised), then compares digest.quantile() against
// the reference quantile() helper at the extremes, near-extremes, the median
// and one random q.
TEST_F(TDigestTest, FewValues) {
    TDigest digest(1000);

    std::random_device gen;
    std::uniform_real_distribution<> reals(0.0, 100.0);
    std::uniform_int_distribution<> bools(0, 1);
    std::uniform_real_distribution<> qvalue(0.0, 1.0);

    const int length = 10;

    std::vector<double> values;
    values.reserve(length);
    for (int i = 0; i < length; ++i) {
        // The first sample is always fresh; later ones repeat the previous
        // sample about half of the time.
        auto const value = (i == 0 || bools(gen)) ? reals(gen) : values[i - 1];
        digest.add(value);
        values.push_back(value);
    }
    // The reference quantile() helper indexes positionally, so sort first.
    std::sort(values.begin(), values.end());
    digest.compress();

    // Every sample is expected to remain its own centroid.
    EXPECT_EQ(digest.processed().size(), values.size());

    std::vector<double> testValues{0.0, 1.0e-10, qvalue(gen), 0.5, 1.0 - 1e-10, 1.0};
    for (auto q : testValues) {
        double q1 = quantile(q, values);
        auto q2 = digest.quantile(q);
        if (std::isnan(q1)) {
            EXPECT_TRUE(std::isnan(q2));
        } else {
            EXPECT_NEAR(q1, q2, 0.03) << "q = " << q;
        }
    }
}
|
|
|
|
// Checks that the digest copes with a total weight above INT32_MAX: 1000
// unit-weight samples plus 10 samples of weight 2^28 each. Verifies the total
// weight and that quantiles are non-decreasing in q.
TEST_F(TDigestTest, MoreThan2BValues) {
    TDigest digest(1000);

    std::random_device gen;
    std::uniform_real_distribution<> reals(0.0, 1.0);
    for (int i = 0; i < 1000; ++i) {
        const double next = reals(gen);
        digest.add(next);
    }
    for (int i = 0; i < 10; ++i) {
        const double next = reals(gen);
        const auto count = 1L << 28;
        digest.add(next, count);
    }
    // NOTE(review): the expected total is deliberately routed through float;
    // presumably this mirrors the digest's internal weight type - confirm
    // against util/tdigest.h before changing it.
    EXPECT_EQ(static_cast<long>(1000 + float(10L * (1 << 28))), digest.totalWeight());
    EXPECT_GT(digest.totalWeight(), std::numeric_limits<int32_t>::max());

    std::vector<double> quantiles{0, 0.1, 0.5, 0.9, 1, reals(gen)};
    std::sort(quantiles.begin(), quantiles.end());
    // lowest(), not min(): numeric_limits<double>::min() is the smallest
    // *positive* normal value, which would wrongly reject a first quantile of
    // exactly 0.0.
    auto prev = std::numeric_limits<double>::lowest();
    for (double q : quantiles) {
        const double v = digest.quantile(q);
        EXPECT_GE(v, prev) << "q = " << q;
        prev = v;
    }
}
|
|
|
|
// Adds an empty digest to another empty digest through the vector-of-pointers
// overload of add(). No value is asserted: the test passes as long as the call
// completes.
TEST_F(TDigestTest, MergeTest) {
    TDigest source(1000);
    TDigest target(1000);

    // One-element temporary vector holding a pointer to the source digest.
    target.add(std::vector<const TDigest*>(1, &source));
}
|
|
|
|
// After compress(), processed centroids must be ordered by non-decreasing mean.
// Feeds 10000 random samples with random integer weights in [1, 11] and walks
// the processed centroids pairwise.
TEST_F(TDigestTest, TestSorted) {
    TDigest digest(1000);
    std::uniform_real_distribution<> reals(0.0, 1.0);
    std::uniform_int_distribution<> ints(0, 10);

    std::random_device gen;
    for (int i = 0; i < 10000; ++i) {
        digest.add(reals(gen), 1 + ints(gen));
    }
    digest.compress();

    // Zero-weight sentinel: skips the comparison for the first real centroid.
    Centroid previous(0, 0);
    for (const auto& centroid : digest.processed()) {
        if (previous.weight() != 0) {
            // Report through gtest (EXPECT_LE) rather than glog's CHECK_LE, so
            // a violation is recorded as a test failure instead of aborting
            // the whole test binary.
            EXPECT_LE(previous.mean(), centroid.mean());
        }
        previous = centroid;
    }
}
|
|
|
|
// t-digest shouldn't merge extreme nodes, but this still checks how it answers
// quantiles that fall before the first centroid's mean or after the last
// one's.
TEST_F(TDigestTest, ExtremeQuantiles) {
    TDigest digest(1000);
    digest.add(10, 3);
    digest.add(20, 1);
    digest.add(40, 5);

    // The three weighted centroids are roughly equivalent to this sorted array:
    //   [ ?, 10,  ?, 20,  ?,  ?, 40,  ?,  ? ]
    // and the digest is expected to fill in the missing entries as:
    //   [ 5, 10, 15, 20, 30, 35, 40, 45, 50 ]
    const std::vector<double> expanded{5.0, 10.0, 15.0, 20.0, 30.0, 35.0, 40.0, 45.0, 50.0};

    // Probe the centre of the 2nd, 4th and 7th slots of the 9-slot array.
    for (const double q : {1.5 / 9.0, 3.5 / 9.0, 6.5 / 9.0}) {
        EXPECT_NEAR(quantile(q, expanded), digest.quantile(q), 0.01) << "q = " << q;
    }
}
|
|
|
|
// Both quantile(z) and cdf(z) must be non-decreasing as z sweeps from 0 to 1.
// Sample count and sweep step depend on whether slow tests are allowed.
TEST_F(TDigestTest, Montonicity) {
    TDigest digest(1000);
    std::uniform_real_distribution<> unit(0.0, 1.0);
    std::random_device rng;

    const int samples = LOOP_LESS_OR_MORE(10, 100000);
    for (int n = 0; n < samples; ++n) {
        digest.add(unit(rng));
    }

    const double step = LOOP_LESS_OR_MORE(0.1, 1e-5);
    // Both trackers start at -1, below any value the first iteration is
    // expected to produce.
    double prev_quantile = -1;
    double prev_cdf = -1;
    double z = 0;
    while (z <= 1) {
        const double quantile_at_z = digest.quantile(z);
        EXPECT_GE(quantile_at_z, prev_quantile);
        prev_quantile = quantile_at_z;

        const double cdf_at_z = digest.cdf(z);
        EXPECT_GE(cdf_at_z, prev_cdf);
        prev_cdf = cdf_at_z;

        z += step;
    }
}
|
|
|
|
} // namespace doris
|
|
|
|
int main(int argc, char** argv) {
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|