Currently, there are some useless includes in the codebase. We can use a tool named include-what-you-use to optimize these includes. By using a strict include-what-you-use policy, we can get lots of benefits from it.
229 lines
8.7 KiB
C++
229 lines
8.7 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#include "util/histogram.h"
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <algorithm>
|
|
#include <cinttypes>
|
|
#include <cmath>
|
|
#include <limits>
|
|
#include <utility>
|
|
|
|
namespace doris {
|
|
|
|
// Builds the table of bucket upper limits and the reverse map from limit to
// bucket index.
//
// The table starts with the limits 1 and 2. After that an unrounded running
// value is multiplied by 1.5 on every step for as long as it still fits in a
// uint64_t; each step contributes one (rounded) limit.
//
// NOTE: the number of limits generated here has to match the size of the
// _buckets array in HistogramStat. Change both together.
HistogramBucketMapper::HistogramBucketMapper() {
    _bucket_values = {1, 2};
    _value_index_map = {{1, 0}, {2, 1}};

    const double largest_representable =
            static_cast<double>(std::numeric_limits<uint64_t>::max());
    double unrounded = static_cast<double>(_bucket_values.back());

    for (unrounded *= 1.5; unrounded <= largest_representable; unrounded *= 1.5) {
        // Keep only the leading digits so the limits are easier to read,
        // e.g. 172 becomes 170. Digits are dropped while the value is 110 or
        // more, and the dropped magnitude is restored afterwards.
        uint64_t limit = static_cast<uint64_t>(unrounded);
        uint64_t scale = 1;
        while (limit / 10 > 10) {
            limit /= 10;
            scale *= 10;
        }
        limit *= scale;

        _bucket_values.push_back(limit);
        _value_index_map[limit] = _bucket_values.size() - 1;
    }

    _max_bucket_value = _bucket_values.back();
    _min_bucket_value = _bucket_values.front();
}
|
|
|
|
size_t HistogramBucketMapper::index_for_value(const uint64_t& value) const {
|
|
if (value >= _max_bucket_value) {
|
|
return _bucket_values.size() - 1;
|
|
} else if (value >= _min_bucket_value) {
|
|
std::map<uint64_t, uint64_t>::const_iterator lowerBound =
|
|
_value_index_map.lower_bound(value);
|
|
if (lowerBound != _value_index_map.end()) {
|
|
return static_cast<size_t>(lowerBound->second);
|
|
} else {
|
|
return 0;
|
|
}
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
namespace {
|
|
const HistogramBucketMapper bucket_mapper;
|
|
}
|
|
|
|
// Creates an empty histogram.
//
// _num_buckets is taken from the file-local bucket_mapper; the debug check
// verifies that it equals the number of elements of the _buckets array, so a
// change to the bucket layout that is not mirrored in the array size is
// caught in debug builds. All counters are then reset through clear().
HistogramStat::HistogramStat() : _num_buckets(bucket_mapper.bucket_count()) {
    DCHECK(_num_buckets == sizeof(_buckets) / sizeof(*_buckets));
    clear();
}
|
|
|
|
void HistogramStat::clear() {
|
|
_min.store(bucket_mapper.last_value(), std::memory_order_relaxed);
|
|
_max.store(0, std::memory_order_relaxed);
|
|
_num.store(0, std::memory_order_relaxed);
|
|
_sum.store(0, std::memory_order_relaxed);
|
|
_sum_squares.store(0, std::memory_order_relaxed);
|
|
for (unsigned int b = 0; b < _num_buckets; b++) {
|
|
_buckets[b].store(0, std::memory_order_relaxed);
|
|
}
|
|
};
|
|
|
|
bool HistogramStat::is_empty() const {
|
|
return num() == 0;
|
|
}
|
|
|
|
void HistogramStat::add(const uint64_t& value) {
|
|
// This function is designed to be lock free, as it's in the critical path
|
|
// of any operation. Each individual value is atomic and the order of updates
|
|
// by concurrent threads is tolerable.
|
|
const size_t index = bucket_mapper.index_for_value(value);
|
|
DCHECK(index < _num_buckets);
|
|
_buckets[index].store(_buckets[index].load(std::memory_order_relaxed) + 1,
|
|
std::memory_order_relaxed);
|
|
|
|
uint64_t old_min = min();
|
|
if (value < old_min) {
|
|
_min.store(value, std::memory_order_relaxed);
|
|
}
|
|
|
|
uint64_t old_max = max();
|
|
if (value > old_max) {
|
|
_max.store(value, std::memory_order_relaxed);
|
|
}
|
|
|
|
_num.store(_num.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
|
|
_sum.store(_sum.load(std::memory_order_relaxed) + value, std::memory_order_relaxed);
|
|
_sum_squares.store(_sum_squares.load(std::memory_order_relaxed) + value * value,
|
|
std::memory_order_relaxed);
|
|
}
|
|
|
|
void HistogramStat::merge(const HistogramStat& other) {
|
|
// This function needs to be performed with the outer lock acquired
|
|
// However, atomic operation on every member is still need, since Add()
|
|
// requires no lock and value update can still happen concurrently
|
|
uint64_t old_min = min();
|
|
uint64_t other_min = other.min();
|
|
while (other_min < old_min && !_min.compare_exchange_weak(old_min, other_min)) {
|
|
}
|
|
|
|
uint64_t old_max = max();
|
|
uint64_t other_max = other.max();
|
|
while (other_max > old_max && !_max.compare_exchange_weak(old_max, other_max)) {
|
|
}
|
|
|
|
_num.fetch_add(other.num(), std::memory_order_relaxed);
|
|
_sum.fetch_add(other.sum(), std::memory_order_relaxed);
|
|
_sum_squares.fetch_add(other.sum_squares(), std::memory_order_relaxed);
|
|
for (unsigned int b = 0; b < _num_buckets; b++) {
|
|
_buckets[b].fetch_add(other.bucket_at(b), std::memory_order_relaxed);
|
|
}
|
|
}
|
|
|
|
double HistogramStat::median() const {
|
|
return percentile(50.0);
|
|
}
|
|
|
|
double HistogramStat::percentile(double p) const {
|
|
double threshold = num() * (p / 100.0);
|
|
uint64_t cumulative_sum = 0;
|
|
for (unsigned int b = 0; b < _num_buckets; b++) {
|
|
uint64_t bucket_value = bucket_at(b);
|
|
cumulative_sum += bucket_value;
|
|
if (cumulative_sum >= threshold) {
|
|
// Scale linearly within this bucket
|
|
uint64_t left_point = (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1);
|
|
uint64_t right_point = bucket_mapper.bucket_limit(b);
|
|
uint64_t left_sum = cumulative_sum - bucket_value;
|
|
uint64_t right_sum = cumulative_sum;
|
|
double pos = 0;
|
|
uint64_t right_left_diff = right_sum - left_sum;
|
|
if (right_left_diff != 0) {
|
|
pos = (threshold - left_sum) / right_left_diff;
|
|
}
|
|
double r = left_point + (right_point - left_point) * pos;
|
|
uint64_t cur_min = min();
|
|
uint64_t cur_max = max();
|
|
if (r < cur_min) r = static_cast<double>(cur_min);
|
|
if (r > cur_max) r = static_cast<double>(cur_max);
|
|
return r;
|
|
}
|
|
}
|
|
return static_cast<double>(max());
|
|
}
|
|
|
|
double HistogramStat::average() const {
|
|
uint64_t cur_num = num();
|
|
uint64_t cur_sum = sum();
|
|
if (cur_num == 0) return 0;
|
|
return static_cast<double>(cur_sum) / static_cast<double>(cur_num);
|
|
}
|
|
|
|
double HistogramStat::standard_deviation() const {
|
|
uint64_t cur_num = num();
|
|
uint64_t cur_sum = sum();
|
|
uint64_t cur_sum_squares = sum_squares();
|
|
if (cur_num == 0) return 0;
|
|
double variance = static_cast<double>(cur_sum_squares * cur_num - cur_sum * cur_sum) /
|
|
static_cast<double>(cur_num * cur_num);
|
|
return std::sqrt(variance);
|
|
}
|
|
std::string HistogramStat::to_string() const {
|
|
uint64_t cur_num = num();
|
|
std::string r;
|
|
char buf[1650];
|
|
snprintf(buf, sizeof(buf), "Count: %" PRIu64 " Average: %.4f StdDev: %.2f\n", cur_num,
|
|
average(), standard_deviation());
|
|
r.append(buf);
|
|
snprintf(buf, sizeof(buf), "Min: %" PRIu64 " Median: %.4f Max: %" PRIu64 "\n",
|
|
(cur_num == 0 ? 0 : min()), median(), (cur_num == 0 ? 0 : max()));
|
|
r.append(buf);
|
|
snprintf(buf, sizeof(buf),
|
|
"Percentiles: "
|
|
"P50: %.2f P75: %.2f P99: %.2f P99.9: %.2f P99.99: %.2f\n",
|
|
percentile(50), percentile(75), percentile(99), percentile(99.9), percentile(99.99));
|
|
r.append(buf);
|
|
r.append("------------------------------------------------------\n");
|
|
if (cur_num == 0) return r; // all buckets are empty
|
|
const double mult = 100.0 / cur_num;
|
|
uint64_t cumulative_sum = 0;
|
|
for (unsigned int b = 0; b < _num_buckets; b++) {
|
|
uint64_t bucket_value = bucket_at(b);
|
|
if (bucket_value <= 0.0) continue;
|
|
cumulative_sum += bucket_value;
|
|
snprintf(buf, sizeof(buf), "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ",
|
|
(b == 0) ? '[' : '(',
|
|
(b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1), // left
|
|
bucket_mapper.bucket_limit(b), // right
|
|
bucket_value, // count
|
|
(mult * bucket_value), // percentage
|
|
(mult * cumulative_sum)); // cumulative percentage
|
|
r.append(buf);
|
|
|
|
// Add hash marks based on percentage; 20 marks for 100%.
|
|
size_t marks = static_cast<size_t>(mult * bucket_value / 5 + 0.5);
|
|
r.append(marks, '#');
|
|
r.push_back('\n');
|
|
}
|
|
return r;
|
|
}
|
|
|
|
} // namespace doris
|