[enhancement](mow) Add bvar for bloom filter and segment (#32355)

This commit is contained in:
abmdocrt
2024-03-21 12:27:27 +08:00
committed by yiguolei
parent b727fd6594
commit fd62af82d2
6 changed files with 74 additions and 3 deletions

View File

@ -109,6 +109,13 @@ Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader,
std::unique_ptr<segment_v2::BloomFilterIndexIterator> bf_iter;
RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter));
RETURN_IF_ERROR(bf_iter->read_bloom_filter(0, &_bf));
segment_v2::g_pk_total_bloom_filter_num << 1;
segment_v2::g_pk_total_bloom_filter_total_bytes << _bf->size();
segment_v2::g_pk_read_bloom_filter_num << 1;
segment_v2::g_pk_read_bloom_filter_total_bytes << _bf->size();
_bf_num += 1;
_bf_bytes += _bf->size();
_bf_parsed = true;
return Status::OK();

View File

@ -39,6 +39,7 @@ namespace io {
class FileWriter;
} // namespace io
namespace segment_v2 {
class PrimaryKeyIndexMetaPB;
} // namespace segment_v2
@ -98,6 +99,13 @@ class PrimaryKeyIndexReader {
public:
// Builds a reader in its initial state: neither the index nor the bloom filter
// has been parsed yet.
PrimaryKeyIndexReader()
        : _index_parsed(false),
          _bf_parsed(false) {}
// Withdraws from the primary-key bloom filter bvars the amounts recorded in this
// reader's own tallies (_bf_num / _bf_bytes). Both tallies are unsigned, so the
// negated values act as modular "subtract" deltas on the size_t counters.
~PrimaryKeyIndexReader() {
    const auto filter_count_delta = -_bf_num;
    const auto filter_bytes_delta = -_bf_bytes;

    segment_v2::g_pk_total_bloom_filter_num << filter_count_delta;
    segment_v2::g_pk_read_bloom_filter_num << filter_count_delta;

    segment_v2::g_pk_total_bloom_filter_total_bytes << filter_bytes_delta;
    segment_v2::g_pk_read_bloom_filter_total_bytes << filter_bytes_delta;
}
Status parse_index(io::FileReaderSPtr file_reader,
const segment_v2::PrimaryKeyIndexMetaPB& meta);
@ -142,6 +150,8 @@ private:
bool _bf_parsed;
std::unique_ptr<segment_v2::IndexedColumnReader> _index_reader;
std::unique_ptr<segment_v2::BloomFilter> _bf;
size_t _bf_num = 0;
uint64 _bf_bytes = 0;
};
} // namespace doris

View File

@ -17,6 +17,7 @@
#pragma once
#include <bvar/reducer.h>
#include <gen_cpp/segment_v2.pb.h>
#include <glog/logging.h>
#include <string.h>
@ -31,6 +32,22 @@
namespace doris {
namespace segment_v2 {
// bvar counters for bloom filters, registered under the "doris_*" names given below.
// Call sites push a positive delta when a filter is created or its buffer is set up,
// and a negative delta when it is destroyed. The value type is size_t, so a negative
// delta only cancels correctly if it is formed at size_t width.

// Filter counts: "total" is bumped in the BloomFilter constructor/destructor; "read" and
// "write" are bumped by the init paths and undone in the destructor according to _is_write.
inline bvar::Adder<size_t> g_total_bloom_filter_num("doris_total_bloom_filter_num");
inline bvar::Adder<size_t> g_read_bloom_filter_num("doris_read_bloom_filter_num");
inline bvar::Adder<size_t> g_write_bloom_filter_num("doris_write_bloom_filter_num");
// Byte totals matching the counts above; updated with the filter's _size.
inline bvar::Adder<size_t> g_total_bloom_filter_total_bytes("doris_total_bloom_filter_bytes");
inline bvar::Adder<size_t> g_read_bloom_filter_total_bytes("doris_read_bloom_filter_bytes");
inline bvar::Adder<size_t> g_write_bloom_filter_total_bytes("doris_write_bloom_filter_bytes");
// Primary-key index filter counts; updated by PrimaryKeyIndexReader (read side) and
// PrimaryKeyBloomFilterIndexWriterImpl (write side).
inline bvar::Adder<size_t> g_pk_total_bloom_filter_num("doris_pk_total_bloom_filter_num");
inline bvar::Adder<size_t> g_pk_read_bloom_filter_num("doris_pk_read_bloom_filter_num");
inline bvar::Adder<size_t> g_pk_write_bloom_filter_num("doris_pk_write_bloom_filter_num");
// Primary-key index filter byte totals; updated with the value of BloomFilter::size().
inline bvar::Adder<size_t> g_pk_total_bloom_filter_total_bytes("doris_pk_total_bloom_filter_bytes");
inline bvar::Adder<size_t> g_pk_read_bloom_filter_total_bytes("doris_pk_read_bloom_filter_bytes");
inline bvar::Adder<size_t> g_pk_write_bloom_filter_total_bytes("doris_pk_write_bloom_filter_bytes");
struct BloomFilterOptions {
// false positive probability
double fpp = 0.05;
@ -55,12 +72,23 @@ public:
static Status create(BloomFilterAlgorithmPB algorithm, std::unique_ptr<BloomFilter>* bf,
size_t bf_size = 0);
BloomFilter() : _data(nullptr), _num_bytes(0), _size(0), _has_null(nullptr) {}
// Creates an empty filter that owns no buffer yet and counts the new instance
// in g_total_bloom_filter_num.
BloomFilter()
        : _data(nullptr),
          _num_bytes(0),
          _size(0),
          _has_null(nullptr) {
    g_total_bloom_filter_num << 1;
}
// Frees the filter buffer (if one was allocated) and rolls back every bvar counter
// this instance contributed to.
//
// The byte counters are bvar::Adder<size_t>, while _size is a uint32_t. A plain
// `-_size` is evaluated in 32 bits and yields 2^32 - _size; once converted to size_t
// that is a large positive number, so the counters would grow by almost 4 GiB per
// destroyed filter instead of shrinking. The delta is therefore computed at size_t
// width, where the modular arithmetic cancels exactly what was added earlier.
virtual ~BloomFilter() {
    if (_data) {
        const size_t released_bytes_delta = static_cast<size_t>(0) - _size;
        if (_is_write) {
            g_write_bloom_filter_total_bytes << released_bytes_delta;
            g_write_bloom_filter_num << -1;
        } else {
            g_read_bloom_filter_total_bytes << released_bytes_delta;
            g_read_bloom_filter_num << -1;
        }
        g_total_bloom_filter_total_bytes << released_bytes_delta;
        delete[] _data;
    }
    // Counted in the constructor regardless of whether a buffer was ever allocated.
    g_total_bloom_filter_num << -1;
}
virtual bool is_ngram_bf() const { return false; }
@ -86,6 +114,10 @@ public:
memset(_data, 0, _size);
_has_null = (bool*)(_data + _num_bytes);
*_has_null = false;
_is_write = true;
g_write_bloom_filter_num << 1;
g_write_bloom_filter_total_bytes << _size;
g_total_bloom_filter_total_bytes << _size;
return Status::OK();
}
@ -107,6 +139,9 @@ public:
_num_bytes = _size - 1;
DCHECK((_num_bytes & (_num_bytes - 1)) == 0);
_has_null = (bool*)(_data + _num_bytes);
g_read_bloom_filter_num << 1;
g_read_bloom_filter_total_bytes << _size;
g_total_bloom_filter_total_bytes << _size;
return Status::OK();
}
@ -181,6 +216,8 @@ protected:
uint32_t _size;
// last byte's pointer in data for null flag
bool* _has_null = nullptr;
// is this bf used for write
bool _is_write = false;
private:
std::function<void(const void*, const int, const uint64_t, void*)> _hash_func;

View File

@ -26,6 +26,7 @@
#include <utility>
#include "olap/olap_common.h"
#include "olap/primary_key_index.h"
#include "olap/rowset/segment_v2/bloom_filter.h" // for BloomFilterOptions, BloomFilter
#include "olap/rowset/segment_v2/indexed_column_writer.h"
#include "olap/types.h"
@ -194,6 +195,10 @@ Status PrimaryKeyBloomFilterIndexWriterImpl::flush() {
bf->add_bytes(s->data, s->size);
}
_bf_buffer_size += bf->size();
g_pk_total_bloom_filter_num << 1;
g_pk_total_bloom_filter_total_bytes << bf->size();
g_pk_write_bloom_filter_num << 1;
g_pk_write_bloom_filter_total_bytes << bf->size();
_bfs.push_back(std::move(bf));
_values.clear();
_has_null = false;

View File

@ -76,7 +76,14 @@ public:
_has_null(false),
_bf_buffer_size(0) {}
~PrimaryKeyBloomFilterIndexWriterImpl() override = default;
// Rolls back the primary-key bloom filter bvars for every filter still held in _bfs.
//
// The counters are bvar::Adder<size_t>, so the byte delta is widened to size_t before
// it is negated. Negating a narrower unsigned value would wrap at that narrower width
// and end up adding a large positive number to the counter instead of subtracting.
// NOTE(review): the return type of BloomFilter::size() is not visible here; widening
// is correct for any unsigned width — confirm against the declaration.
~PrimaryKeyBloomFilterIndexWriterImpl() override {
    for (const auto& bf : _bfs) {
        const size_t bf_bytes = bf->size();
        const size_t bf_bytes_delta = static_cast<size_t>(0) - bf_bytes;
        g_pk_total_bloom_filter_num << -1;
        g_pk_total_bloom_filter_total_bytes << bf_bytes_delta;
        g_pk_write_bloom_filter_num << -1;
        g_pk_write_bloom_filter_total_bytes << bf_bytes_delta;
    }
}
void add_values(const void* values, size_t count) override;

View File

@ -74,6 +74,8 @@
namespace doris {
namespace segment_v2 {
// bvar counter registered as "doris_total_segment_num": +1 in the Segment constructor,
// -1 in ~Segment().
bvar::Adder<size_t> g_total_segment_num("doris_total_segment_num");
class InvertedIndexIterator;
Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
@ -94,9 +96,12 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr table
_meta_mem_usage(0),
_rowset_id(rowset_id),
_tablet_schema(tablet_schema),
_segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {}
_segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {
g_total_segment_num << 1;
}
Segment::~Segment() {
g_total_segment_num << -1;
#ifndef BE_TEST
_segment_meta_mem_tracker->release(_meta_mem_usage);
#endif