[enhancement](mow) Add bvar for bloom filter and segment (#32355)
This commit is contained in:
@ -109,6 +109,13 @@ Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader,
|
||||
std::unique_ptr<segment_v2::BloomFilterIndexIterator> bf_iter;
|
||||
RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter));
|
||||
RETURN_IF_ERROR(bf_iter->read_bloom_filter(0, &_bf));
|
||||
segment_v2::g_pk_total_bloom_filter_num << 1;
|
||||
segment_v2::g_pk_total_bloom_filter_total_bytes << _bf->size();
|
||||
segment_v2::g_pk_read_bloom_filter_num << 1;
|
||||
segment_v2::g_pk_read_bloom_filter_total_bytes << _bf->size();
|
||||
_bf_num += 1;
|
||||
_bf_bytes += _bf->size();
|
||||
|
||||
_bf_parsed = true;
|
||||
|
||||
return Status::OK();
|
||||
|
||||
@ -39,6 +39,7 @@ namespace io {
|
||||
class FileWriter;
|
||||
} // namespace io
|
||||
namespace segment_v2 {
|
||||
|
||||
class PrimaryKeyIndexMetaPB;
|
||||
} // namespace segment_v2
|
||||
|
||||
@ -98,6 +99,13 @@ class PrimaryKeyIndexReader {
|
||||
public:
|
||||
PrimaryKeyIndexReader() : _index_parsed(false), _bf_parsed(false) {}
|
||||
|
||||
// Rolls back this reader's contribution to the primary-key bloom filter bvars:
// the filter count (_bf_num) and byte total (_bf_bytes) that were accumulated
// when the bloom filter was parsed are subtracted from both the "total" and
// the "read" counters.
~PrimaryKeyIndexReader() {
    // Both members are unsigned (_bf_num is size_t; _bf_bytes is uint64,
    // presumably a 64-bit unsigned typedef), so unary minus yields the
    // wrapped-around value; adding it to the size_t adders subtracts the
    // original amount.
    const auto num_delta = -_bf_num;
    const auto bytes_delta = -_bf_bytes;

    segment_v2::g_pk_total_bloom_filter_num << num_delta;
    segment_v2::g_pk_read_bloom_filter_num << num_delta;

    segment_v2::g_pk_total_bloom_filter_total_bytes << bytes_delta;
    segment_v2::g_pk_read_bloom_filter_total_bytes << bytes_delta;
}
|
||||
|
||||
Status parse_index(io::FileReaderSPtr file_reader,
|
||||
const segment_v2::PrimaryKeyIndexMetaPB& meta);
|
||||
|
||||
@ -142,6 +150,8 @@ private:
|
||||
bool _bf_parsed;
|
||||
std::unique_ptr<segment_v2::IndexedColumnReader> _index_reader;
|
||||
std::unique_ptr<segment_v2::BloomFilter> _bf;
|
||||
size_t _bf_num = 0;
|
||||
uint64 _bf_bytes = 0;
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bvar/reducer.h>
|
||||
#include <gen_cpp/segment_v2.pb.h>
|
||||
#include <glog/logging.h>
|
||||
#include <string.h>
|
||||
@ -31,6 +32,22 @@
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
// bvar counters for segment bloom filters. Being `inline` variables, this
// header can be included from several translation units without duplicate
// definitions.

// Number of BloomFilter objects: "total" is bumped in the BloomFilter
// constructor and dropped in its destructor; "read"/"write" are bumped when a
// filter's buffer is set up for reading or writing respectively.
inline bvar::Adder<size_t> g_total_bloom_filter_num {"doris_total_bloom_filter_num"};
inline bvar::Adder<size_t> g_read_bloom_filter_num {"doris_read_bloom_filter_num"};
inline bvar::Adder<size_t> g_write_bloom_filter_num {"doris_write_bloom_filter_num"};

// Bytes held by bloom filter buffers, split the same way as the counts above.
inline bvar::Adder<size_t> g_total_bloom_filter_total_bytes {"doris_total_bloom_filter_bytes"};
inline bvar::Adder<size_t> g_read_bloom_filter_total_bytes {"doris_read_bloom_filter_bytes"};
inline bvar::Adder<size_t> g_write_bloom_filter_total_bytes {"doris_write_bloom_filter_bytes"};

// Number of primary-key index bloom filters: updated by the primary key index
// reader ("read") and the primary key bloom filter index writer ("write");
// both also update "total".
inline bvar::Adder<size_t> g_pk_total_bloom_filter_num {"doris_pk_total_bloom_filter_num"};
inline bvar::Adder<size_t> g_pk_read_bloom_filter_num {"doris_pk_read_bloom_filter_num"};
inline bvar::Adder<size_t> g_pk_write_bloom_filter_num {"doris_pk_write_bloom_filter_num"};

// Bytes held by primary-key index bloom filters, split the same way.
inline bvar::Adder<size_t> g_pk_total_bloom_filter_total_bytes {"doris_pk_total_bloom_filter_bytes"};
inline bvar::Adder<size_t> g_pk_read_bloom_filter_total_bytes {"doris_pk_read_bloom_filter_bytes"};
inline bvar::Adder<size_t> g_pk_write_bloom_filter_total_bytes {"doris_pk_write_bloom_filter_bytes"};
|
||||
|
||||
struct BloomFilterOptions {
|
||||
// false positive probability
|
||||
double fpp = 0.05;
|
||||
@ -55,12 +72,23 @@ public:
|
||||
static Status create(BloomFilterAlgorithmPB algorithm, std::unique_ptr<BloomFilter>* bf,
|
||||
size_t bf_size = 0);
|
||||
|
||||
BloomFilter() : _data(nullptr), _num_bytes(0), _size(0), _has_null(nullptr) {}
|
||||
// Constructs a filter with no buffer attached (null data pointer, zero sizes)
// and counts the new object in g_total_bloom_filter_num; the destructor
// performs the matching decrement.
BloomFilter()
        : _data(nullptr),
          _num_bytes(0),
          _size(0),
          _has_null(nullptr) {
    g_total_bloom_filter_num << 1;
}
|
||||
|
||||
// Frees the filter buffer (if one was allocated) and rolls back the bvar
// counters this object contributed to:
//   - the per-mode ("write" or "read", chosen by _is_write) count and bytes,
//   - the total bytes,
//   - and, unconditionally, the total filter count bumped by the constructor.
virtual ~BloomFilter() {
    if (_data) {
        // Widen to size_t BEFORE negating. `_size` is uint32_t, so a plain
        // `-_size` is evaluated in 32 bits and yields 2^32 - _size; once
        // converted to the adders' size_t that would ADD ~4 GiB instead of
        // subtracting _size. Computing the negation in size_t wraps at the
        // adder's own width, which is exactly a subtraction of _size.
        const size_t neg_bytes = size_t {0} - _size;

        if (_is_write) {
            g_write_bloom_filter_total_bytes << neg_bytes;
            g_write_bloom_filter_num << -1;
        } else {
            g_read_bloom_filter_total_bytes << neg_bytes;
            g_read_bloom_filter_num << -1;
        }
        g_total_bloom_filter_total_bytes << neg_bytes;

        delete[] _data;
    }
    g_total_bloom_filter_num << -1;
}
|
||||
|
||||
virtual bool is_ngram_bf() const { return false; }
|
||||
@ -86,6 +114,10 @@ public:
|
||||
memset(_data, 0, _size);
|
||||
_has_null = (bool*)(_data + _num_bytes);
|
||||
*_has_null = false;
|
||||
_is_write = true;
|
||||
g_write_bloom_filter_num << 1;
|
||||
g_write_bloom_filter_total_bytes << _size;
|
||||
g_total_bloom_filter_total_bytes << _size;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -107,6 +139,9 @@ public:
|
||||
_num_bytes = _size - 1;
|
||||
DCHECK((_num_bytes & (_num_bytes - 1)) == 0);
|
||||
_has_null = (bool*)(_data + _num_bytes);
|
||||
g_read_bloom_filter_num << 1;
|
||||
g_read_bloom_filter_total_bytes << _size;
|
||||
g_total_bloom_filter_total_bytes << _size;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -181,6 +216,8 @@ protected:
|
||||
uint32_t _size;
|
||||
// last byte's pointer in data for null flag
|
||||
bool* _has_null = nullptr;
|
||||
// is this bf used for write
|
||||
bool _is_write = false;
|
||||
|
||||
private:
|
||||
std::function<void(const void*, const int, const uint64_t, void*)> _hash_func;
|
||||
|
||||
@ -26,6 +26,7 @@
|
||||
#include <utility>
|
||||
|
||||
#include "olap/olap_common.h"
|
||||
#include "olap/primary_key_index.h"
|
||||
#include "olap/rowset/segment_v2/bloom_filter.h" // for BloomFilterOptions, BloomFilter
|
||||
#include "olap/rowset/segment_v2/indexed_column_writer.h"
|
||||
#include "olap/types.h"
|
||||
@ -194,6 +195,10 @@ Status PrimaryKeyBloomFilterIndexWriterImpl::flush() {
|
||||
bf->add_bytes(s->data, s->size);
|
||||
}
|
||||
_bf_buffer_size += bf->size();
|
||||
g_pk_total_bloom_filter_num << 1;
|
||||
g_pk_total_bloom_filter_total_bytes << bf->size();
|
||||
g_pk_write_bloom_filter_num << 1;
|
||||
g_pk_write_bloom_filter_total_bytes << bf->size();
|
||||
_bfs.push_back(std::move(bf));
|
||||
_values.clear();
|
||||
_has_null = false;
|
||||
|
||||
@ -76,7 +76,14 @@ public:
|
||||
_has_null(false),
|
||||
_bf_buffer_size(0) {}
|
||||
|
||||
~PrimaryKeyBloomFilterIndexWriterImpl() override = default;
|
||||
// Rolls back the primary-key bloom filter bvars for every filter still held
// in _bfs: one filter and bf->size() bytes are removed from both the "total"
// and the "write" counters per entry, mirroring the increments done when a
// filter is appended to _bfs.
~PrimaryKeyBloomFilterIndexWriterImpl() override {
    for (const auto& bf : _bfs) {
        // Widen to size_t BEFORE negating. If size() returns a 32-bit
        // unsigned value (NOTE(review): its declaration is not visible here;
        // the underlying _size member is uint32_t), `-bf->size()` would wrap
        // at 2^32 and the size_t adders would grow by ~4 GiB instead of
        // shrinking. Negating in size_t is correct for any integer return
        // type.
        const size_t neg_bytes = size_t {0} - bf->size();

        g_pk_total_bloom_filter_num << -1;
        g_pk_total_bloom_filter_total_bytes << neg_bytes;
        g_pk_write_bloom_filter_num << -1;
        g_pk_write_bloom_filter_total_bytes << neg_bytes;
    }
}
|
||||
|
||||
void add_values(const void* values, size_t count) override;
|
||||
|
||||
|
||||
@ -74,6 +74,8 @@
|
||||
namespace doris {
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
bvar::Adder<size_t> g_total_segment_num("doris_total_segment_num");
|
||||
class InvertedIndexIterator;
|
||||
|
||||
Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
|
||||
@ -94,9 +96,12 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr table
|
||||
_meta_mem_usage(0),
|
||||
_rowset_id(rowset_id),
|
||||
_tablet_schema(tablet_schema),
|
||||
_segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {}
|
||||
_segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {
|
||||
g_total_segment_num << 1;
|
||||
}
|
||||
|
||||
Segment::~Segment() {
|
||||
g_total_segment_num << -1;
|
||||
#ifndef BE_TEST
|
||||
_segment_meta_mem_tracker->release(_meta_mem_usage);
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user