From fd62af82d2e3ec32f8ef2cf30f400eecb458cf72 Mon Sep 17 00:00:00 2001
From: abmdocrt
Date: Thu, 21 Mar 2024 12:27:27 +0800
Subject: [PATCH] [enhancement](mow) Add bvar for bloom filter and segment (#32355)

Add bvar::Adder counters for the number of live bloom filters, the bytes
they hold, and the number of live segments.

* BloomFilter counts itself in g_total_bloom_filter_num on construction and
  destruction. Its two init paths add the filter to either the write or the
  read counters, and _is_write tells the destructor which pair to decrement.
* Primary-key bloom filters are counted separately (g_pk_*): the read side in
  PrimaryKeyIndexReader::parse_bf, the write side in
  PrimaryKeyBloomFilterIndexWriterImpl::flush. Each class undoes its own
  increments in its destructor.
* Segment increments g_total_segment_num in its constructor and decrements it
  in its destructor.

Every decrement converts its operand to int64_t before negating it.
BloomFilter::_size is a uint32_t, so a bare `-_size` evaluates to
2^32 - _size; widened to a 64-bit signed counter (assumed value type of the
adders) that is a positive amount, and the byte counters would grow instead
of shrink when a filter is destroyed.
---
Review note: the blob ids on the "index" lines are the ones recorded in the
original patch and were not recomputed for the adjusted "+" lines.

 be/src/olap/primary_key_index.cpp             |  7 ++++
 be/src/olap/primary_key_index.h               | 10 +++++
 be/src/olap/rowset/segment_v2/bloom_filter.h  | 39 ++++++++++++++++++-
 .../segment_v2/bloom_filter_index_writer.cpp  |  5 +++
 .../segment_v2/bloom_filter_index_writer.h    |  9 ++++-
 be/src/olap/rowset/segment_v2/segment.cpp     |  7 +++-
 6 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp
index b807b249a7..6d1b1772a9 100644
--- a/be/src/olap/primary_key_index.cpp
+++ b/be/src/olap/primary_key_index.cpp
@@ -109,6 +109,13 @@ Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader,
     std::unique_ptr bf_iter;
     RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter));
     RETURN_IF_ERROR(bf_iter->read_bloom_filter(0, &_bf));
+    segment_v2::g_pk_total_bloom_filter_num << 1;
+    segment_v2::g_pk_total_bloom_filter_total_bytes << _bf->size();
+    segment_v2::g_pk_read_bloom_filter_num << 1;
+    segment_v2::g_pk_read_bloom_filter_total_bytes << _bf->size();
+    _bf_num += 1;
+    _bf_bytes += _bf->size();
+
     _bf_parsed = true;
 
     return Status::OK();
diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h
index 3fda3a763e..618d11b60d 100644
--- a/be/src/olap/primary_key_index.h
+++ b/be/src/olap/primary_key_index.h
@@ -39,6 +39,7 @@ namespace io {
 class FileWriter;
 } // namespace io
 namespace segment_v2 {
+
 class PrimaryKeyIndexMetaPB;
 } // namespace segment_v2
 
@@ -98,6 +99,13 @@ class PrimaryKeyIndexReader {
 public:
     PrimaryKeyIndexReader() : _index_parsed(false), _bf_parsed(false) {}
 
+    ~PrimaryKeyIndexReader() {
+        segment_v2::g_pk_total_bloom_filter_num << -static_cast<int64_t>(_bf_num);
+        segment_v2::g_pk_total_bloom_filter_total_bytes << -static_cast<int64_t>(_bf_bytes);
+        segment_v2::g_pk_read_bloom_filter_num << -static_cast<int64_t>(_bf_num);
+        segment_v2::g_pk_read_bloom_filter_total_bytes << -static_cast<int64_t>(_bf_bytes);
+    }
+
     Status parse_index(io::FileReaderSPtr file_reader,
                        const segment_v2::PrimaryKeyIndexMetaPB& meta);
 
@@ -142,6 +150,8 @@ private:
     bool _bf_parsed;
     std::unique_ptr _index_reader;
     std::unique_ptr _bf;
+    size_t _bf_num = 0;
+    uint64 _bf_bytes = 0;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter.h b/be/src/olap/rowset/segment_v2/bloom_filter.h
index cd57181cdb..13b1558431 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter.h
+++ b/be/src/olap/rowset/segment_v2/bloom_filter.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include
 #include
 #include
 #include
@@ -31,6 +32,22 @@
 namespace doris {
 namespace segment_v2 {
 
+inline bvar::Adder g_total_bloom_filter_num("doris_total_bloom_filter_num");
+inline bvar::Adder g_read_bloom_filter_num("doris_read_bloom_filter_num");
+inline bvar::Adder g_write_bloom_filter_num("doris_write_bloom_filter_num");
+
+inline bvar::Adder g_total_bloom_filter_total_bytes("doris_total_bloom_filter_bytes");
+inline bvar::Adder g_read_bloom_filter_total_bytes("doris_read_bloom_filter_bytes");
+inline bvar::Adder g_write_bloom_filter_total_bytes("doris_write_bloom_filter_bytes");
+
+inline bvar::Adder g_pk_total_bloom_filter_num("doris_pk_total_bloom_filter_num");
+inline bvar::Adder g_pk_read_bloom_filter_num("doris_pk_read_bloom_filter_num");
+inline bvar::Adder g_pk_write_bloom_filter_num("doris_pk_write_bloom_filter_num");
+
+inline bvar::Adder g_pk_total_bloom_filter_total_bytes("doris_pk_total_bloom_filter_bytes");
+inline bvar::Adder g_pk_read_bloom_filter_total_bytes("doris_pk_read_bloom_filter_bytes");
+inline bvar::Adder g_pk_write_bloom_filter_total_bytes("doris_pk_write_bloom_filter_bytes");
+
 struct BloomFilterOptions {
     // false positive probability
     double fpp = 0.05;
@@ -55,12 +72,23 @@ public:
     static Status create(BloomFilterAlgorithmPB algorithm, std::unique_ptr* bf,
                          size_t bf_size = 0);
 
-    BloomFilter() : _data(nullptr), _num_bytes(0), _size(0), _has_null(nullptr) {}
+    BloomFilter() : _data(nullptr), _num_bytes(0), _size(0), _has_null(nullptr) {
+        g_total_bloom_filter_num << 1;
+    }
 
     virtual ~BloomFilter() {
         if (_data) {
+            if (_is_write) {
+                g_write_bloom_filter_total_bytes << -static_cast<int64_t>(_size);
+                g_write_bloom_filter_num << -1;
+            } else {
+                g_read_bloom_filter_total_bytes << -static_cast<int64_t>(_size);
+                g_read_bloom_filter_num << -1;
+            }
+            g_total_bloom_filter_total_bytes << -static_cast<int64_t>(_size);
             delete[] _data;
         }
+        g_total_bloom_filter_num << -1;
     }
 
     virtual bool is_ngram_bf() const { return false; }
@@ -86,6 +114,10 @@ public:
         memset(_data, 0, _size);
         _has_null = (bool*)(_data + _num_bytes);
         *_has_null = false;
+        _is_write = true;
+        g_write_bloom_filter_num << 1;
+        g_write_bloom_filter_total_bytes << _size;
+        g_total_bloom_filter_total_bytes << _size;
         return Status::OK();
     }
 
@@ -107,6 +139,9 @@ public:
         _num_bytes = _size - 1;
         DCHECK((_num_bytes & (_num_bytes - 1)) == 0);
         _has_null = (bool*)(_data + _num_bytes);
+        g_read_bloom_filter_num << 1;
+        g_read_bloom_filter_total_bytes << _size;
+        g_total_bloom_filter_total_bytes << _size;
         return Status::OK();
     }
 
@@ -181,6 +216,8 @@ protected:
     uint32_t _size;
     // last byte's pointer in data for null flag
     bool* _has_null = nullptr;
+    // is this bf used for write
+    bool _is_write = false;
 
 private:
     std::function _hash_func;
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
index e8bab57003..2791428078 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
@@ -26,6 +26,7 @@
 #include
 
 #include "olap/olap_common.h"
+#include "olap/primary_key_index.h"
 #include "olap/rowset/segment_v2/bloom_filter.h" // for BloomFilterOptions, BloomFilter
 #include "olap/rowset/segment_v2/indexed_column_writer.h"
 #include "olap/types.h"
@@ -194,6 +195,10 @@ Status PrimaryKeyBloomFilterIndexWriterImpl::flush() {
         bf->add_bytes(s->data, s->size);
     }
     _bf_buffer_size += bf->size();
+    g_pk_total_bloom_filter_num << 1;
+    g_pk_total_bloom_filter_total_bytes << bf->size();
+    g_pk_write_bloom_filter_num << 1;
+    g_pk_write_bloom_filter_total_bytes << bf->size();
     _bfs.push_back(std::move(bf));
     _values.clear();
     _has_null = false;
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h
index dc4707e6e0..df92f980c5 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h
@@ -76,7 +76,14 @@ public:
               _has_null(false),
               _bf_buffer_size(0) {}
 
-    ~PrimaryKeyBloomFilterIndexWriterImpl() override = default;
+    ~PrimaryKeyBloomFilterIndexWriterImpl() override {
+        for (auto& bf : _bfs) {
+            g_pk_total_bloom_filter_num << -1;
+            g_pk_total_bloom_filter_total_bytes << -static_cast<int64_t>(bf->size());
+            g_pk_write_bloom_filter_num << -1;
+            g_pk_write_bloom_filter_total_bytes << -static_cast<int64_t>(bf->size());
+        }
+    }
 
     void add_values(const void* values, size_t count) override;
 
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index e74c5d2a6b..17539012a7 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -74,6 +74,8 @@
 
 namespace doris {
 namespace segment_v2 {
+
+bvar::Adder g_total_segment_num("doris_total_segment_num");
 class InvertedIndexIterator;
 
 Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
@@ -94,9 +96,12 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr table
           _meta_mem_usage(0),
           _rowset_id(rowset_id),
           _tablet_schema(tablet_schema),
-          _segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {}
+          _segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {
+    g_total_segment_num << 1;
+}
 
 Segment::~Segment() {
+    g_total_segment_num << -1;
 #ifndef BE_TEST
     _segment_meta_mem_tracker->release(_meta_mem_usage);
 #endif