From 790b771a4934f7d17330a865fc44ec82ee14bdce Mon Sep 17 00:00:00 2001 From: ZenoYang Date: Tue, 4 Jul 2023 09:26:37 +0800 Subject: [PATCH] [improvement](execute) Eliminate virtual function calls when serializing and deserializing aggregate functions (#21427) Eliminate virtual function calls when serializing and deserializing aggregate functions. For example, in the AggregateFunctionUniq::deserialize_and_merge method, calling read_pod_binary(ref, buf) in the for loop generates a large number of virtual function calls, because read_pod_binary forwards every element to the virtual BufferReadable::read: void deserialize_and_merge(AggregateDataPtr __restrict place, BufferReadable& buf, Arena* arena) const override { auto& set = this->data(place).set; UInt64 size; read_var_uint(size, buf); set.rehash(size + set.size()); for (size_t i = 0; i < size; ++i) { KeyType ref; read_pod_binary(ref, buf); set.insert(ref); } } template <typename Type> void read_pod_binary(Type& x, BufferReadable& buf) { buf.read(reinterpret_cast<char*>(&x), sizeof(x)); } BufferReadable has only one subclass, VectorBufferReader, so it is better to implement the BufferReadable class directly as a concrete, non-virtual class. 
The following sql was tested on SSB-flat dataset: SELECT COUNT (DISTINCT lo_partkey), COUNT (DISTINCT lo_suppkey) FROM lineorder_flat; before: MergeTime: 415.398ms after opt: MergeTime: 174.660ms --- be/src/vec/common/string_buffer.hpp | 60 +++++++++++++---------------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/be/src/vec/common/string_buffer.hpp b/be/src/vec/common/string_buffer.hpp index 63da8544c2..9a760e3a6b 100644 --- a/be/src/vec/common/string_buffer.hpp +++ b/be/src/vec/common/string_buffer.hpp @@ -24,11 +24,23 @@ #include "vec/common/string_ref.h" namespace doris::vectorized { -class BufferWritable { + +class BufferWritable final { public: - virtual void write(const char* data, int len) = 0; - virtual void commit() = 0; - virtual ~BufferWritable() = default; + explicit BufferWritable(ColumnString& vector) + : _data(vector.get_chars()), _offsets(vector.get_offsets()) {} + + inline void write(const char* data, int len) { + _data.insert(data, data + len); + _now_offset += len; + } + + inline void commit() { + _offsets.push_back(_offsets.back() + _now_offset); + _now_offset = 0; + } + + ~BufferWritable() { DCHECK(_now_offset == 0); } template void write_number(T data) { @@ -36,24 +48,6 @@ public: fmt::format_to(buffer, "{}", data); write(buffer.data(), buffer.size()); } -}; - -class VectorBufferWriter final : public BufferWritable { -public: - explicit VectorBufferWriter(ColumnString& vector) - : _data(vector.get_chars()), _offsets(vector.get_offsets()) {} - - void write(const char* data, int len) override { - _data.insert(data, data + len); - _now_offset += len; - } - - void commit() override { - _offsets.push_back(_offsets.back() + _now_offset); - _now_offset = 0; - } - - ~VectorBufferWriter() override { DCHECK(_now_offset == 0); } private: ColumnString::Chars& _data; @@ -61,25 +55,22 @@ private: size_t _now_offset = 0; }; +using VectorBufferWriter = BufferWritable; +using BufferWriter = BufferWritable; + class BufferReadable { public: - 
virtual ~BufferReadable() = default; - virtual void read(char* data, int len) = 0; - virtual StringRef read(int len) = 0; -}; + explicit BufferReadable(StringRef& ref) : _data(ref.data) {} + explicit BufferReadable(StringRef&& ref) : _data(ref.data) {} + ~BufferReadable() = default; -class VectorBufferReader final : public BufferReadable { -public: - explicit VectorBufferReader(StringRef& ref) : _data(ref.data) {} - explicit VectorBufferReader(StringRef&& ref) : _data(ref.data) {} - - StringRef read(int len) override { + inline StringRef read(int len) { StringRef ref(_data, len); _data += len; return ref; } - void read(char* data, int len) override { + inline void read(char* data, int len) { memcpy(data, _data, len); _data += len; } @@ -88,4 +79,7 @@ private: const char* _data; }; +using VectorBufferReader = BufferReadable; +using BufferReader = BufferReadable; + } // namespace doris::vectorized