[improvement](execute) Eliminate virtual function calls when serializing and deserializing aggregate functions (#21427)

Eliminate virtual function calls when serializing and deserializing aggregate functions.

For example, in the AggregateFunctionUniq::deserialize_and_merge method, calling read_pod_binary(ref, buf) inside the for loop generates a large number of virtual function calls, because every call goes through the virtual BufferReadable::read.

// Reads an element count from buf, then reads that many KeyType values from buf
// and inserts each of them into the set held in this aggregate state (place).
// The arena parameter is not referenced in this body.
void deserialize_and_merge(AggregateDataPtr __restrict place, BufferReadable& buf,
                           Arena* arena) const override {
    auto& set = this->data(place).set;
    UInt64 size;
    read_var_uint(size, buf);
    // Rehash once up front for the existing plus the incoming element count.
    set.rehash(size + set.size());
    for (size_t i = 0; i < size; ++i) {
        KeyType ref;
        // One read through buf per element; this is the per-iteration call the
        // commit message identifies as the hot spot.
        read_pod_binary(ref, buf);
        set.insert(ref);
    }
}

// Fills x by reading sizeof(x) raw bytes from buf directly into x's storage.
template <typename Type>
void read_pod_binary(Type& x, BufferReadable& buf) {
    buf.read(reinterpret_cast<char*>(&x), sizeof(x));
}
BufferReadable has only one subclass, VectorBufferReader, so it is better to implement the read logic directly in BufferReadable as non-virtual methods and keep VectorBufferReader only as an alias for it.

The following SQL was tested on the SSB-flat dataset:

SELECT COUNT (DISTINCT lo_partkey), COUNT (DISTINCT lo_suppkey) FROM lineorder_flat;
before: MergeTime: 415.398ms
after opt: MergeTime: 174.660ms
This commit is contained in:
ZenoYang
2023-07-04 09:26:37 +08:00
committed by GitHub
parent 11e18f4c98
commit 790b771a49

View File

@ -24,11 +24,23 @@
#include "vec/common/string_ref.h"
namespace doris::vectorized {
class BufferWritable {
class BufferWritable final {
public:
virtual void write(const char* data, int len) = 0;
virtual void commit() = 0;
virtual ~BufferWritable() = default;
explicit BufferWritable(ColumnString& vector)
: _data(vector.get_chars()), _offsets(vector.get_offsets()) {}
inline void write(const char* data, int len) {
_data.insert(data, data + len);
_now_offset += len;
}
inline void commit() {
_offsets.push_back(_offsets.back() + _now_offset);
_now_offset = 0;
}
~BufferWritable() { DCHECK(_now_offset == 0); }
template <typename T>
void write_number(T data) {
@ -36,24 +48,6 @@ public:
fmt::format_to(buffer, "{}", data);
write(buffer.data(), buffer.size());
}
};
class VectorBufferWriter final : public BufferWritable {
public:
explicit VectorBufferWriter(ColumnString& vector)
: _data(vector.get_chars()), _offsets(vector.get_offsets()) {}
void write(const char* data, int len) override {
_data.insert(data, data + len);
_now_offset += len;
}
void commit() override {
_offsets.push_back(_offsets.back() + _now_offset);
_now_offset = 0;
}
~VectorBufferWriter() override { DCHECK(_now_offset == 0); }
private:
ColumnString::Chars& _data;
@ -61,25 +55,22 @@ private:
size_t _now_offset = 0;
};
using VectorBufferWriter = BufferWritable;
using BufferWriter = BufferWritable;
class BufferReadable {
public:
virtual ~BufferReadable() = default;
virtual void read(char* data, int len) = 0;
virtual StringRef read(int len) = 0;
};
explicit BufferReadable(StringRef& ref) : _data(ref.data) {}
explicit BufferReadable(StringRef&& ref) : _data(ref.data) {}
~BufferReadable() = default;
class VectorBufferReader final : public BufferReadable {
public:
explicit VectorBufferReader(StringRef& ref) : _data(ref.data) {}
explicit VectorBufferReader(StringRef&& ref) : _data(ref.data) {}
StringRef read(int len) override {
inline StringRef read(int len) {
StringRef ref(_data, len);
_data += len;
return ref;
}
void read(char* data, int len) override {
inline void read(char* data, int len) {
memcpy(data, _data, len);
_data += len;
}
@ -88,4 +79,7 @@ private:
const char* _data;
};
using VectorBufferReader = BufferReadable;
using BufferReader = BufferReadable;
} // namespace doris::vectorized