[improvement](execute) Eliminate virtual function calls when serializing and deserializing aggregate functions (#21427)
Eliminate virtual function calls when serializing and deserializing aggregate functions.
For example, in the AggregateFunctionUniq::deserialize_and_merge method, calling read_pod_binary(ref, buf) inside the for loop makes one virtual call to BufferReadable::read per element, which adds up to a large number of virtual function calls:
// Reads an element count from buf via read_var_uint, then reads that many
// KeyType values from buf and inserts each one into the set stored at `place`.
// `arena` is not used in this body.
void deserialize_and_merge(AggregateDataPtr __restrict place, BufferReadable& buf,
Arena* arena) const override {
auto& set = this->data(place).set;
UInt64 size;
read_var_uint(size, buf);
// Rehash once up front for the existing plus the incoming element count.
set.rehash(size + set.size());
for (size_t i = 0; i < size; ++i) {
KeyType ref;
// One buf.read() per element: while BufferReadable::read is virtual,
// this is a virtual call on every loop iteration.
read_pod_binary(ref, buf);
set.insert(ref);
}
}
// Fills x by copying sizeof(x) raw bytes out of buf into x's own storage.
template <typename Type>
void read_pod_binary(Type& x, BufferReadable& buf) {
// x is viewed as a plain byte array; buf.read() is the call that was
// declared virtual on BufferReadable before this change.
buf.read(reinterpret_cast<char*>(&x), sizeof(x));
}
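BufferReadable has only one subclass, VectorBufferReader, so the virtual interface is unnecessary: this change turns BufferReadable into a concrete class with non-virtual inline read() methods and keeps VectorBufferReader as an alias for it. BufferWritable and VectorBufferWriter are handled the same way.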
The following SQL query was tested on the SSB-flat dataset:
SELECT COUNT (DISTINCT lo_partkey), COUNT (DISTINCT lo_suppkey) FROM lineorder_flat;
Before: MergeTime: 415.398ms
After optimization: MergeTime: 174.660ms
This commit is contained in:
@@ -24,11 +24,23 @@
|
||||
#include "vec/common/string_ref.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
class BufferWritable {
|
||||
|
||||
class BufferWritable final {
|
||||
public:
|
||||
virtual void write(const char* data, int len) = 0;
|
||||
virtual void commit() = 0;
|
||||
virtual ~BufferWritable() = default;
|
||||
explicit BufferWritable(ColumnString& vector)
|
||||
: _data(vector.get_chars()), _offsets(vector.get_offsets()) {}
|
||||
|
||||
inline void write(const char* data, int len) {
|
||||
_data.insert(data, data + len);
|
||||
_now_offset += len;
|
||||
}
|
||||
|
||||
inline void commit() {
|
||||
_offsets.push_back(_offsets.back() + _now_offset);
|
||||
_now_offset = 0;
|
||||
}
|
||||
|
||||
~BufferWritable() { DCHECK(_now_offset == 0); }
|
||||
|
||||
template <typename T>
|
||||
void write_number(T data) {
|
||||
@@ -36,24 +48,6 @@ public:
|
||||
fmt::format_to(buffer, "{}", data);
|
||||
write(buffer.data(), buffer.size());
|
||||
}
|
||||
};
|
||||
|
||||
class VectorBufferWriter final : public BufferWritable {
|
||||
public:
|
||||
explicit VectorBufferWriter(ColumnString& vector)
|
||||
: _data(vector.get_chars()), _offsets(vector.get_offsets()) {}
|
||||
|
||||
void write(const char* data, int len) override {
|
||||
_data.insert(data, data + len);
|
||||
_now_offset += len;
|
||||
}
|
||||
|
||||
void commit() override {
|
||||
_offsets.push_back(_offsets.back() + _now_offset);
|
||||
_now_offset = 0;
|
||||
}
|
||||
|
||||
~VectorBufferWriter() override { DCHECK(_now_offset == 0); }
|
||||
|
||||
private:
|
||||
ColumnString::Chars& _data;
|
||||
@@ -61,25 +55,22 @@ private:
|
||||
size_t _now_offset = 0;
|
||||
};
|
||||
|
||||
using VectorBufferWriter = BufferWritable;
|
||||
using BufferWriter = BufferWritable;
|
||||
|
||||
class BufferReadable {
|
||||
public:
|
||||
virtual ~BufferReadable() = default;
|
||||
virtual void read(char* data, int len) = 0;
|
||||
virtual StringRef read(int len) = 0;
|
||||
};
|
||||
explicit BufferReadable(StringRef& ref) : _data(ref.data) {}
|
||||
explicit BufferReadable(StringRef&& ref) : _data(ref.data) {}
|
||||
~BufferReadable() = default;
|
||||
|
||||
class VectorBufferReader final : public BufferReadable {
|
||||
public:
|
||||
explicit VectorBufferReader(StringRef& ref) : _data(ref.data) {}
|
||||
explicit VectorBufferReader(StringRef&& ref) : _data(ref.data) {}
|
||||
|
||||
StringRef read(int len) override {
|
||||
inline StringRef read(int len) {
|
||||
StringRef ref(_data, len);
|
||||
_data += len;
|
||||
return ref;
|
||||
}
|
||||
|
||||
void read(char* data, int len) override {
|
||||
inline void read(char* data, int len) {
|
||||
memcpy(data, _data, len);
|
||||
_data += len;
|
||||
}
|
||||
@@ -88,4 +79,7 @@ private:
|
||||
const char* _data;
|
||||
};
|
||||
|
||||
using VectorBufferReader = BufferReadable;
|
||||
using BufferReader = BufferReadable;
|
||||
|
||||
} // namespace doris::vectorized
|
||||
|
||||
Reference in New Issue
Block a user