Files
doris/be/src/vec/core/block.cpp
yiguolei adb758dcac [refactor](remove non vec code) remove json functions string functions match functions and some code (#16141)
remove json functions code
remove string functions code
remove math functions code
move MatchPredicate to olap since it is only used in storage predicate process
remove some code in tuple, Tuple structure should be removed in the future.
remove many code in collection value structure, they are useless
2023-01-26 16:21:12 +08:00

982 lines
31 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Block.cpp
// and modified by Doris
#include "vec/core/block.h"
#include <fmt/format.h>
#include <snappy.h>
#include "agent/be_exec_version_manager.h"
#include "common/status.h"
#include "runtime/descriptors.h"
#include "runtime/tuple.h"
#include "udf/udf.h"
#include "util/block_compression.h"
#include "util/exception.h"
#include "util/faststring.h"
#include "util/simd/bits.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/common/string_ref.h"
#include "vec/common/typeid_cast.h"
#include "vec/data_types/data_type_factory.hpp"
namespace doris::vectorized {
Block::Block(std::initializer_list<ColumnWithTypeAndName> il) : data {il} {
initialize_index_by_name();
}
Block::Block(const ColumnsWithTypeAndName& data_) : data {data_} {
initialize_index_by_name();
}
Block::Block(const std::vector<SlotDescriptor*>& slots, size_t block_size,
bool ignore_trivial_slot) {
for (const auto slot_desc : slots) {
if (ignore_trivial_slot && !slot_desc->need_materialize()) {
continue;
}
auto column_ptr = slot_desc->get_empty_mutable_column();
column_ptr->reserve(block_size);
insert(ColumnWithTypeAndName(std::move(column_ptr), slot_desc->get_data_type_ptr(),
slot_desc->col_name()));
}
}
Block::Block(const PBlock& pblock) {
int be_exec_version = pblock.has_be_exec_version() ? pblock.be_exec_version() : 0;
CHECK(BeExecVersionManager::check_be_exec_version(be_exec_version));
const char* buf = nullptr;
std::string compression_scratch;
if (pblock.compressed()) {
// Decompress
SCOPED_RAW_TIMER(&_decompress_time_ns);
const char* compressed_data = pblock.column_values().c_str();
size_t compressed_size = pblock.column_values().size();
size_t uncompressed_size = 0;
if (pblock.has_compression_type() && pblock.has_uncompressed_size()) {
BlockCompressionCodec* codec;
get_block_compression_codec(pblock.compression_type(), &codec);
uncompressed_size = pblock.uncompressed_size();
compression_scratch.resize(uncompressed_size);
Slice decompressed_slice(compression_scratch);
codec->decompress(Slice(compressed_data, compressed_size), &decompressed_slice);
DCHECK(uncompressed_size == decompressed_slice.size);
} else {
bool success = snappy::GetUncompressedLength(compressed_data, compressed_size,
&uncompressed_size);
DCHECK(success) << "snappy::GetUncompressedLength failed";
compression_scratch.resize(uncompressed_size);
success = snappy::RawUncompress(compressed_data, compressed_size,
compression_scratch.data());
DCHECK(success) << "snappy::RawUncompress failed";
}
_decompressed_bytes = uncompressed_size;
buf = compression_scratch.data();
} else {
buf = pblock.column_values().data();
}
for (const auto& pcol_meta : pblock.column_metas()) {
DataTypePtr type = DataTypeFactory::instance().create_data_type(pcol_meta);
MutableColumnPtr data_column = type->create_column();
buf = type->deserialize(buf, data_column.get(), pblock.be_exec_version());
data.emplace_back(data_column->get_ptr(), type, pcol_meta.name());
}
initialize_index_by_name();
}
void Block::initialize_index_by_name() {
for (size_t i = 0, size = data.size(); i < size; ++i) {
index_by_name[data[i].name] = i;
}
}
void Block::insert(size_t position, const ColumnWithTypeAndName& elem) {
if (position > data.size()) {
LOG(FATAL) << fmt::format("Position out of bound in Block::insert(), max position = {}",
data.size());
}
for (auto& name_pos : index_by_name) {
if (name_pos.second >= position) {
++name_pos.second;
}
}
index_by_name.emplace(elem.name, position);
data.emplace(data.begin() + position, elem);
}
void Block::insert(size_t position, ColumnWithTypeAndName&& elem) {
if (position > data.size()) {
LOG(FATAL) << fmt::format("Position out of bound in Block::insert(), max position = {}",
data.size());
}
for (auto& name_pos : index_by_name) {
if (name_pos.second >= position) {
++name_pos.second;
}
}
index_by_name.emplace(elem.name, position);
data.emplace(data.begin() + position, std::move(elem));
}
void Block::insert(const ColumnWithTypeAndName& elem) {
index_by_name.emplace(elem.name, data.size());
data.emplace_back(elem);
}
void Block::insert(ColumnWithTypeAndName&& elem) {
index_by_name.emplace(elem.name, data.size());
data.emplace_back(std::move(elem));
}
void Block::insert_unique(const ColumnWithTypeAndName& elem) {
if (index_by_name.end() == index_by_name.find(elem.name)) {
insert(elem);
}
}
void Block::insert_unique(ColumnWithTypeAndName&& elem) {
if (index_by_name.end() == index_by_name.find(elem.name)) {
insert(std::move(elem));
}
}
void Block::erase(const std::set<size_t>& positions) {
for (auto it = positions.rbegin(); it != positions.rend(); ++it) {
erase(*it);
}
}
void Block::erase(size_t position) {
if (data.empty()) {
LOG(FATAL) << "Block is empty";
}
if (position >= data.size()) {
LOG(FATAL) << fmt::format("Position out of bound in Block::erase(), max position = {}",
data.size() - 1);
}
erase_impl(position);
}
void Block::erase_impl(size_t position) {
data.erase(data.begin() + position);
for (auto it = index_by_name.begin(); it != index_by_name.end();) {
if (it->second == position)
index_by_name.erase(it++);
else {
if (it->second > position) --it->second;
++it;
}
}
if (position < row_same_bit.size()) {
row_same_bit.erase(row_same_bit.begin() + position);
}
}
void Block::erase(const String& name) {
auto index_it = index_by_name.find(name);
if (index_it == index_by_name.end()) {
LOG(FATAL) << fmt::format("No such name in Block::erase(): '{}'", name);
}
erase_impl(index_it->second);
}
ColumnWithTypeAndName& Block::safe_get_by_position(size_t position) {
if (data.empty()) {
LOG(FATAL) << "Block is empty";
}
if (position >= data.size()) {
LOG(FATAL) << fmt::format(
"Position {} is out of bound in Block::safe_get_by_position(), max position = {}, "
"there are columns: {}",
position, data.size() - 1, dump_names());
}
return data[position];
}
const ColumnWithTypeAndName& Block::safe_get_by_position(size_t position) const {
if (data.empty()) {
LOG(FATAL) << "Block is empty";
}
if (position >= data.size()) {
LOG(FATAL) << fmt::format(
"Position {} is out of bound in Block::safe_get_by_position(), max position = {}, "
"there are columns: {}",
position, data.size() - 1, dump_names());
}
return data[position];
}
ColumnWithTypeAndName& Block::get_by_name(const std::string& name) {
auto it = index_by_name.find(name);
if (index_by_name.end() == it) {
LOG(FATAL) << fmt::format("Not found column {} in block. There are only columns: {}", name,
dump_names());
}
return data[it->second];
}
const ColumnWithTypeAndName& Block::get_by_name(const std::string& name) const {
auto it = index_by_name.find(name);
if (index_by_name.end() == it) {
LOG(FATAL) << fmt::format("Not found column {} in block. There are only columns: {}", name,
dump_names());
}
return data[it->second];
}
ColumnWithTypeAndName* Block::try_get_by_name(const std::string& name) {
auto it = index_by_name.find(name);
if (index_by_name.end() == it) {
return nullptr;
}
return &data[it->second];
}
const ColumnWithTypeAndName* Block::try_get_by_name(const std::string& name) const {
auto it = index_by_name.find(name);
if (index_by_name.end() == it) {
return nullptr;
}
return &data[it->second];
}
bool Block::has(const std::string& name) const {
return index_by_name.end() != index_by_name.find(name);
}
size_t Block::get_position_by_name(const std::string& name) const {
auto it = index_by_name.find(name);
if (index_by_name.end() == it) {
LOG(FATAL) << fmt::format("Not found column {} in block. There are only columns: {}", name,
dump_names());
}
return it->second;
}
void Block::check_number_of_rows(bool allow_null_columns) const {
ssize_t rows = -1;
for (const auto& elem : data) {
if (!elem.column && allow_null_columns) continue;
if (!elem.column) {
LOG(FATAL) << fmt::format(
"Column {} in block is nullptr, in method check_number_of_rows.", elem.name);
}
ssize_t size = elem.column->size();
if (rows == -1) {
rows = size;
} else if (rows != size) {
LOG(FATAL) << fmt::format("Sizes of columns doesn't match: {}:{},{}:{}",
data.front().name, rows, elem.name, size);
}
}
}
size_t Block::rows() const {
for (const auto& elem : data) {
if (elem.column) {
return elem.column->size();
}
}
return 0;
}
std::string Block::each_col_size() {
std::stringstream ss;
for (const auto& elem : data) {
if (elem.column) {
ss << elem.column->size() << " | ";
} else {
ss << "-1 | ";
}
}
return ss.str();
}
void Block::set_num_rows(size_t length) {
if (rows() > length) {
for (auto& elem : data) {
if (elem.column) {
elem.column = elem.column->cut(0, length);
}
}
if (length < row_same_bit.size()) {
row_same_bit.resize(length);
}
}
}
void Block::skip_num_rows(int64_t& length) {
auto origin_rows = rows();
if (origin_rows <= length) {
clear();
length -= origin_rows;
} else {
for (auto& elem : data) {
if (elem.column) {
elem.column = elem.column->cut(length, origin_rows - length);
}
}
if (length < row_same_bit.size()) {
row_same_bit.assign(row_same_bit.begin() + length, row_same_bit.end());
}
}
}
size_t Block::bytes() const {
size_t res = 0;
for (const auto& elem : data) {
res += elem.column->byte_size();
}
return res;
}
size_t Block::allocated_bytes() const {
size_t res = 0;
for (const auto& elem : data) {
res += elem.column->allocated_bytes();
}
return res;
}
std::string Block::dump_names() const {
std::stringstream out;
for (auto it = data.begin(); it != data.end(); ++it) {
if (it != data.begin()) out << ", ";
out << it->name;
}
return out.str();
}
std::string Block::dump_data(size_t begin, size_t row_limit) const {
std::vector<std::string> headers;
std::vector<size_t> headers_size;
for (const auto& it : data) {
std::string s = fmt::format("{}({})", it.name, it.type->get_name());
headers_size.push_back(s.size() > 15 ? s.size() : 15);
headers.emplace_back(s);
}
std::stringstream out;
// header upper line
auto line = [&]() {
for (size_t i = 0; i < columns(); ++i) {
out << std::setfill('-') << std::setw(1) << "+" << std::setw(headers_size[i]) << "-";
}
out << std::setw(1) << "+" << std::endl;
};
line();
// header text
for (size_t i = 0; i < columns(); ++i) {
out << std::setfill(' ') << std::setw(1) << "|" << std::left << std::setw(headers_size[i])
<< headers[i];
}
out << std::setw(1) << "|" << std::endl;
// header bottom line
line();
if (rows() == 0) {
return out.str();
}
// content
for (size_t row_num = begin; row_num < rows() && row_num < row_limit + begin; ++row_num) {
for (size_t i = 0; i < columns(); ++i) {
std::string s = "";
if (data[i].column) {
s = data[i].to_string(row_num);
}
if (s.length() > headers_size[i]) {
s = s.substr(0, headers_size[i] - 3) + "...";
}
out << std::setfill(' ') << std::setw(1) << "|" << std::setw(headers_size[i])
<< std::right << s;
}
out << std::setw(1) << "|" << std::endl;
}
// bottom line
line();
if (row_limit < rows()) {
out << rows() << " rows in block, only show first " << row_limit << " rows." << std::endl;
}
return out.str();
}
std::string Block::dump_one_line(size_t row, int column_end) const {
assert(column_end <= columns());
fmt::memory_buffer line;
for (int i = 0; i < column_end; ++i) {
if (LIKELY(i != 0)) {
// TODO: need more effective function of to string. now the impl is slow
fmt::format_to(line, " {}", data[i].to_string(row));
} else {
fmt::format_to(line, "{}", data[i].to_string(row));
}
}
return fmt::to_string(line);
}
std::string Block::dump_structure() const {
// WriteBufferFromOwnString out;
std::stringstream out;
for (auto it = data.begin(); it != data.end(); ++it) {
if (it != data.begin()) {
out << ", ";
}
out << it->dump_structure();
}
return out.str();
}
Block Block::clone_empty() const {
Block res;
for (const auto& elem : data) {
res.insert(elem.clone_empty());
}
return res;
}
MutableColumns Block::clone_empty_columns() const {
size_t num_columns = data.size();
MutableColumns columns(num_columns);
for (size_t i = 0; i < num_columns; ++i) {
columns[i] = data[i].column ? data[i].column->clone_empty() : data[i].type->create_column();
}
return columns;
}
Columns Block::get_columns() const {
size_t num_columns = data.size();
Columns columns(num_columns);
for (size_t i = 0; i < num_columns; ++i) {
columns[i] = data[i].column;
}
return columns;
}
MutableColumns Block::mutate_columns() {
size_t num_columns = data.size();
MutableColumns columns(num_columns);
for (size_t i = 0; i < num_columns; ++i) {
columns[i] = data[i].column ? (*std::move(data[i].column)).assume_mutable()
: data[i].type->create_column();
}
return columns;
}
void Block::set_columns(MutableColumns&& columns) {
/// TODO: assert if |columns| doesn't match |data|!
size_t num_columns = data.size();
for (size_t i = 0; i < num_columns; ++i) {
data[i].column = std::move(columns[i]);
}
}
void Block::set_columns(const Columns& columns) {
/// TODO: assert if |columns| doesn't match |data|!
size_t num_columns = data.size();
for (size_t i = 0; i < num_columns; ++i) {
data[i].column = columns[i];
}
}
Block Block::clone_with_columns(MutableColumns&& columns) const {
Block res;
size_t num_columns = data.size();
for (size_t i = 0; i < num_columns; ++i) {
res.insert({std::move(columns[i]), data[i].type, data[i].name});
}
return res;
}
Block Block::clone_with_columns(const Columns& columns) const {
Block res;
size_t num_columns = data.size();
if (num_columns != columns.size()) {
LOG(FATAL) << fmt::format(
"Cannot clone block with columns because block has {} columns, but {} columns "
"given.",
num_columns, columns.size());
}
for (size_t i = 0; i < num_columns; ++i) {
res.insert({columns[i], data[i].type, data[i].name});
}
return res;
}
Block Block::clone_without_columns() const {
Block res;
size_t num_columns = data.size();
for (size_t i = 0; i < num_columns; ++i) {
res.insert({nullptr, data[i].type, data[i].name});
}
return res;
}
Block Block::sort_columns() const {
Block sorted_block;
for (const auto& name : index_by_name) {
sorted_block.insert(data[name.second]);
}
return sorted_block;
}
const ColumnsWithTypeAndName& Block::get_columns_with_type_and_name() const {
return data;
}
Names Block::get_names() const {
Names res;
res.reserve(columns());
for (const auto& elem : data) {
res.push_back(elem.name);
}
return res;
}
DataTypes Block::get_data_types() const {
DataTypes res;
res.reserve(columns());
for (const auto& elem : data) {
res.push_back(elem.type);
}
return res;
}
void Block::clear() {
data.clear();
index_by_name.clear();
row_same_bit.clear();
}
void Block::clear_column_data(int column_size) noexcept {
// data.size() greater than column_size, means here have some
// function exec result in block, need erase it here
if (column_size != -1 and data.size() > column_size) {
for (int i = data.size() - 1; i >= column_size; --i) {
erase(i);
}
}
for (auto& d : data) {
DCHECK_EQ(d.column->use_count(), 1);
(*std::move(d.column)).assume_mutable()->clear();
}
row_same_bit.clear();
}
void Block::swap(Block& other) noexcept {
data.swap(other.data);
index_by_name.swap(other.index_by_name);
row_same_bit.swap(other.row_same_bit);
}
void Block::swap(Block&& other) noexcept {
clear();
data = std::move(other.data);
initialize_index_by_name();
row_same_bit = std::move(other.row_same_bit);
}
void Block::update_hash(SipHash& hash) const {
for (size_t row_no = 0, num_rows = rows(); row_no < num_rows; ++row_no) {
for (const auto& col : data) {
col.column->update_hash_with_value(row_no, hash);
}
}
}
void Block::filter_block_internal(Block* block, const std::vector<uint32_t>& columns_to_filter,
const IColumn::Filter& filter) {
size_t count = filter.size() - simd::count_zero_num((int8_t*)filter.data(), filter.size());
if (count == 0) {
for (auto& col : columns_to_filter) {
std::move(*block->get_by_position(col).column).assume_mutable()->clear();
}
} else {
for (auto& col : columns_to_filter) {
if (block->get_by_position(col).column->size() != count) {
block->get_by_position(col).column =
block->get_by_position(col).column->filter(filter, count);
}
}
}
}
void Block::filter_block_internal(Block* block, const IColumn::Filter& filter,
uint32_t column_to_keep) {
std::vector<uint32_t> columns_to_filter;
columns_to_filter.resize(column_to_keep);
for (uint32_t i = 0; i < column_to_keep; ++i) {
columns_to_filter[i] = i;
}
filter_block_internal(block, columns_to_filter, filter);
}
Block Block::copy_block(const std::vector<int>& column_offset) const {
ColumnsWithTypeAndName columns_with_type_and_name;
for (auto offset : column_offset) {
DCHECK(offset < data.size());
columns_with_type_and_name.emplace_back(data[offset]);
}
return columns_with_type_and_name;
}
void Block::append_block_by_selector(MutableColumns& columns,
const IColumn::Selector& selector) const {
DCHECK(data.size() == columns.size());
for (size_t i = 0; i < data.size(); i++) {
data[i].column->append_data_by_selector(columns[i], selector);
}
}
Status Block::filter_block(Block* block, const std::vector<uint32_t>& columns_to_filter,
int filter_column_id, int column_to_keep) {
ColumnPtr filter_column = block->get_by_position(filter_column_id).column;
if (auto* nullable_column = check_and_get_column<ColumnNullable>(*filter_column)) {
ColumnPtr nested_column = nullable_column->get_nested_column_ptr();
MutableColumnPtr mutable_holder =
nested_column->use_count() == 1
? nested_column->assume_mutable()
: nested_column->clone_resized(nested_column->size());
ColumnUInt8* concrete_column = typeid_cast<ColumnUInt8*>(mutable_holder.get());
if (!concrete_column) {
return Status::InvalidArgument(
"Illegal type {} of column for filter. Must be UInt8 or Nullable(UInt8).",
filter_column->get_name());
}
auto* __restrict null_map = nullable_column->get_null_map_data().data();
IColumn::Filter& filter = concrete_column->get_data();
auto* __restrict filter_data = filter.data();
const size_t size = filter.size();
for (size_t i = 0; i < size; ++i) {
filter_data[i] &= !null_map[i];
}
filter_block_internal(block, columns_to_filter, filter);
} else if (auto* const_column = check_and_get_column<ColumnConst>(*filter_column)) {
bool ret = const_column->get_bool(0);
if (!ret) {
for (auto& col : columns_to_filter) {
std::move(*block->get_by_position(col).column).assume_mutable()->clear();
}
}
} else {
const IColumn::Filter& filter =
assert_cast<const doris::vectorized::ColumnVector<UInt8>&>(*filter_column)
.get_data();
filter_block_internal(block, columns_to_filter, filter);
}
erase_useless_column(block, column_to_keep);
return Status::OK();
}
Status Block::filter_block(Block* block, int filter_column_id, int column_to_keep) {
std::vector<uint32_t> columns_to_filter;
columns_to_filter.resize(column_to_keep);
for (uint32_t i = 0; i < column_to_keep; ++i) {
columns_to_filter[i] = i;
}
return filter_block(block, columns_to_filter, filter_column_id, column_to_keep);
}
Status Block::serialize(int be_exec_version, PBlock* pblock,
/*std::string* compressed_buffer,*/ size_t* uncompressed_bytes,
size_t* compressed_bytes, segment_v2::CompressionTypePB compression_type,
bool allow_transfer_large_data) const {
pblock->set_be_exec_version(be_exec_version);
// calc uncompressed size for allocation
size_t content_uncompressed_size = 0;
for (const auto& c : *this) {
PColumnMeta* pcm = pblock->add_column_metas();
c.to_pb_column_meta(pcm);
// get serialized size
content_uncompressed_size +=
c.type->get_uncompressed_serialized_bytes(*(c.column), pblock->be_exec_version());
}
// serialize data values
// when data type is HLL, content_uncompressed_size maybe larger than real size.
std::string column_values;
try {
column_values.resize(content_uncompressed_size);
} catch (...) {
std::exception_ptr p = std::current_exception();
std::string msg =
fmt::format("Try to alloc {} bytes for pblock column values failed. reason {}",
content_uncompressed_size, get_current_exception_type_name(p));
LOG(WARNING) << msg;
return Status::BufferAllocFailed(msg);
}
char* buf = column_values.data();
for (const auto& c : *this) {
buf = c.type->serialize(*(c.column), buf, pblock->be_exec_version());
}
*uncompressed_bytes = content_uncompressed_size;
// compress
if (config::compress_rowbatches && content_uncompressed_size > 0) {
SCOPED_RAW_TIMER(&_compress_time_ns);
pblock->set_compression_type(compression_type);
pblock->set_uncompressed_size(content_uncompressed_size);
BlockCompressionCodec* codec;
RETURN_IF_ERROR(get_block_compression_codec(compression_type, &codec));
faststring buf_compressed;
RETURN_IF_ERROR(codec->compress(Slice(column_values.data(), content_uncompressed_size),
&buf_compressed));
size_t compressed_size = buf_compressed.size();
if (LIKELY(compressed_size < content_uncompressed_size)) {
pblock->set_column_values(buf_compressed.data(), buf_compressed.size());
pblock->set_compressed(true);
*compressed_bytes = compressed_size;
} else {
pblock->set_column_values(std::move(column_values));
*compressed_bytes = content_uncompressed_size;
}
VLOG_ROW << "uncompressed size: " << content_uncompressed_size
<< ", compressed size: " << compressed_size;
}
if (!allow_transfer_large_data && *compressed_bytes >= std::numeric_limits<int32_t>::max()) {
return Status::InternalError("The block is large than 2GB({}), can not send by Protobuf.",
*compressed_bytes);
}
return Status::OK();
}
inline bool Block::is_column_data_null(const doris::TypeDescriptor& type_desc,
const StringRef& data_ref, const IColumn* column, int row) {
if (type_desc.type != TYPE_ARRAY) {
return data_ref.data == nullptr;
} else {
Field array;
column->get(row, array);
return array.is_null();
}
}
MutableBlock::MutableBlock(const std::vector<TupleDescriptor*>& tuple_descs, int reserve_size,
bool ignore_trivial_slot) {
for (auto tuple_desc : tuple_descs) {
for (auto slot_desc : tuple_desc->slots()) {
if (ignore_trivial_slot && !slot_desc->need_materialize()) {
continue;
}
_data_types.emplace_back(slot_desc->get_data_type_ptr());
_columns.emplace_back(_data_types.back()->create_column());
if (reserve_size != 0) {
_columns.back()->reserve(reserve_size);
}
}
}
}
size_t MutableBlock::rows() const {
for (const auto& column : _columns) {
if (column) {
return column->size();
}
}
return 0;
}
void MutableBlock::swap(MutableBlock& another) noexcept {
_columns.swap(another._columns);
_data_types.swap(another._data_types);
}
void MutableBlock::swap(MutableBlock&& another) noexcept {
clear();
_columns = std::move(another._columns);
_data_types = std::move(another._data_types);
}
void MutableBlock::add_row(const Block* block, int row) {
auto& block_data = block->get_columns_with_type_and_name();
for (size_t i = 0; i < _columns.size(); ++i) {
_columns[i]->insert_from(*block_data[i].column.get(), row);
}
}
void MutableBlock::add_rows(const Block* block, const int* row_begin, const int* row_end) {
auto& block_data = block->get_columns_with_type_and_name();
for (size_t i = 0; i < _columns.size(); ++i) {
auto& dst = _columns[i];
auto& src = *block_data[i].column.get();
dst->insert_indices_from(src, row_begin, row_end);
}
}
void MutableBlock::add_rows(const Block* block, size_t row_begin, size_t length) {
auto& block_data = block->get_columns_with_type_and_name();
for (size_t i = 0; i < _columns.size(); ++i) {
auto& dst = _columns[i];
auto& src = *block_data[i].column.get();
dst->insert_range_from(src, row_begin, length);
}
}
Block MutableBlock::to_block(int start_column) {
return to_block(start_column, _columns.size());
}
Block MutableBlock::to_block(int start_column, int end_column) {
ColumnsWithTypeAndName columns_with_schema;
for (size_t i = start_column; i < end_column; ++i) {
columns_with_schema.emplace_back(std::move(_columns[i]), _data_types[i], "");
}
return {columns_with_schema};
}
std::string MutableBlock::dump_data(size_t row_limit) const {
std::vector<std::string> headers;
std::vector<size_t> headers_size;
for (size_t i = 0; i < columns(); ++i) {
std::string s = _data_types[i]->get_name();
headers_size.push_back(s.size() > 15 ? s.size() : 15);
headers.emplace_back(s);
}
std::stringstream out;
// header upper line
auto line = [&]() {
for (size_t i = 0; i < columns(); ++i) {
out << std::setfill('-') << std::setw(1) << "+" << std::setw(headers_size[i]) << "-";
}
out << std::setw(1) << "+" << std::endl;
};
line();
// header text
for (size_t i = 0; i < columns(); ++i) {
out << std::setfill(' ') << std::setw(1) << "|" << std::left << std::setw(headers_size[i])
<< headers[i];
}
out << std::setw(1) << "|" << std::endl;
// header bottom line
line();
if (rows() == 0) {
return out.str();
}
// content
for (size_t row_num = 0; row_num < rows() && row_num < row_limit; ++row_num) {
for (size_t i = 0; i < columns(); ++i) {
std::string s = _data_types[i]->to_string(*_columns[i].get(), row_num);
if (s.length() > headers_size[i]) {
s = s.substr(0, headers_size[i] - 3) + "...";
}
out << std::setfill(' ') << std::setw(1) << "|" << std::setw(headers_size[i])
<< std::right << s;
}
out << std::setw(1) << "|" << std::endl;
}
// bottom line
line();
if (row_limit < rows()) {
out << rows() << " rows in block, only show first " << row_limit << " rows." << std::endl;
}
return out.str();
}
std::unique_ptr<Block> Block::create_same_struct_block(size_t size) const {
auto temp_block = std::make_unique<Block>();
for (const auto& d : data) {
auto column = d.type->create_column();
column->resize(size);
temp_block->insert({std::move(column), d.type, d.name});
}
return temp_block;
}
void Block::shrink_char_type_column_suffix_zero(const std::vector<size_t>& char_type_idx) {
for (auto idx : char_type_idx) {
if (idx < data.size()) {
auto& col_and_name = this->get_by_position(idx);
col_and_name.column = col_and_name.column->assume_mutable()->get_shrinked_column();
}
}
}
size_t MutableBlock::allocated_bytes() const {
size_t res = 0;
for (const auto& col : _columns) {
res += col->allocated_bytes();
}
return res;
}
void MutableBlock::clear_column_data() noexcept {
for (auto& col : _columns) {
if (col) {
col->clear();
}
}
}
} // namespace doris::vectorized