// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "runtime/dpp_sink.h"
|
|
|
|
#include <memory>
|
|
#include <sstream>
|
|
|
|
#include "agent/cgroups_mgr.h"
|
|
#include "common/object_pool.h"
|
|
#include "exprs/expr.h"
|
|
#include "exprs/slot_ref.h"
|
|
#include "common/object_pool.h"
|
|
#include "runtime/dpp_writer.h"
|
|
#include "runtime/exec_env.h"
|
|
#include "runtime/tuple_row.h"
|
|
#include "runtime/runtime_state.h"
|
|
#include "runtime/row_batch.h"
|
|
#include "runtime/qsorter.h"
|
|
#include "runtime/descriptors.h"
|
|
#include "gen_cpp/Types_types.h"
|
|
#include "util/count_down_latch.hpp"
|
|
#include "util/debug_util.h"
|
|
#include "util/thread_pool.hpp"
|
|
#include "olap/field.h"
|
|
|
|
namespace doris {
|
|
|
|
// Keep the smaller of the aggregated slot and the incoming value.
// Note: if the incoming value is NULL, the aggregated slot is set to NULL.
template<typename T>
static void update_min(SlotRef* ref, TupleRow* agg_row, TupleRow* row) {
    void* slot = ref->get_slot(agg_row);
    bool agg_row_null = ref->is_null_bit_set(agg_row);
    void* value = SlotRef::get_value(ref, row);
    if (!agg_row_null && value != NULL) {
        T* t_slot = static_cast<T*>(slot);
        *t_slot = std::min(*t_slot, *static_cast<T*>(value));
    } else if (!agg_row_null && value == NULL) {
        Tuple* agg_tuple = ref->get_tuple(agg_row);
        agg_tuple->set_null(ref->null_indicator_offset());
    }
}

// Keep the larger of the aggregated slot and the incoming value.
// A NULL aggregated slot is replaced by the first non-NULL value.
template<typename T>
static void update_max(SlotRef* ref, TupleRow* agg_row, TupleRow* row) {
    void* slot = ref->get_slot(agg_row);
    bool agg_row_null = ref->is_null_bit_set(agg_row);
    void* value = SlotRef::get_value(ref, row);
    if (!agg_row_null && value != NULL) {
        T* t_slot = static_cast<T*>(slot);
        *t_slot = std::max(*t_slot, *static_cast<T*>(value));
    } else if (agg_row_null && value != NULL) {
        T* t_slot = static_cast<T*>(slot);
        *t_slot = *static_cast<T*>(value);
        Tuple* agg_tuple = ref->get_tuple(agg_row);
        agg_tuple->set_not_null(ref->null_indicator_offset());
    }
}

// Add the incoming value to the aggregated slot.
// A NULL aggregated slot is replaced by the first non-NULL value.
template<typename T>
static void update_sum(SlotRef* ref, TupleRow* agg_row, TupleRow* row) {
    void* slot = ref->get_slot(agg_row);
    bool agg_row_null = ref->is_null_bit_set(agg_row);
    void* value = SlotRef::get_value(ref, row);
    if (!agg_row_null && value != NULL) {
        T* t_slot = static_cast<T*>(slot);
        *t_slot += *static_cast<T*>(value);
    } else if (agg_row_null && value != NULL) {
        T* t_slot = static_cast<T*>(slot);
        *t_slot = *static_cast<T*>(value);
        Tuple* agg_tuple = ref->get_tuple(agg_row);
        agg_tuple->set_not_null(ref->null_indicator_offset());
    }
}

// Does nothing; used only to fill the updater vector for aggregations
// that need no per-row merge here (e.g. REPLACE, or HLL handled elsewhere).
static void fake_update(SlotRef* ref, TupleRow* agg_row, TupleRow* row) {
    // Intentionally empty; an assertion here could catch unexpected calls.
}
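
// Illustrative sketch (not called anywhere in this file): these updaters are
// stored through the 'update_func' pointer type declared in Translator below
// and invoked once per value column when two sorted rows share the same key.
// The names 'merge_value_column', 'slot_ref', 'agg_row' and 'cur_row' are
// placeholders for this sketch only.
//
//   typedef void (*update_func)(SlotRef*, TupleRow*, TupleRow*);
//   update_func merge_value_column = update_sum<int64_t>;   // e.g. a BIGINT SUM column
//   merge_value_column(slot_ref, agg_row, cur_row);         // folds cur_row into agg_row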

class HllDppSinkMerge {
    // Per-column merge state used while aggregating rows with the same key.
    struct HllMergeValue {
        HllDataType type;
        std::set<uint64_t> hash_set;
        std::map<int, uint8_t> index_to_value;
    };

public:
    // Initialize one merge state per HLL column.
    void prepare(int count, MemPool* pool);

    // Update the merge state of the HLL column at 'index' with one more row.
    void update_hll_set(TupleRow* agg_row, TupleRow* row,
                        ExprContext* ctx, int index);

    // Merge everything accumulated for the current key back into 'agg_row'
    // and reset the per-column state.
    void finalize_one_merge(TupleRow* agg_row, MemPool* pool,
                            const RollupSchema& rollup_schema);

    // Release all merge state.
    void close();

private:
    std::vector<HllMergeValue*> _hll_last_row;
};

// A Translator handles one tablet, i.e. rows whose (partition, rollup, bucket)
// are all equal. It is used in the following steps (see the illustrative call
// sequence sketched after this class definition):
// 1. construct a Translator
// 2. prepare
// 3. add_batch until there is no more data
// 4. process, which sorts/aggregates the added data and sends it to the writer.
class Translator {
public:
    Translator(
        const TabletDesc& tablet_desc,
        const RowDescriptor& row_desc,
        const std::string& rollup_name,
        const RollupSchema& rollup_schema,
        ObjectPool* obj_pool);

    ~Translator();

    // Prepare this translator, which includes:
    // 1. creating a sorter to sort the data
    // 2. creating comparators to aggregate rows with the same key
    // 3. creating a writer to write the data later
    // 4. creating the value updaters
    Status prepare(RuntimeState* state);

    Status close(RuntimeState* state);

    Status add_batch(RowBatch* batch);

    // NOTE: call this only after all data has been added via 'add_batch'.
    Status process(RuntimeState* state);

    const std::string& name() const {
        return _rollup_name;
    }

    RuntimeProfile* profile() {
        return _profile;
    }

    const std::string& output_path() const {
        return _output_path;
    }

private:
    void format_output_path(RuntimeState* state);
    // Create profile information.
    Status create_profile(RuntimeState* state);
    // Create the sorter for this translator.
    // The sorter is referenced by the '_sorter' field and added to '_obj_pool',
    // so there is no need to delete it manually.
    Status create_sorter(RuntimeState* state);

    // Helper to create the comparator contexts used to aggregate equal rows.
    Status create_comparetor(RuntimeState* state);

    // Create the writer used to write data.
    // Same as the sorter, so don't worry about its lifecycle.
    Status create_writer(RuntimeState* state);

    // Create the value updaters.
    Status create_value_updaters();

    // The following functions are used to process batches.

    // Check whether two rows are equal, comparing only the key slots.
    bool eq_tuple_row(TupleRow* last, TupleRow* cur);

    // Deep copy 'row' into '_batch_to_write'.
    // Call this only after making sure that '_batch_to_write' is not full.
    void copy_row(TupleRow* row);

    // Aggregate the values of 'row' into 'agg_row'.
    // Call this after 'eq_tuple_row' returns true.
    void update_row(TupleRow* agg_row, TupleRow* row);

    // Process one input row.
    Status process_one_row(TupleRow* row);

    // Identifies the tablet (partition, bucket) this translator handles.
    TabletDesc _tablet_desc;
    // Needed to construct RowBatch.
    const RowDescriptor& _row_desc;
    const std::string& _rollup_name;
    // Used to construct the sorter, comparators, writer and value updaters.
    const RollupSchema& _rollup_schema;

    // '_sorter': used to sort data in this translator.
    // Stored in '_obj_pool', so it is not deleted here.
    Sorter* _sorter;
    // Not owned.
    ObjectPool* _obj_pool;

    // Used to compare two tuple rows after sorting.
    // Equal rows are aggregated into one row.
    std::vector<ExprContext*> _last_row_expr_ctxs;
    std::vector<ExprContext*> _cur_row_expr_ctxs;

    // Path to write to.
    std::string _output_path;

    // Used by the writer.
    std::vector<ExprContext*> _output_row_expr_ctxs;

    // Used to write the result to a file.
    DppWriter* _writer;

    // We buffer output rows in a batch so that memory can be released
    // incrementally: once a batch has been sent to the writer its memory is
    // freed, because the input data may be too large to hold in memory at once.
    std::unique_ptr<RowBatch> _batch_to_write;

    typedef void (*update_func) (SlotRef*, TupleRow*, TupleRow*);
    std::vector<update_func> _value_updaters;

    // Profiling.
    RuntimeProfile* _profile;
    RuntimeProfile::Counter* _add_batch_timer;
    RuntimeProfile::Counter* _sort_timer;
    RuntimeProfile::Counter* _agg_timer;
    RuntimeProfile::Counter* _writer_timer;
    HllDppSinkMerge _hll_merge;
};
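
// Illustrative call sequence for a Translator, as driven by DppSink below.
// This is a sketch only: 'state', 'pool' and 'batches' stand for objects the
// caller already owns and are not defined in this file.
//
//   Translator* translator = pool->add(
//           new Translator(tablet_desc, row_desc, rollup_name, rollup_schema, pool));
//   RETURN_IF_ERROR(translator->prepare(state));       // sorter, comparators, writer, updaters
//   for (RowBatch* batch : batches) {
//       RETURN_IF_ERROR(translator->add_batch(batch)); // buffer all input in the sorter
//   }
//   RETURN_IF_ERROR(translator->process(state));       // sort, aggregate, write to file
//   RETURN_IF_ERROR(translator->close(state));         // release expr contexts and batches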

Translator::Translator(
        const TabletDesc& tablet_desc,
        const RowDescriptor& row_desc,
        const std::string& rollup_name,
        const RollupSchema& rollup_schema,
        ObjectPool* obj_pool) :
            _tablet_desc(tablet_desc),
            _row_desc(row_desc),
            _rollup_name(rollup_name),
            _rollup_schema(rollup_schema),
            _sorter(nullptr),
            _obj_pool(obj_pool),
            _writer(nullptr),
            _profile(nullptr),
            _add_batch_timer(nullptr),
            _sort_timer(nullptr),
            _agg_timer(nullptr),
            _writer_timer(nullptr) {
}

Translator::~Translator() {
}

Status Translator::create_sorter(RuntimeState* state) {
    QSorter* sorter = _obj_pool->add(new QSorter(_row_desc, _rollup_schema.keys(), state));
    RETURN_IF_ERROR(sorter->prepare(state));
    _sorter = sorter;
    return Status::OK();
}

Status Translator::create_comparetor(RuntimeState* state) {
    RETURN_IF_ERROR(Expr::clone_if_not_exists(_rollup_schema.keys(), state, &_last_row_expr_ctxs));
    RETURN_IF_ERROR(Expr::clone_if_not_exists(_rollup_schema.keys(), state, &_cur_row_expr_ctxs));
    return Status::OK();
}

void Translator::format_output_path(RuntimeState* state) {
    std::stringstream ss;
    ss << state->load_dir() << "/"
        << state->import_label() << "."
        << _tablet_desc.partition_id << "."
        << _rollup_name << "."
        << _tablet_desc.bucket_id;
    _output_path = ss.str();
}
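
// Example of the path produced by format_output_path above (the concrete
// values are illustrative only): with load_dir "/tmp/dpp", import label
// "label_1", partition id 10, rollup "base" and bucket id 3 the output path is
//   /tmp/dpp/label_1.10.base.3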

Status Translator::create_writer(RuntimeState* state) {
    // 1. create output exprs
    std::vector<ExprContext*> ctxs;
    RETURN_IF_ERROR(Expr::clone_if_not_exists(_rollup_schema.keys(), state, &ctxs));
    for (auto ctx : ctxs) {
        _output_row_expr_ctxs.push_back(ctx);
    }
    ctxs.clear();
    RETURN_IF_ERROR(Expr::clone_if_not_exists(_rollup_schema.values(), state, &ctxs));
    for (auto ctx : ctxs) {
        _output_row_expr_ctxs.push_back(ctx);
    }
    // 2. create the output file
    FileHandler* fh = _obj_pool->add(new FileHandler());
    if (fh->open_with_mode(_output_path, O_CREAT | O_TRUNC | O_WRONLY,
                           S_IRWXU) != OLAP_SUCCESS) {
        std::stringstream ss;
        ss << "open file failed; [file=" << _output_path << "]";
        return Status::InternalError(ss.str());
    }

    // 3. create the writer
    _writer = _obj_pool->add(new DppWriter(1, _output_row_expr_ctxs, fh));
    RETURN_IF_ERROR(_writer->open());

    return Status::OK();
}

Status Translator::create_value_updaters() {
    if (_rollup_schema.values().size() != _rollup_schema.value_ops().size()) {
        return Status::InternalError("size of values and value_ops are not equal.");
    }

    int num_values = _rollup_schema.values().size();

    std::string keys_type = _rollup_schema.keys_type();
    if ("DUP_KEYS" == keys_type) {
        return Status::OK();
    } else if ("UNIQUE_KEYS" == keys_type) {
        for (int i = 0; i < num_values; ++i) {
            _value_updaters.push_back(fake_update);
        }
        return Status::OK();
    }

    for (int i = 0; i < num_values; ++i) {
        switch (_rollup_schema.values()[i]->root()->type().type) {
        case TYPE_TINYINT: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
                _value_updaters.push_back(update_max<int8_t>);
                break;
            case TAggregationType::MIN:
                _value_updaters.push_back(update_min<int8_t>);
                break;
            case TAggregationType::SUM:
                _value_updaters.push_back(update_sum<int8_t>);
                break;
            default:
                _value_updaters.push_back(fake_update);
            }
            break;
        }
        case TYPE_SMALLINT: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
                _value_updaters.push_back(update_max<int16_t>);
                break;
            case TAggregationType::MIN:
                _value_updaters.push_back(update_min<int16_t>);
                break;
            case TAggregationType::SUM:
                _value_updaters.push_back(update_sum<int16_t>);
                break;
            default:
                _value_updaters.push_back(fake_update);
            }
            break;
        }
        case TYPE_INT: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
                _value_updaters.push_back(update_max<int32_t>);
                break;
            case TAggregationType::MIN:
                _value_updaters.push_back(update_min<int32_t>);
                break;
            case TAggregationType::SUM:
                _value_updaters.push_back(update_sum<int32_t>);
                break;
            default:
                _value_updaters.push_back(fake_update);
            }
            break;
        }
        case TYPE_BIGINT: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
                _value_updaters.push_back(update_max<int64_t>);
                break;
            case TAggregationType::MIN:
                _value_updaters.push_back(update_min<int64_t>);
                break;
            case TAggregationType::SUM:
                _value_updaters.push_back(update_sum<int64_t>);
                break;
            default:
                _value_updaters.push_back(fake_update);
            }
            break;
        }
        case TYPE_LARGEINT: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
                _value_updaters.push_back(update_max<__int128>);
                break;
            case TAggregationType::MIN:
                _value_updaters.push_back(update_min<__int128>);
                break;
            case TAggregationType::SUM:
                _value_updaters.push_back(update_sum<__int128>);
                break;
            default:
                _value_updaters.push_back(fake_update);
            }
            break;
        }
        case TYPE_FLOAT: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
                _value_updaters.push_back(update_max<float>);
                break;
            case TAggregationType::MIN:
                _value_updaters.push_back(update_min<float>);
                break;
            case TAggregationType::SUM:
                _value_updaters.push_back(update_sum<float>);
                break;
            default:
                _value_updaters.push_back(fake_update);
            }
            break;
        }
        case TYPE_DOUBLE: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
                _value_updaters.push_back(update_max<double>);
                break;
            case TAggregationType::MIN:
                _value_updaters.push_back(update_min<double>);
                break;
            case TAggregationType::SUM:
                _value_updaters.push_back(update_sum<double>);
                break;
            default:
                _value_updaters.push_back(fake_update);
            }
            break;
        }
        case TYPE_DECIMAL: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
                _value_updaters.push_back(update_max<DecimalValue>);
                break;
            case TAggregationType::MIN:
                _value_updaters.push_back(update_min<DecimalValue>);
                break;
            case TAggregationType::SUM:
                _value_updaters.push_back(update_sum<DecimalValue>);
                break;
            default:
                _value_updaters.push_back(fake_update);
            }
            break;
        }
        case TYPE_DECIMALV2: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
                _value_updaters.push_back(update_max<__int128>);
                break;
            case TAggregationType::MIN:
                _value_updaters.push_back(update_min<__int128>);
                break;
            case TAggregationType::SUM:
                _value_updaters.push_back(update_sum<__int128>);
                break;
            default:
                _value_updaters.push_back(fake_update);
            }
            break;
        }
        case TYPE_DATE:
        case TYPE_DATETIME: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
                _value_updaters.push_back(update_max<DateTimeValue>);
                break;
            case TAggregationType::MIN:
                _value_updaters.push_back(update_min<DateTimeValue>);
                break;
            case TAggregationType::SUM:
                return Status::InternalError("Unsupported sum operation on date/datetime column.");
            default:
                // REPLACE: nothing to merge here.
                _value_updaters.push_back(fake_update);
                break;
            }
            break;
        }
        case TYPE_CHAR:
        case TYPE_VARCHAR: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::MAX:
            case TAggregationType::MIN:
            case TAggregationType::SUM:
                return Status::InternalError("Unsupported max/min/sum operation on char/varchar column.");
            default:
                // Only REPLACE is meaningful for char/varchar; nothing to merge here.
                _value_updaters.push_back(fake_update);
                break;
            }
            break;
        }
        case TYPE_HLL: {
            switch (_rollup_schema.value_ops()[i]) {
            case TAggregationType::HLL_UNION:
                // Placeholder only: HLL columns are merged in Translator::update_row
                // via HllDppSinkMerge::update_hll_set.
                _value_updaters.push_back(fake_update);
                break;
            case TAggregationType::MAX:
            case TAggregationType::MIN:
            case TAggregationType::SUM:
                return Status::InternalError("Unsupported max/min/sum operation on hll column.");
            default:
                _value_updaters.push_back(fake_update);
                break;
            }
            break;
        }
        default: {
            std::stringstream ss;
            ss << "Unsupported column type(" << _rollup_schema.values()[i]->root()->type() << ")";
            return Status::InternalError(ss.str());
        }
        }
    }

    return Status::OK();
}

Status Translator::create_profile(RuntimeState* state) {
    // Add profile
    std::stringstream ss;
    ss << "Dpp translator(" << _tablet_desc.partition_id << "_" << _tablet_desc.bucket_id
        << "_" << _rollup_name << ")";
    _profile = state->obj_pool()->add(new RuntimeProfile(state->obj_pool(), ss.str()));

    _add_batch_timer = ADD_TIMER(_profile, "add batch time");
    _sort_timer = ADD_TIMER(_profile, "sort time");
    _agg_timer = ADD_TIMER(_profile, "aggregate time");
    _writer_timer = ADD_TIMER(_profile, "write to file time");
    return Status::OK();
}

Status Translator::prepare(RuntimeState* state) {
    // create output path
    format_output_path(state);
    // create profile first
    RETURN_IF_ERROR(create_profile(state));
    // 1. Create sorter
    RETURN_IF_ERROR(create_sorter(state));

    // 2. Create comparator contexts
    RETURN_IF_ERROR(create_comparetor(state));

    // 3. Create writer
    RETURN_IF_ERROR(create_writer(state));

    // 4. New batch for the writer
    _batch_to_write.reset(
        new RowBatch(_row_desc, state->batch_size(), state->instance_mem_tracker()));
    if (_batch_to_write.get() == nullptr) {
        return Status::InternalError("No memory to allocate RowBatch.");
    }

    // 5. Prepare value updaters
    RETURN_IF_ERROR(create_value_updaters());

    int hll_column_count = 0;
    for (int i = 0; i < _rollup_schema.values().size(); ++i) {
        if (_rollup_schema.value_ops()[i] == TAggregationType::HLL_UNION) {
            hll_column_count++;
        }
    }
    _hll_merge.prepare(hll_column_count,
                       ((QSorter*)_sorter)->get_mem_pool());
    return Status::OK();
}

Status Translator::add_batch(RowBatch* batch) {
    SCOPED_TIMER(_add_batch_timer);
    return _sorter->add_batch(batch);
}

bool Translator::eq_tuple_row(TupleRow* last, TupleRow* cur) {
    int num_exprs = _last_row_expr_ctxs.size();
    for (int i = 0; i < num_exprs; ++i) {
        void* last_value = _last_row_expr_ctxs[i]->get_value(last);
        void* cur_value = _cur_row_expr_ctxs[i]->get_value(cur);

        if (RawValue::compare(
                last_value, cur_value, _last_row_expr_ctxs[i]->root()->type()) != 0) {
            return false;
        }
    }

    return true;
}

void Translator::copy_row(TupleRow* row) {
    int row_idx = _batch_to_write->add_row();
    TupleRow* last_row = _batch_to_write->get_row(row_idx);
    row->deep_copy(last_row,
                   _batch_to_write->row_desc().tuple_descriptors(),
                   _batch_to_write->tuple_data_pool(),
                   false);
    // NOTE: Don't commit this row yet; it may still be merged with later rows.
}

// The merged value exprs must be slot refs.
void Translator::update_row(TupleRow* agg_row, TupleRow* row) {
    int index = 0;
    for (int i = 0; i < _rollup_schema.values().size(); ++i) {
        ExprContext* ctx = _rollup_schema.values()[i];
        SlotRef* ref = (SlotRef*)(ctx->root());
        if (_rollup_schema.value_ops()[i] == TAggregationType::HLL_UNION) {
            _hll_merge.update_hll_set(agg_row, row, ctx, index);
            index++;
        } else {
            _value_updaters[i](ref, agg_row, row);
        }
    }
}

void HllDppSinkMerge::prepare(int count, MemPool* pool) {
    while (count--) {
        HllMergeValue* value = new HllMergeValue();
        value->type = HLL_DATA_EMPTY;
        _hll_last_row.push_back(value);
    }
}
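
// How update_hll_set below promotes the per-column merge state (a sketch; the
// register count follows REGISTERS_SIZE = 2^HLL_COLUMN_PRECISION as used in
// the code, and the transition thresholds are the constants referenced below):
//
//   HLL_DATA_EMPTY    --first row-->                                HLL_DATA_EXPLICIT (plain hash set)
//   HLL_DATA_EXPLICIT --more than HLL_EXPLICLIT_INT64_NUM hashes--> HLL_DATA_SPRASE   (register form)
//
// In register form each 64-bit hash is folded into one register:
//   idx           = hash % REGISTERS_SIZE;                           // low bits pick the register
//   first_one_bit = __builtin_ctzl(hash >> HLL_COLUMN_PRECISION) + 1;
//   register[idx] = std::max(register[idx], first_one_bit);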

void HllDppSinkMerge::update_hll_set(TupleRow* agg_row, TupleRow* row,
                                     ExprContext* ctx, int index) {
    HllMergeValue* value = _hll_last_row[index];
    StringValue* row_sv = static_cast<StringValue*>(SlotRef::get_value(ctx->root(), row));
    HllSetResolver row_resolver;
    HllSetResolver agg_row_resolver;
    row_resolver.init(row_sv->ptr, row_sv->len);
    row_resolver.parse();
    const int REGISTERS_SIZE = (int)std::pow(2, HLL_COLUMN_PRECISION);
    if (value->type == HLL_DATA_EMPTY) {
        value->type = HLL_DATA_EXPLICIT;
        StringValue* agg_row_sv = static_cast<StringValue*>(SlotRef::get_value(ctx->root(), agg_row));
        agg_row_resolver.init(agg_row_sv->ptr, agg_row_sv->len);
        agg_row_resolver.parse();
        if (agg_row_resolver.get_hll_data_type() == HLL_DATA_EXPLICIT) {
            value->hash_set.insert(agg_row_resolver.get_explicit_value(0));
        }
        if (row_resolver.get_hll_data_type() == HLL_DATA_EXPLICIT) {
            value->hash_set.insert(row_resolver.get_explicit_value(0));
        }
    } else if (value->type == HLL_DATA_EXPLICIT) {
        value->hash_set.insert(row_resolver.get_explicit_value(0));
        if (value->hash_set.size() > HLL_EXPLICLIT_INT64_NUM) {
            // Too many distinct hashes: convert the explicit set to register form.
            value->type = HLL_DATA_SPRASE;
            for (std::set<uint64_t>::iterator iter = value->hash_set.begin();
                    iter != value->hash_set.end(); iter++) {
                uint64_t hash = *iter;
                int idx = hash % REGISTERS_SIZE;
                uint8_t first_one_bit = __builtin_ctzl(hash >> HLL_COLUMN_PRECISION) + 1;
                if (value->index_to_value.find(idx) != value->index_to_value.end()) {
                    value->index_to_value[idx] =
                        value->index_to_value[idx] < first_one_bit
                            ? first_one_bit : value->index_to_value[idx];
                } else {
                    value->index_to_value[idx] = first_one_bit;
                }
            }
        }
    } else if (value->type == HLL_DATA_SPRASE) {
        uint64_t hash = row_resolver.get_explicit_value(0);
        int idx = hash % REGISTERS_SIZE;
        uint8_t first_one_bit = __builtin_ctzl(hash >> HLL_COLUMN_PRECISION) + 1;
        if (value->index_to_value.find(idx) != value->index_to_value.end()) {
            value->index_to_value[idx] =
                value->index_to_value[idx] < first_one_bit
                    ? first_one_bit : value->index_to_value[idx];
        } else {
            value->index_to_value[idx] = first_one_bit;
        }
    } else {
        // Other states need no per-row work here.
    }
}

void HllDppSinkMerge::finalize_one_merge(TupleRow* agg_row, MemPool* pool,
                                         const RollupSchema& rollup_schema) {
    if (_hll_last_row.size() <= 0) {
        return;
    }
    const int REGISTERS_SIZE = (int)std::pow(2, HLL_COLUMN_PRECISION);
    int index = 0;
    for (int i = 0; i < rollup_schema.values().size(); ++i) {
        ExprContext* ctx = rollup_schema.values()[i];
        StringValue* agg_row_sv = static_cast<StringValue*>(SlotRef::get_value(ctx->root(), agg_row));
        if (rollup_schema.value_ops()[i] == TAggregationType::HLL_UNION) {
            HllMergeValue* value = _hll_last_row[index++];
            if (value->type == HLL_DATA_EXPLICIT) {
                // Serialize as an explicit set.
                int set_len = 1 + 1 + value->hash_set.size() * 8;
                char* result = (char*)pool->allocate(set_len);
                memset(result, 0, set_len);
                HllSetHelper::set_explicit(result, value->hash_set, set_len);
                agg_row_sv->replace(result, set_len);
            } else if (value->type == HLL_DATA_SPRASE) {
                if (value->index_to_value.size() * (sizeof(HllSetResolver::SparseIndexType)
                        + sizeof(HllSetResolver::SparseValueType))
                        + sizeof(HllSetResolver::SparseLengthValueType) > REGISTERS_SIZE) {
                    // The sparse encoding would be larger than the full register
                    // set, so serialize the full form.
                    int set_len = 1 + REGISTERS_SIZE;
                    char* result = (char*)pool->allocate(set_len);
                    memset(result, 0, set_len);
                    HllSetHelper::set_full(result, value->index_to_value, REGISTERS_SIZE, set_len);
                    agg_row_sv->replace(result, set_len);
                } else {
                    // Serialize as a sparse register set.
                    int set_len = 1 + sizeof(HllSetResolver::SparseLengthValueType)
                            + 3 * value->index_to_value.size();
                    char* result = (char*)pool->allocate(set_len);
                    memset(result, 0, set_len);
                    HllSetHelper::set_sparse(result, value->index_to_value, set_len);
                    agg_row_sv->replace(result, set_len);
                }
            } else {
                // Nothing to write back for other states (e.g. still HLL_DATA_EMPTY).
            }
        }
    }
    // Reset the per-column state for the next key.
    for (int i = 0; i < _hll_last_row.size(); i++) {
        HllMergeValue* value = _hll_last_row[i];
        value->hash_set.clear();
        value->index_to_value.clear();
        value->type = HLL_DATA_EMPTY;
    }
}

void HllDppSinkMerge::close() {
    for (int i = 0; i < _hll_last_row.size(); i++) {
        HllMergeValue* value = _hll_last_row[i];
        delete value;
    }
    _hll_last_row.clear();
}

// Rows are accumulated in '_batch_to_write' and flushed to the writer batch by
// batch, so memory is released as we go.
Status Translator::process_one_row(TupleRow* row) {
    if (row == nullptr) {
        // Something strange happened.
        std::stringstream ss;
        ss << "row is nullptr.";
        LOG(ERROR) << ss.str();
        return Status::InternalError(ss.str());
    }
    // First row: just deep copy it; don't reuse its data.
    if (!_batch_to_write->in_flight()) {
        copy_row(row);
        return Status::OK();
    }

    int row_idx = _batch_to_write->add_row();
    TupleRow* last_row = _batch_to_write->get_row(row_idx);
    std::string keys_type = _rollup_schema.keys_type();
    if ("AGG_KEYS" == keys_type || "UNIQUE_KEYS" == keys_type) {
        if (eq_tuple_row(last_row, row)) {
            // Same key: just merge into the last row and return.
            update_row(last_row, row);
            return Status::OK();
        }
    }
    _hll_merge.finalize_one_merge(last_row,
                                  ((QSorter*)_sorter)->get_mem_pool(),
                                  _rollup_schema);
    // Commit the last row and check whether the batch is full.
    _batch_to_write->commit_last_row();
    if (_batch_to_write->is_full()) {
        SCOPED_TIMER(_writer_timer);
        // output this batch
        RETURN_IF_ERROR(_writer->add_batch(_batch_to_write.get()));
        // reset the batch to free memory
        _batch_to_write->reset();
    }

    // deep copy the new row
    copy_row(row);

    return Status::OK();
}

Status Translator::process(RuntimeState* state) {
    // 1. Sort all the data in the sorter.
    {
        SCOPED_TIMER(_sort_timer);
        RETURN_IF_ERROR(_sorter->input_done());
    }

    // 2. Read data back from the sorter and aggregate it.
    {
        SCOPED_TIMER(_agg_timer);
        bool eos = false;
        while (!eos) {
            RowBatch batch(_row_desc, state->batch_size(), state->instance_mem_tracker());

            RETURN_IF_ERROR(_sorter->get_next(&batch, &eos));

            int num_rows = batch.num_rows();
            for (int i = 0; i < num_rows; ++i) {
                TupleRow* row = batch.get_row(i);
                RETURN_IF_ERROR(process_one_row(row));
            }
        }
        // Merge the HLL state accumulated for the last key.
        int row_idx = _batch_to_write->add_row();
        TupleRow* last_row = _batch_to_write->get_row(row_idx);
        _hll_merge.finalize_one_merge(last_row,
                                      ((QSorter*)_sorter)->get_mem_pool(),
                                      _rollup_schema);
    }

    // Send the last batch if there is one in flight.
    if (_batch_to_write->in_flight()) {
        _batch_to_write->commit_last_row();
        RETURN_IF_ERROR(_writer->add_batch(_batch_to_write.get()));
    }

    {
        SCOPED_TIMER(_writer_timer);
        RETURN_IF_ERROR(_writer->close());
    }

    return Status::OK();
}

Status Translator::close(RuntimeState* state) {
    if (_sorter != nullptr) {
        _sorter->close(state);
    }
    Expr::close(_last_row_expr_ctxs, state);
    Expr::close(_cur_row_expr_ctxs, state);
    Expr::close(_output_row_expr_ctxs, state);
    _batch_to_write.reset();
    _hll_merge.close();
    return Status::OK();
}

Status DppSink::init(RuntimeState* state) {
    _profile = state->obj_pool()->add(new RuntimeProfile(state->obj_pool(), "Dpp sink"));
    return Status::OK();
}
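
// Expected driver sequence for DppSink, as a sketch only: the construction of
// the sink and the origin of 'obj_pool', 'state', 'tablet_desc' and 'batch'
// are outside this file, and the names here are placeholders.
//
//   RETURN_IF_ERROR(sink->init(state));
//   for (/* each input batch with its tablet */) {
//       RETURN_IF_ERROR(sink->add_batch(obj_pool, state, tablet_desc, batch));
//   }
//   RETURN_IF_ERROR(sink->finish(state));   // sorts, aggregates and writes via the ETL thread pool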

Status DppSink::get_or_create_translator(
        ObjectPool* obj_pool,
        RuntimeState* state,
        const TabletDesc& tablet_desc,
        std::vector<Translator*>** trans_vec) {
    auto iter = _translator_map.find(tablet_desc);
    if (iter != _translator_map.end()) {
        *trans_vec = &iter->second;
        return Status::OK();
    }
    // Not found: create one translator for every rollup of this tablet.
    _translator_map.insert(std::make_pair(tablet_desc, std::vector<Translator*>()));
    *trans_vec = &_translator_map[tablet_desc];
    for (auto& it : _rollup_map) {
        // Translator* translator = state->obj_pool()->add(
        Translator* translator = obj_pool->add(
            new Translator(tablet_desc, _row_desc, it.first, *it.second, obj_pool));
        RETURN_IF_ERROR(translator->prepare(state));
        _profile->add_child(translator->profile(), true, nullptr);
        (*trans_vec)->push_back(translator);
    }
    _translator_count += (*trans_vec)->size();
    return Status::OK();
}

Status DppSink::add_batch(
        ObjectPool* obj_pool,
        RuntimeState* state,
        const TabletDesc& desc,
        RowBatch* batch) {
    SCOPED_TIMER(_profile->total_time_counter());
    std::vector<Translator*>* trans_vec = nullptr;
    RETURN_IF_ERROR(get_or_create_translator(obj_pool, state, desc, &trans_vec));
    // Add this batch to every translator (one per rollup) of this tablet.
    for (auto& trans : *trans_vec) {
        RETURN_IF_ERROR(trans->add_batch(batch));
    }
    return Status::OK();
}

void DppSink::process(RuntimeState* state, Translator* trans, CountDownLatch* latch) {
    // Add the dpp task to the system cgroup.
    CgroupsMgr::apply_system_cgroup();
    Status status = trans->process(state);
    if (!status.ok()) {
        state->set_process_status(status);
    }
    latch->count_down();
}

Status DppSink::finish(RuntimeState* state) {
    SCOPED_TIMER(_profile->total_time_counter());

    CountDownLatch latch(_translator_count);
    for (auto& iter : _translator_map) {
        for (auto& trans : iter.second) {
            state->exec_env()->etl_thread_pool()->offer(
                boost::bind<void>(&DppSink::process, this, state, trans, &latch));
        }
    }

    latch.await();

    // Set output files in the runtime state.
    collect_output(&state->output_files());

    for (auto& iter : _translator_map) {
        for (auto& trans : iter.second) {
            trans->close(state);
        }
    }

    return state->query_status();
}

void DppSink::collect_output(std::vector<std::string>* files) {
    for (auto& iter : _translator_map) {
        for (auto& trans : iter.second) {
            files->push_back(trans->output_path());
        }
    }
}

} // namespace doris