baidu palo
This commit is contained in:
333
be/src/runtime/data_spliter.cpp
Normal file
333
be/src/runtime/data_spliter.cpp
Normal file
@ -0,0 +1,333 @@
|
||||
// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved
|
||||
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "runtime/data_spliter.h"

#include <algorithm>
#include <sstream>

#include <thrift/protocol/TDebugProtocol.h>

#include "common/object_pool.h"
#include "exprs/expr.h"
#include "gen_cpp/DataSinks_types.h"
#include "runtime/dpp_sink.h"
#include "runtime/load_path_mgr.h"
#include "runtime/mem_tracker.h"
#include "runtime/raw_value.h"
#include "runtime/row_batch.h"
#include "runtime/runtime_state.h"
#include "runtime/tuple_row.h"
#include "util/debug_util.h"
#include "util/file_utils.h"
#include "util/runtime_profile.h"
|
||||
|
||||
namespace palo {
|
||||
|
||||
// Build a splitter over rows described by 'row_desc'. The private object
// pool owns allocations made during splitting (row batches, dpp sinks).
DataSpliter::DataSpliter(const RowDescriptor& row_desc)
        : _obj_pool(new ObjectPool()), _row_desc(row_desc) {
}
|
||||
|
||||
// Members (_obj_pool and friends) clean themselves up; nothing to do here.
DataSpliter::~DataSpliter() {}
|
||||
|
||||
// We use the ParttitionRange to compare here. It should not be a member function of PartitionInfo
|
||||
// class becaurce there are some other member in it.
|
||||
static bool compare_part_use_range(const PartitionInfo* v1, const PartitionInfo* v2) {
|
||||
return v1->range() < v2->range();
|
||||
}
|
||||
|
||||
// Populate 'spliter' from the thrift plan node 't_sink'.
// All created objects (expr trees, partition infos, rollup schemas) are owned
// by 'pool'. Returns an error status if the sink carries no partitions or any
// sub-object fails to deserialize.
Status DataSpliter::from_thrift(
        ObjectPool* pool, const TDataSplitSink& t_sink, DataSpliter* spliter) {
    VLOG_ROW << "TDataSplitSink: " << apache::thrift::ThriftDebugString(t_sink);

    // Partition exprs: evaluated per row to locate its partition.
    RETURN_IF_ERROR(Expr::create_expr_trees(
            pool, t_sink.partition_exprs, &spliter->_partition_expr_ctxs));

    // Partition infos
    int num_parts = t_sink.partition_infos.size();
    if (num_parts == 0) {
        return Status("Empty partition info.");
    }
    for (int i = 0; i < num_parts; ++i) {
        PartitionInfo* info = pool->add(new PartitionInfo());
        RETURN_IF_ERROR(PartitionInfo::from_thrift(pool, t_sink.partition_infos[i], info));
        spliter->_partition_infos.push_back(info);
    }

    // Partitions must be in ascending range order: binary_find_partition()
    // depends on this invariant. (std::sort requires <algorithm>.)
    std::sort(spliter->_partition_infos.begin(),
              spliter->_partition_infos.end(),
              compare_part_use_range);

    // Rollup schema infos, keyed by rollup name.
    for (auto& iter : t_sink.rollup_schemas) {
        RollupSchema* schema = pool->add(new RollupSchema());
        RETURN_IF_ERROR(RollupSchema::from_thrift(pool, iter.second, schema));
        spliter->_rollup_map[iter.first] = schema;
    }

    return Status::OK;
}
|
||||
|
||||
// Prepare the sink: base-class setup, then partition exprs, rollup schemas
// and partition infos, all charged to this sink's expr memory tracker.
// Also creates the runtime profile used by open()/send().
Status DataSpliter::prepare(RuntimeState* state) {
    std::stringstream title;
    title << "DataSplitSink (dst_id=" << state->fragment_instance_id() << ")";
    RETURN_IF_ERROR(DataSink::prepare(state));
    RETURN_IF_ERROR(Expr::prepare(
            _partition_expr_ctxs, state, _row_desc, _expr_mem_tracker.get()));
    for (auto& entry : _rollup_map) {
        RETURN_IF_ERROR(entry.second->prepare(state, _row_desc, _expr_mem_tracker.get()));
    }
    _profile = state->obj_pool()->add(new RuntimeProfile(state->obj_pool(), title.str()));
    for (auto part : _partition_infos) {
        RETURN_IF_ERROR(part->prepare(state, _row_desc, _expr_mem_tracker.get()));
    }
    return Status::OK;
}
|
||||
|
||||
// Open exprs and rollup schemas, ensure the load directory exists, and create
// one DppSink per partition (same order as _partition_infos, so a partition
// index found by binary search indexes _dpp_sink_vec directly).
Status DataSpliter::open(RuntimeState* state) {
    RETURN_IF_ERROR(Expr::open(_partition_expr_ctxs, state));

    for (auto& entry : _rollup_map) {
        RETURN_IF_ERROR(entry.second->open(state));
    }

    // The load directory must exist before any sink writes into it.
    RETURN_IF_ERROR(state->create_load_dir());

    for (auto part : _partition_infos) {
        RETURN_IF_ERROR(part->open(state));

        DppSink* sink = _obj_pool->add(new DppSink(_row_desc, _rollup_map));
        _dpp_sink_vec.push_back(sink);

        RETURN_IF_ERROR(sink->init(state));
        _profile->add_child(sink->profile(), true, nullptr);
    }

    _split_timer = ADD_TIMER(_profile, "process batch");
    _finish_timer = ADD_TIMER(_profile, "sort time");

    return Status::OK;
}
|
||||
|
||||
int DataSpliter::binary_find_partition(const PartRangeKey& key) const {
|
||||
int low = 0;
|
||||
int high = _partition_infos.size() - 1;
|
||||
|
||||
VLOG_ROW << "range key: " << key.debug_string() << std::endl;
|
||||
while (low <= high) {
|
||||
int mid = low + (high - low) / 2;
|
||||
int cmp = _partition_infos[mid]->range().compare_key(key);
|
||||
if (cmp == 0) {
|
||||
return mid;
|
||||
} else if (cmp < 0) { // current < partition[mid]
|
||||
low = mid + 1;
|
||||
} else {
|
||||
high = mid - 1;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Resolve which partition 'row' belongs to.
// Outputs the partition info in '*info' and its index in '*part_index'.
// Returns a non-OK status when no partition's range covers the row's key;
// the caller treats that as a skippable error row.
Status DataSpliter::process_partition(
        RuntimeState* state, TupleRow* row, PartitionInfo** info, int32_t* part_index) {
    // No partition exprs: everything maps to the single partition 0.
    if (_partition_expr_ctxs.size() == 0) {
        *part_index = 0;
        *info = _partition_infos[0];
        return Status::OK;
    }

    // Evaluate the (single) partition expr and wrap the value in a
    // PartRangeKey; a NULL value sorts below every real key.
    ExprContext* ctx = _partition_expr_ctxs[0];
    void* partition_val = ctx->get_value(row);
    PartRangeKey tmpPartKey;
    if (NULL != partition_val) {
        RETURN_IF_ERROR(PartRangeKey::from_value(
                ctx->root()->type().type, partition_val, &tmpPartKey));
    } else {
        tmpPartKey = PartRangeKey::neg_infinite();
    }

    // Binary-search the sorted partition list for a covering range.
    *part_index = binary_find_partition(tmpPartKey);
    if (*part_index < 0) {
        std::stringstream error_log;
        error_log << "there is no corresponding partition for this key: ";
        ctx->print_value(row, &error_log);
        return Status(error_log.str(), true);
    }
    *info = _partition_infos[*part_index];
    return Status::OK;
}
|
||||
|
||||
// Hash the row's distribution columns and reduce modulo the partition's
// bucket count; the bucket id is returned through '*mod'.
Status DataSpliter::process_distribute(
        RuntimeState* state, TupleRow* row,
        const PartitionInfo* part, uint32_t* mod) {
    uint32_t hash_val = 0;

    // Fold every distribution column into a single rolling crc32.
    for (auto& ctx : part->distributed_expr_ctxs()) {
        void* value = ctx->get_value(row);
        if (value == NULL) {
            // A NULL column contributes the same hash as int 0.
            static const int INT_VALUE = 0;
            static const TypeDescriptor INT_TYPE(TYPE_INT);
            hash_val = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE, hash_val);
        } else {
            hash_val = RawValue::zlib_crc32(value, ctx->root()->type(), hash_val);
        }
    }

    *mod = hash_val % part->distributed_bucket();

    return Status::OK;
}
|
||||
|
||||
// Stage one row into the per-tablet batch identified by 'desc', creating the
// batch on first use. A batch that becomes full is handed to 'dpp_sink' and
// then reset for reuse.
Status DataSpliter::send_row(
        RuntimeState* state, const TabletDesc& desc, TupleRow* row, DppSink* dpp_sink) {
    // Look up (or lazily create) the staging batch for this tablet.
    RowBatch* batch = nullptr;
    auto found = _batch_map.find(desc);
    if (found != _batch_map.end()) {
        batch = found->second;
    } else {
        batch = _obj_pool->add(
                new RowBatch(_row_desc, state->batch_size(), _expr_mem_tracker.get()));
        _batch_map[desc] = batch;
    }

    // Deep-copy the row: the caller's input batch memory is reused after send().
    int idx = batch->add_row();
    row->deep_copy(batch->get_row(idx), _row_desc.tuple_descriptors(),
                   batch->tuple_data_pool(), false);
    batch->commit_last_row();

    // Flush a full batch to the dpp sink and recycle it.
    if (batch->is_full()) {
        RETURN_IF_ERROR(dpp_sink->add_batch(_obj_pool.get(), state, desc, batch));
        batch->reset();
    }
    return Status::OK;
}
|
||||
|
||||
// Route a single row: find its partition, hash its distribution columns into
// a bucket, and stage it into the matching tablet batch.
// A row with no matching partition is recorded as an error row and skipped
// (the overall status stays OK).
Status DataSpliter::process_one_row(RuntimeState* state, TupleRow* row) {
    PartitionInfo* part = nullptr;
    int32_t part_index = 0;
    Status status = process_partition(state, row, &part, &part_index);
    // TODO(lingbin): adjust 'process_partition' function's return value. It is a little inelegant
    // to return another OK when pri-status is not OK.
    if (!status.ok()) {
        state->set_error_row_number(state->get_error_row_number() + 1);
        state->set_normal_row_number(state->get_normal_row_number() - 1);

        state->append_error_msg_to_file(
                print_row(row, _row_desc),
                status.get_error_msg());
        return Status::OK;
    }

    TabletDesc desc;
    desc.partition_id = part->id();

    // Pick the bucket inside the partition from the distribution-column hash.
    RETURN_IF_ERROR(process_distribute(state, row, part, &desc.bucket_id));

    // Remember which dpp sink serves this tablet, then stage the row there.
    _sink_map[desc] = _dpp_sink_vec[part_index];
    RETURN_IF_ERROR(send_row(state, desc, row, _dpp_sink_vec[part_index]));

    return Status::OK;
}
|
||||
|
||||
// Entry point for each incoming batch: route every row to its tablet.
Status DataSpliter::send(RuntimeState* state, RowBatch* batch) {
    SCOPED_TIMER(_split_timer);
    const int num_rows = batch->num_rows();
    for (int row_idx = 0; row_idx < num_rows; ++row_idx) {
        RETURN_IF_ERROR(process_one_row(state, batch->get_row(row_idx)));
    }
    return Status::OK;
}
|
||||
|
||||
// Close the sink. When 'close_status' is OK, first flush every partially
// filled per-tablet batch, then finish all dpp sinks and close all exprs,
// rollup schemas and partition infos — continuing through failures so every
// resource is released. Returns the FIRST failure encountered (OK otherwise).
// Idempotent: a second call returns OK immediately.
Status DataSpliter::close(RuntimeState* state, Status close_status) {
    if (_closed) {
        return Status::OK;
    }

    bool is_ok = true;
    Status err_status;
    // Record the first failing step (with a warning) without aborting the
    // remaining cleanup; 'what' names the failing step in the log message.
    auto record_error = [&is_ok, &err_status](const Status& status, const char* what) {
        if (UNLIKELY(is_ok && !status.ok())) {
            LOG(WARNING) << what
                << " err_msg=" << status.get_error_msg();
            is_ok = false;
            err_status = status;
        }
    };

    if (close_status.ok()) {
        SCOPED_TIMER(_finish_timer);
        // Flush rows staged in send_row() that never filled a whole batch.
        for (const auto& iter : _batch_map) {
            if (iter.second->num_rows() > 0) {
                DppSink* dpp_sink = _sink_map[iter.first];
                record_error(
                        dpp_sink->add_batch(_obj_pool.get(), state, iter.first, iter.second),
                        "add_batch error");
                iter.second->reset();
            }
        }
    }
    // Finish every sink and close every expr/schema/partition even if an
    // earlier step already failed.
    for (const auto& iter : _dpp_sink_vec) {
        record_error(iter->finish(state), "finish dpp_sink error");
    }
    Expr::close(_partition_expr_ctxs, state);
    for (auto& iter : _rollup_map) {
        record_error(iter.second->close(state), "close rollup_map error");
    }
    for (auto iter : _partition_infos) {
        record_error(iter->close(state), "close partition_info error");
    }

    _closed = true;
    if (is_ok) {
        return Status::OK;
    } else {
        return err_status;
    }
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user