Files
doris/be/src/exec/broker_scan_node.cpp
Zhao Chun a2b299e3b9 Reduce UT binary size (#314)
* Reduce UT binary size

Almost every module depends on ExecEnv, and ExecEnv contains all the
singletons, which makes the UT binary contain all object files.

This patch separates ExecEnv's initialization and destruction into another
file to avoid other files depending on it. Also, status.cc includes
debug_util.h, which depends on tuple.h and tuple_row.h, so I move
get_stack_trace() to stack_util.cpp to reduce status.cc's dependencies.

I add USE_RTTI=1 to build rocksdb to avoid linking librocksdb.a

Issue: #292

* Update
2018-11-15 16:17:23 +08:00

470 lines
16 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "exec/broker_scan_node.h"
#include <chrono>
#include <sstream>
#include "common/object_pool.h"
#include "runtime/runtime_state.h"
#include "runtime/row_batch.h"
#include "runtime/dpp_sink_internal.h"
#include "exec/broker_scanner.h"
#include "exprs/expr.h"
#include "util/runtime_profile.h"
namespace doris {
// Constructs a broker scan node from its thrift plan node.
// Only the tuple id is taken from tnode.broker_scan_node here; every other member starts in
// its "nothing prepared / nothing running" state and is filled in by prepare() and open().
BrokerScanNode::BrokerScanNode(
ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) :
ScanNode(pool, tnode, descs),
_tuple_id(tnode.broker_scan_node.tuple_id),
_runtime_state(nullptr),
_tuple_desc(nullptr),
_num_running_scanners(0),
_scan_finished(false),
// Cap on the number of batches held in _batch_queue; scanner_scan() waits while the queue
// is at or above this size.
_max_buffered_batches(1024),
_wait_scanner_timer(nullptr) {
}
// Nothing to release explicitly here; members clean themselves up.
BrokerScanNode::~BrokerScanNode() = default;
// We use the ParttitionRange to compare here. It should not be a member function of PartitionInfo
// class becaurce there are some other member in it.
static bool compare_part_use_range(const PartitionInfo* v1, const PartitionInfo* v2) {
return v1->range() < v2->range();
}
// Initializes the node from its thrift description.
// When partition expressions are present, builds the partition expr trees and one
// PartitionInfo per thrift partition, then sorts the partitions by range so that
// binary_find_partition_id() can search them.
// Returns the first error reported by ScanNode::init, expr creation or
// PartitionInfo::from_thrift; otherwise OK. The `state` argument is not used here.
Status BrokerScanNode::init(const TPlanNode& tnode, RuntimeState* state) {
    RETURN_IF_ERROR(ScanNode::init(tnode));

    const auto& scan_node_desc = tnode.broker_scan_node;
    if (!scan_node_desc.__isset.partition_exprs) {
        // No partitioning information was supplied; nothing more to set up.
        return Status::OK;
    }

    // partition_infos is expected to be set whenever partition_exprs is set.
    RETURN_IF_ERROR(Expr::create_expr_trees(
            _pool, scan_node_desc.partition_exprs, &_partition_expr_ctxs));

    for (auto& thrift_partition : scan_node_desc.partition_infos) {
        // The object pool owns the PartitionInfo; the vector only keeps a pointer to it.
        PartitionInfo* partition = _pool->add(new PartitionInfo());
        RETURN_IF_ERROR(PartitionInfo::from_thrift(_pool, thrift_partition, partition));
        _partition_infos.emplace_back(partition);
    }

    // Keep the partitions in ascending range order.
    std::sort(_partition_infos.begin(), _partition_infos.end(), compare_part_use_range);
    return Status::OK;
}
// Prepares the node for execution:
//   1. remembers the runtime state and resolves the tuple descriptor for _tuple_id,
//   2. indexes the tuple's slots by column name in _slots_map,
//   3. prepares partition expressions and partition infos, if any,
//   4. registers the "WaitScannerTime" profile timer.
// Fails if the tuple descriptor cannot be found, if two slots share a column name, or if
// any of the delegated prepare calls fails.
Status BrokerScanNode::prepare(RuntimeState* state) {
    VLOG_QUERY << "BrokerScanNode prepare";
    RETURN_IF_ERROR(ScanNode::prepare(state));

    _runtime_state = state;

    // Resolve the output tuple descriptor.
    _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
    if (!_tuple_desc) {
        std::stringstream msg;
        msg << "Failed to get tuple descriptor, _tuple_id=" << _tuple_id;
        return Status(msg.str());
    }

    // Build the column-name -> slot lookup; a duplicate column name is an error.
    for (auto slot_desc : _tuple_desc->slots()) {
        const bool inserted = _slots_map.emplace(slot_desc->col_name(), slot_desc).second;
        if (inserted) {
            continue;
        }
        std::stringstream msg;
        msg << "Failed to insert slot, col_name=" << slot_desc->col_name();
        return Status(msg.str());
    }

    // Prepare partition expressions and the per-partition state.
    if (!_partition_expr_ctxs.empty()) {
        RETURN_IF_ERROR(Expr::prepare(
                _partition_expr_ctxs, state, row_desc(), expr_mem_tracker()));
        for (auto partition : _partition_infos) {
            RETURN_IF_ERROR(partition->prepare(state, row_desc(), expr_mem_tracker()));
        }
    }

    // Profile counter for the time get_next() spends waiting on scanners.
    _wait_scanner_timer = ADD_TIMER(runtime_profile(), "WaitScannerTime");
    return Status::OK;
}
// Opens the node: runs the base-class open, the OPEN debug action and the cancellation
// check, opens the partition expressions / partition infos when present, and finally
// launches the scanner thread. Returns the first failure encountered.
Status BrokerScanNode::open(RuntimeState* state) {
    SCOPED_TIMER(_runtime_profile->total_time_counter());
    RETURN_IF_ERROR(ExecNode::open(state));
    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN));
    RETURN_IF_CANCELLED(state);

    // Partition expressions must be opened before any scanner evaluates them.
    if (!_partition_expr_ctxs.empty()) {
        RETURN_IF_ERROR(Expr::open(_partition_expr_ctxs, state));
        for (auto partition : _partition_infos) {
            RETURN_IF_ERROR(partition->open(state));
        }
    }

    return start_scanners();
}
// Starts the scanning work: a single worker thread that processes every scan range,
// from index 0 up to _scan_ranges.size(). Always returns OK.
Status BrokerScanNode::start_scanners() {
    {
        // The running-scanner count is read under this lock by get_next().
        std::lock_guard<std::mutex> guard(_batch_queue_lock);
        _num_running_scanners = 1;
    }

    const auto range_count = _scan_ranges.size();
    _scanner_threads.emplace_back(&BrokerScanNode::scanner_worker, this, 0, range_count);
    return Status::OK;
}
// Hands the next batch produced by the scanner thread(s) to the caller.
//
// state     - runtime state of the query; checked for cancellation.
// row_batch - output batch; takes over the state of the dequeued scanner batch.
// eos       - set to true when no more rows will be returned (scan already finished,
//             queue drained with no running scanner, or the limit was reached).
//
// Returns OK on success or end of stream, and _process_status when a scanner failed or
// the query was cancelled.
Status BrokerScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
SCOPED_TIMER(_runtime_profile->total_time_counter());
// On cancellation, record CANCELLED as the node status (under the queue lock) and wake
// scanners that may be blocked on a full queue.
if (state->is_cancelled()) {
std::unique_lock<std::mutex> l(_batch_queue_lock);
if (update_status(Status::CANCELLED)) {
// Notify all scanners
_queue_writer_cond.notify_all();
}
}
// A previous call already reached the end (or the limit); nothing more to return.
if (_scan_finished.load()) {
*eos = true;
return Status::OK;
}
std::shared_ptr<RowBatch> scanner_batch;
{
std::unique_lock<std::mutex> l(_batch_queue_lock);
// Wait until there is a batch to read, or until there is a reason to stop waiting:
// a scanner failed, the query was cancelled, or no scanner is running any more.
// The wait uses a one-second timeout, so the conditions are re-checked at least that often.
while (_process_status.ok() &&
!_runtime_state->is_cancelled() &&
_num_running_scanners > 0 &&
_batch_queue.empty()) {
SCOPED_TIMER(_wait_scanner_timer);
_queue_reader_cond.wait_for(l, std::chrono::seconds(1));
}
if (!_process_status.ok()) {
// Some scanner process failed.
return _process_status;
}
if (_runtime_state->is_cancelled()) {
if (update_status(Status::CANCELLED)) {
_queue_writer_cond.notify_all();
}
return _process_status;
}
// May still be empty here if the loop ended because all scanners finished.
if (!_batch_queue.empty()) {
scanner_batch = _batch_queue.front();
_batch_queue.pop_front();
}
}
// All scanners have finished and every queued batch has been read.
if (scanner_batch == nullptr) {
_scan_finished.store(true);
*eos = true;
return Status::OK;
}
// A slot in the queue was freed: wake one scanner that may be waiting to push.
_queue_writer_cond.notify_one();
// Take over the scanner batch's state instead of copying rows.
row_batch->acquire_state(scanner_batch.get());
_num_rows_returned += row_batch->num_rows();
COUNTER_SET(_rows_returned_counter, _num_rows_returned);
// First time the limit is reached: trim the surplus rows from this batch, mark the scan
// as finished and wake all scanners so they can stop.
// Only relevant for queries such as 'select * from table1 limit 20'.
if (reached_limit()) {
int num_rows_over = _num_rows_returned - _limit;
row_batch->set_num_rows(row_batch->num_rows() - num_rows_over);
_num_rows_returned -= num_rows_over;
COUNTER_SET(_rows_returned_counter, _num_rows_returned);
_scan_finished.store(true);
_queue_writer_cond.notify_all();
*eos = true;
} else {
*eos = false;
}
// Row-level tracing of the rows being returned.
if (VLOG_ROW_IS_ON) {
for (int i = 0; i < row_batch->num_rows(); ++i) {
TupleRow* row = row_batch->get_row(i);
VLOG_ROW << "BrokerScanNode output row: "
<< Tuple::to_string(row->get_tuple(0), *_tuple_desc);
}
}
return Status::OK;
}
// Closes the node. Safe to call more than once: a node that is already closed returns OK
// immediately.
//
// Steps: mark the scan as finished, wake every thread waiting on either queue condition,
// join the scanner threads, close partition expressions / partition infos, drop any
// batches still queued, and finally delegate to ExecNode::close.
//
// NOTE(review): if the CLOSE debug action returns an error we return before the scanner
// threads are joined; confirm callers retry close() or otherwise stop the threads.
Status BrokerScanNode::close(RuntimeState* state) {
    if (is_closed()) {
        return Status::OK;
    }
    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::CLOSE));
    SCOPED_TIMER(_runtime_profile->total_time_counter());

    // Tell scanners to stop and wake anything blocked on the batch queue.
    _scan_finished.store(true);
    _queue_writer_cond.notify_all();
    _queue_reader_cond.notify_all();

    // Wait for every scanner thread to exit. join() on a thread that is not joinable
    // throws, so only join threads that still are.
    for (auto& scanner_thread : _scanner_threads) {
        if (scanner_thread.joinable()) {
            scanner_thread.join();
        }
    }

    // Close partition
    if (!_partition_expr_ctxs.empty()) {
        Expr::close(_partition_expr_ctxs, state);
        for (auto iter : _partition_infos) {
            iter->close(state);
        }
    }

    // Release batches that were produced but never consumed.
    _batch_queue.clear();
    return ExecNode::close(state);
}
// Stores the scan ranges this node has to process. Called after the plan node has been
// prepared. When the node is partitioned, each range's partition id list is sorted so
// that scanner_scan() can look ids up with std::binary_search. Always returns OK.
Status BrokerScanNode::set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges) {
    _scan_ranges = scan_ranges;

    if (_partition_expr_ctxs.empty()) {
        // Not partitioned: the ranges are used as given.
        return Status::OK;
    }

    for (auto& range_entry : _scan_ranges) {
        auto& range_params = range_entry.scan_range.broker_scan_range.params;
        if (!range_params.__isset.partition_ids) {
            continue;
        }
        auto& ids = range_params.partition_ids;
        std::sort(ids.begin(), ids.end());
    }
    return Status::OK;
}
// Writes a short description of this node to `out`: just the node's name.
// `ident_level` is not used.
void BrokerScanNode::debug_string(int ident_level, std::stringstream* out) const {
    std::stringstream& sink = *out;
    sink << "BrokerScanNode";
}
// Scans one broker scan range and pushes the resulting row batches onto _batch_queue,
// from which get_next() consumes them. Runs on a scanner thread.
//
// scan_range          - the range to read; its params, ranges and broker addresses are
//                       handed to the BrokerScanner.
// conjunct_ctxs       - conjuncts evaluated against every row; may be empty.
// partition_expr_ctxs - partition expressions, consulted only when
//                       scan_range.params.partition_ids is set.
// counter             - receives the number of rows returned and filtered.
//
// Returns OK when the range is exhausted, when the scan was finished early
// (_scan_finished), or when another scanner has already recorded a failure; CANCELLED when
// the query is cancelled; otherwise the error from the scanner or from buffer allocation.
Status BrokerScanNode::scanner_scan(
        const TBrokerScanRange& scan_range,
        const std::vector<ExprContext*>& conjunct_ctxs,
        const std::vector<ExprContext*>& partition_expr_ctxs,
        BrokerScanCounter* counter) {
    std::unique_ptr<BrokerScanner> scanner(new BrokerScanner(
            _runtime_state,
            runtime_profile(),
            scan_range.params,
            scan_range.ranges,
            scan_range.broker_addresses,
            counter));
    RETURN_IF_ERROR(scanner->open());

    bool scanner_eof = false;
    while (!scanner_eof) {
        // Fill one row batch. The tuple buffer for the whole batch is allocated up front
        // from the batch's own tuple data pool.
        std::shared_ptr<RowBatch> row_batch(
                new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker()));
        MemPool* tuple_pool = row_batch->tuple_data_pool();
        int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size();
        void* tuple_buffer = tuple_pool->allocate(tuple_buffer_size);
        if (tuple_buffer == nullptr) {
            return Status("Allocate memory for row batch failed.");
        }

        // `tuple` always points at the next free tuple slot inside the buffer.
        Tuple* tuple = reinterpret_cast<Tuple*>(tuple_buffer);
        while (!scanner_eof) {
            RETURN_IF_CANCELLED(_runtime_state);
            // The reader no longer needs rows (limit reached or node closing).
            if (_scan_finished.load()) {
                return Status::OK;
            }
            // This batch is full: leave the fill loop and hand it over.
            if (row_batch->is_full()) {
                break;
            }

            // Reserve a row; it only becomes part of the batch on commit_last_row().
            int row_idx = row_batch->add_row();
            TupleRow* row = row_batch->get_row(row_idx);
            // The scan node's tuple is the first tuple of the tuple row.
            row->set_tuple(0, tuple);
            // Reset the null indicator bytes before the scanner fills the tuple.
            memset(tuple, 0, _tuple_desc->num_null_bytes());

            RETURN_IF_ERROR(scanner->get_next(tuple, tuple_pool, &scanner_eof));
            if (scanner_eof) {
                // Ends the fill loop through its condition; nothing was read this time.
                continue;
            }

            // When the range is restricted to certain partitions, drop (and report) rows
            // that belong to no partition or to a partition outside that set. The id list
            // was sorted in set_scan_ranges(), which is what makes binary_search valid.
            if (scan_range.params.__isset.partition_ids) {
                int64_t partition_id = get_partition_id(partition_expr_ctxs, row);
                if (partition_id == -1 ||
                        !std::binary_search(scan_range.params.partition_ids.begin(),
                                            scan_range.params.partition_ids.end(),
                                            partition_id)) {
                    counter->num_rows_filtered++;
                    std::stringstream error_msg;
                    error_msg << "No corresponding partition, partition id: " << partition_id;
                    _runtime_state->append_error_msg_to_file(
                            Tuple::to_string(tuple, *_tuple_desc), error_msg.str());
                    continue;
                }
            }

            // Evaluate the conjuncts for this row. data() is used instead of &ctxs[0]
            // because indexing an empty vector is undefined behaviour, and the conjunct
            // list may be empty.
            if (eval_conjuncts(conjunct_ctxs.data(), conjunct_ctxs.size(), row)) {
                row_batch->commit_last_row();
                // Advance to the next tuple slot in the buffer.
                char* new_tuple = reinterpret_cast<char*>(tuple);
                new_tuple += _tuple_desc->byte_size();
                tuple = reinterpret_cast<Tuple*>(new_tuple);
                counter->num_rows_returned++;
            } else {
                // Row rejected: the same tuple slot is reused for the next row.
                counter->num_rows_filtered++;
            }
        }

        // Hand the batch to the reader, unless it ended up empty.
        if (row_batch->num_rows() > 0) {
            std::unique_lock<std::mutex> l(_batch_queue_lock);
            // Back-pressure: wait while the queue is full, re-checking the stop conditions
            // at least once a second.
            while (_process_status.ok() &&
                    !_scan_finished.load() &&
                    !_runtime_state->is_cancelled() &&
                    _batch_queue.size() >= _max_buffered_batches) {
                _queue_writer_cond.wait_for(l, std::chrono::seconds(1));
            }
            // A failure has already been recorded, so this scanner just returns OK.
            if (!_process_status.ok()) {
                return Status::OK;
            }
            // Scan already finished, just return.
            if (_scan_finished.load()) {
                return Status::OK;
            }
            // Runtime state is cancelled: report it.
            if (_runtime_state->is_cancelled()) {
                return Status::CANCELLED;
            }
            // At this point the queue holds fewer than _max_buffered_batches batches.
            _batch_queue.push_back(row_batch);
            // Wake a reader waiting for data.
            _queue_reader_cond.notify_one();
        }
    }
    return Status::OK;
}
// Entry point of a scanner thread. Processes `length` scan ranges starting at index
// `start_idx` of _scan_ranges, one after another, stopping at the first failure.
//
// The thread works on its own clones of the conjunct and partition expression contexts.
// When it is done it publishes the row counters to the runtime state, records a failure
// status (if any) and decrements _num_running_scanners under the queue lock, wakes the
// reader (and, on failure, the writers), and closes the cloned contexts.
void BrokerScanNode::scanner_worker(int start_idx, int length) {
    // Clone the conjunct contexts for this thread.
    std::vector<ExprContext*> scanner_expr_ctxs;
    auto status = Expr::clone_if_not_exists(_conjunct_ctxs, _runtime_state, &scanner_expr_ctxs);
    if (!status.ok()) {
        LOG(WARNING) << "Clone conjuncts failed.";
    }

    // Clone the partition expression contexts, but only if the first clone succeeded.
    std::vector<ExprContext*> partition_expr_ctxs;
    if (status.ok()) {
        status = Expr::clone_if_not_exists(
                _partition_expr_ctxs, _runtime_state, &partition_expr_ctxs);
        if (!status.ok()) {
            LOG(WARNING) << "Clone partition exprs failed.";
        }
    }

    // Scan the assigned ranges; a failed clone above skips the loop entirely.
    BrokerScanCounter counter;
    for (int i = 0; i < length && status.ok(); ++i) {
        const TBrokerScanRange& scan_range =
                _scan_ranges[start_idx + i].scan_range.broker_scan_range;
        status = scanner_scan(scan_range, scanner_expr_ctxs, partition_expr_ctxs, &counter);
        if (!status.ok()) {
            LOG(WARNING) << "Scanner[" << start_idx + i << "] prcess failed. status="
                         << status.get_error_msg();
        }
    }

    // Update stats
    _runtime_state->update_num_rows_load_success(counter.num_rows_returned);
    _runtime_state->update_num_rows_load_filtered(counter.num_rows_filtered);

    // The scanner is about to finish: publish its outcome under the queue lock so that
    // get_next() sees the status and the running count consistently.
    {
        std::lock_guard<std::mutex> l(_batch_queue_lock);
        if (!status.ok()) {
            update_status(status);
        }
        _num_running_scanners--;
    }
    _queue_reader_cond.notify_all();
    // If one scanner failed, the others do not need to keep scanning.
    if (!status.ok()) {
        _queue_writer_cond.notify_all();
    }

    Expr::close(scanner_expr_ctxs, _runtime_state);
    Expr::close(partition_expr_ctxs, _runtime_state);
}
// Binary-searches _partition_infos (sorted by range in init()) for the partition whose
// range matches `key`, i.e. for which range().compare_key(key) returns 0.
// Returns that partition's id, or -1 when no partition matches.
int64_t BrokerScanNode::binary_find_partition_id(const PartRangeKey& key) const {
    int first = 0;
    int last = _partition_infos.size() - 1;
    while (first <= last) {
        const int middle = first + (last - first) / 2;
        const int order = _partition_infos[middle]->range().compare_key(key);
        if (order < 0) {
            // Negative result: continue in the upper half
            // (presumably the range lies below the key; confirm against compare_key).
            first = middle + 1;
        } else if (order > 0) {
            // Positive result: continue in the lower half.
            last = middle - 1;
        } else {
            return _partition_infos[middle]->id();
        }
    }
    return -1;
}
// Determines which partition `row` belongs to.
//
// partition_expr_ctxs - partition expression contexts; only the first one is evaluated.
// row                 - the row to classify.
//
// Returns the id of the matching partition, or -1 when there are no partitions, no
// partition expression to evaluate, or no partition range matching the row's key.
int64_t BrokerScanNode::get_partition_id(
        const std::vector<ExprContext*>& partition_expr_ctxs, TupleRow* row) const {
    // Without partitions or without a partition expression there is nothing to match;
    // the second check also keeps the [0] access below from reading an empty vector.
    if (_partition_infos.empty() || partition_expr_ctxs.empty()) {
        return -1;
    }

    // Build the key from the value of the first partition expression.
    PartRangeKey part_key;
    ExprContext* ctx = partition_expr_ctxs[0];
    void* partition_val = ctx->get_value(row);
    if (partition_val != nullptr) {
        PartRangeKey::from_value(ctx->root()->type().type, partition_val, &part_key);
    } else {
        // A null partition value is mapped to the negative-infinity key.
        part_key = PartRangeKey::neg_infinite();
    }

    // Use binary search to find the partition containing the key.
    return binary_find_partition_id(part_key);
}
}