baidu palo
This commit is contained in:
329
be/src/exec/schema_scan_node.cpp
Normal file
329
be/src/exec/schema_scan_node.cpp
Normal file
@ -0,0 +1,329 @@
|
||||
// Modifications copyright (C) 2017, Baidu.com, Inc.
|
||||
// Copyright 2017 The Apache Software Foundation
|
||||
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "schema_scan_node.h"
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
#include "exec/text_converter.hpp"
|
||||
#include "exec/schema_scanner/frontend_helper.h"
|
||||
#include "gen_cpp/PlanNodes_types.h"
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "runtime/row_batch.h"
|
||||
#include "runtime/string_value.h"
|
||||
#include "runtime/tuple_row.h"
|
||||
#include "util/runtime_profile.h"
|
||||
|
||||
namespace palo {
|
||||
|
||||
// Constructs a schema scan node from its thrift plan node.
//
// Only the table name and the tuple id are read from tnode.schema_scan_node
// here. Every descriptor, buffer and scanner pointer starts out NULL and the
// node is marked as not initialized; prepare() and get_next() assign them
// later.
SchemaScanNode::SchemaScanNode(
        ObjectPool* pool,
        const TPlanNode& tnode,
        const DescriptorTbl& descs)
        : ScanNode(pool, tnode, descs),
          _is_init(false),
          _table_name(tnode.schema_scan_node.table_name),
          _tuple_id(tnode.schema_scan_node.tuple_id),
          _src_tuple_desc(NULL),
          _dest_tuple_desc(NULL),
          _tuple_idx(0),
          _slot_num(0),
          _tuple_pool(NULL),
          _schema_scanner(NULL),
          _src_tuple(NULL),
          _dest_tuple(NULL) {
}
|
||||
|
||||
// Frees the source tuple buffer.
//
// prepare() allocates that buffer with new char[], so the pointer is cast back
// to char* before delete[]. Deleting a NULL pointer is a no-op, which covers
// the case where prepare() never ran or failed early.
SchemaScanNode::~SchemaScanNode() {
    char* raw_src_tuple = reinterpret_cast<char*>(_src_tuple);
    _src_tuple = NULL;
    delete[] raw_src_tuple;
}
|
||||
|
||||
// Copies the optional scanner parameters out of the thrift plan node.
//
// Each field of tnode.schema_scan_node is copied into _scanner_param only when
// thrift marks it as set. String fields are duplicated into heap strings that
// are handed to _pool; port and thread_id are copied by value.
//
// Returns the error from ExecNode::init() if that fails, Status::OK otherwise.
Status SchemaScanNode::init(const TPlanNode& tnode) {
    RETURN_IF_ERROR(ExecNode::init(tnode));

    // Plain value fields.
    if (tnode.schema_scan_node.__isset.port) {
        _scanner_param.port = tnode.schema_scan_node.port;
    }
    if (tnode.schema_scan_node.__isset.thread_id) {
        _scanner_param.thread_id = tnode.schema_scan_node.thread_id;
    }

    // String fields: each copy is registered with the object pool.
    if (tnode.schema_scan_node.__isset.db) {
        _scanner_param.db =
                _pool->add(new std::string(tnode.schema_scan_node.db));
    }
    if (tnode.schema_scan_node.__isset.table) {
        _scanner_param.table =
                _pool->add(new std::string(tnode.schema_scan_node.table));
    }
    if (tnode.schema_scan_node.__isset.wild) {
        _scanner_param.wild =
                _pool->add(new std::string(tnode.schema_scan_node.wild));
    }
    if (tnode.schema_scan_node.__isset.user) {
        _scanner_param.user =
                _pool->add(new std::string(tnode.schema_scan_node.user));
    }
    if (tnode.schema_scan_node.__isset.ip) {
        _scanner_param.ip =
                _pool->add(new std::string(tnode.schema_scan_node.ip));
    }

    return Status::OK;
}
|
||||
|
||||
// Prepares the node for execution.
//
// Steps, in order:
//   1. create the tuple memory pool;
//   2. look up the destination tuple descriptor by _tuple_id;
//   3. create and initialize the schema scanner for the table's schema type;
//   4. fetch the scanner's (source) tuple descriptor and allocate one source
//      tuple buffer of that size;
//   5. build _index_map, mapping each destination slot to the source slot
//      with the same column name (case-insensitive) and the same type.
//
// Calling prepare() again after it has succeeded is a no-op. Returns an error
// Status if state is NULL, if any allocation or lookup fails, or if a
// destination column has no matching source column or a different type.
Status SchemaScanNode::prepare(RuntimeState* state) {
    if (_is_init) {
        return Status::OK;
    }
    if (state == NULL) {
        return Status("input pointer is NULL.");
    }

    RETURN_IF_ERROR(ScanNode::prepare(state));

    // Pool that backs the output tuples produced by get_next().
    _tuple_pool.reset(new(std::nothrow) MemPool(mem_tracker()));
    if (_tuple_pool.get() == NULL) {
        return Status("Allocate MemPool failed.");
    }

    // Destination tuple layout.
    _dest_tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
    if (_dest_tuple_desc == NULL) {
        return Status("Failed to get tuple descriptor.");
    }
    _slot_num = _dest_tuple_desc->slots().size();

    // The destination tuple's table descriptor tells us which schema table
    // is being scanned.
    const SchemaTableDescriptor* schema_table =
            static_cast<const SchemaTableDescriptor*>(_dest_tuple_desc->table_desc());
    if (schema_table == NULL) {
        return Status("Failed to get schema table descriptor.");
    }

    // Scanner that produces the rows of that schema table.
    _schema_scanner.reset(SchemaScanner::create(schema_table->schema_table_type()));
    if (_schema_scanner.get() == NULL) {
        return Status("schema scanner get NULL pointer.");
    }
    RETURN_IF_ERROR(_schema_scanner->init(&_scanner_param, _pool));

    // Source tuple layout, as reported by the scanner.
    _src_tuple_desc = _schema_scanner->tuple_desc();
    if (_src_tuple_desc == NULL) {
        return Status("failed to get src schema tuple desc.");
    }

    // Raw buffer for one source tuple; released in the destructor.
    _src_tuple = reinterpret_cast<Tuple*>(
            new(std::nothrow) char[_src_tuple_desc->byte_size()]);
    if (_src_tuple == NULL) {
        return Status("new src tuple failed.");
    }

    const int src_slot_num = static_cast<int>(_src_tuple_desc->slots().size());

    // A source descriptor without slots is the dummy one: nothing to map.
    if (src_slot_num == 0) {
        _slot_num = 0;
    }

    if (_slot_num > 0) {
        _index_map.resize(_slot_num);
    }

    // Match every destination slot to a source slot and verify the types.
    for (int dest_idx = 0; dest_idx < _slot_num; ++dest_idx) {
        // TODO(zhaochun): Is this slow?
        int src_idx = 0;
        while (src_idx < src_slot_num
                && !boost::iequals(_dest_tuple_desc->slots()[dest_idx]->col_name(),
                                   _src_tuple_desc->slots()[src_idx]->col_name())) {
            ++src_idx;
        }

        if (src_idx >= src_slot_num) {
            LOG(WARNING) << "no match column for this column("
                         << _dest_tuple_desc->slots()[dest_idx]->col_name() << ")";
            return Status("no match column for this column.");
        }

        if (_src_tuple_desc->slots()[src_idx]->type()
                != _dest_tuple_desc->slots()[dest_idx]->type()) {
            LOG(WARNING) << "schema not match. input is "
                         << _src_tuple_desc->slots()[src_idx]->type()
                         << " and output is "
                         << _dest_tuple_desc->slots()[dest_idx]->type();
            return Status("schema not match.");
        }

        _index_map[dest_idx] = src_idx;
    }

    // TODO(marcel): add int _tuple_idx indexed by TupleId somewhere in runtime_state.h
    _tuple_idx = 0;
    _is_init = true;

    return Status::OK;
}
|
||||
|
||||
// Opens the node and starts the underlying schema scanner.
//
// Returns an error Status if prepare() has not completed successfully, if
// state is NULL, or if any of exec_debug_action(), the cancellation check,
// ExecNode::open() or the scanner's start() reports a failure.
Status SchemaScanNode::open(RuntimeState* state) {
    if (!_is_init) {
        return Status("Open before Init.");
    }

    if (NULL == state) {
        return Status("input pointer is NULL.");
    }

    SCOPED_TIMER(_runtime_profile->total_time_counter());
    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN));
    RETURN_IF_CANCELLED(state);
    RETURN_IF_ERROR(ExecNode::open(state));

    // Nothing is done with _scanner_param.user here: the per-user session
    // setup (SchemaJniHelper::set_session) is disabled, so no session
    // parameters are built for it.
    // TODO: either restore the set_session call or drop this note.

    // _schema_scanner is non-NULL here because _is_init is only set after
    // prepare() created and initialized it.
    return _schema_scanner->start(state);
}
|
||||
|
||||
void SchemaScanNode::copy_one_row() {
|
||||
memset(_dest_tuple, 0, _dest_tuple_desc->num_null_bytes());
|
||||
|
||||
for (int i = 0; i < _slot_num; ++i) {
|
||||
int j = _index_map[i];
|
||||
|
||||
if (_src_tuple->is_null(_src_tuple_desc->slots()[j]->null_indicator_offset())) {
|
||||
_dest_tuple->set_null(_dest_tuple_desc->slots()[i]->null_indicator_offset());
|
||||
} else {
|
||||
void* dest_slot = _dest_tuple->get_slot(_dest_tuple_desc->slots()[i]->tuple_offset());
|
||||
void* src_slot = _src_tuple->get_slot(_src_tuple_desc->slots()[j]->tuple_offset());
|
||||
int slot_size = _src_tuple_desc->slots()[j]->type().get_slot_size();
|
||||
memcpy(dest_slot, src_slot, slot_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fills row_batch with rows read from the schema scanner.
//
// Rows are pulled one at a time from the scanner into _src_tuple, copied into
// a freshly allocated destination tuple buffer and kept only if they pass the
// node's conjuncts. The call returns when the batch is full, the row limit is
// reached, or the scanner has no more rows.
//
// state      runtime state, used for cancellation checks; must not be NULL.
// row_batch  batch to append rows to; must not be NULL.
// eos        set to true when the limit was reached or the scanner is
//            exhausted, false when the batch merely filled up; must not be
//            NULL.
//
// Returns an error Status if the node is not prepared, an argument is NULL,
// the tuple buffer cannot be allocated, or the scanner fails.
Status SchemaScanNode::get_next(RuntimeState* state, RowBatch* row_batch,
                                bool* eos) {
    if (!_is_init) {
        return Status("GetNext before Init.");
    }
    if (state == NULL || row_batch == NULL || eos == NULL) {
        return Status("input pointer is NULL.");
    }

    RETURN_IF_CANCELLED(state);
    SCOPED_TIMER(_runtime_profile->total_time_counter());
    SCOPED_TIMER(materialize_tuple_timer());

    if (reached_limit()) {
        *eos = true;
        return Status::OK;
    }

    // One buffer large enough for a full batch of destination tuples.
    int tuple_buffer_size = row_batch->capacity() * _dest_tuple_desc->byte_size();
    void* tuple_buffer = _tuple_pool->allocate(tuple_buffer_size);
    if (tuple_buffer == NULL) {
        return Status("Allocate tuple buffer failed.");
    }

    // The buffer is not zeroed here; copy_one_row() resets the null bytes of
    // each tuple before filling it.
    _dest_tuple = reinterpret_cast<Tuple*>(tuple_buffer);

    // Set by the scanner's get_next_row() once it has no more rows.
    bool scanner_eos = false;

    for (;;) {
        RETURN_IF_CANCELLED(state);

        if (reached_limit() || row_batch->is_full()) {
            // Hand the tuple memory over to the batch. The second argument is
            // true only when the batch is full but the limit is not reached
            // (presumably asking the pool to keep its current chunk for the
            // next get_next() call -- confirm against MemPool::acquire_data).
            row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), !reached_limit());
            *eos = reached_limit();
            return Status::OK;
        }

        RETURN_IF_ERROR(_schema_scanner->get_next_row(
                _src_tuple, _tuple_pool.get(), &scanner_eos));

        if (scanner_eos) {
            row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), false);
            *eos = true;
            return Status::OK;
        }

        // Stage the row in the batch; it only becomes visible if committed.
        int row_idx = row_batch->add_row();
        TupleRow* row = row_batch->get_row(row_idx);
        row->set_tuple(_tuple_idx, _dest_tuple);
        copy_one_row();

        // Rows rejected by the conjuncts are not committed, so the same batch
        // slot and the same destination tuple are reused for the next row.
        if (!eval_conjuncts(&_conjunct_ctxs[0], _conjunct_ctxs.size(), row)) {
            continue;
        }

        row_batch->commit_last_row();
        ++_num_rows_returned;
        COUNTER_SET(_rows_returned_counter, _num_rows_returned);

        // Advance to the next tuple-sized region of the buffer.
        _dest_tuple = reinterpret_cast<Tuple*>(
                reinterpret_cast<char*>(_dest_tuple) + _dest_tuple_desc->byte_size());
    }

    return Status::OK;
}
|
||||
|
||||
// Closes the node: records the tuple pool's peak memory usage in the profile
// and then delegates to ExecNode::close().
//
// Calling close() on an already closed node returns Status::OK immediately.
Status SchemaScanNode::close(RuntimeState* state) {
    if (is_closed()) {
        return Status::OK;
    }
    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::CLOSE));
    SCOPED_TIMER(_runtime_profile->total_time_counter());

    // _tuple_pool is only created part-way through prepare(); if prepare()
    // was never called or failed before that point the pool is still NULL
    // and must not be dereferenced.
    if (memory_used_counter() != NULL && _tuple_pool.get() != NULL) {
        COUNTER_UPDATE(memory_used_counter(), _tuple_pool->peak_allocated_bytes());
    }

    return ExecNode::close(state);
}
|
||||
|
||||
void SchemaScanNode::debug_string(int indentation_level, stringstream* out) const {
|
||||
*out << string(indentation_level * 2, ' ');
|
||||
*out << "SchemaScanNode(tupleid=" << _tuple_id << " table=" << _table_name;
|
||||
*out << ")" << std::endl;
|
||||
|
||||
for (int i = 0; i < _children.size(); ++i) {
|
||||
_children[i]->debug_string(indentation_level + 1, out);
|
||||
}
|
||||
}
|
||||
|
||||
// Accepts the scan ranges assigned to this node and ignores them: the
// argument is never read and the call always succeeds.
Status SchemaScanNode::set_scan_ranges(const vector<TScanRangeParams>& scan_ranges) {
    return Status::OK;
}
|
||||
|
||||
}
|
||||
|
||||
/* vim: set ts=4 sw=4 sts=4 tw=100 : */
|
||||
Reference in New Issue
Block a user